Skip to content

Commit

Permalink
Merge branch 'main' into website
Browse files Browse the repository at this point in the history
  • Loading branch information
Animenosekai committed Jun 25, 2022
2 parents 13b3a29 + 3cea8d1 commit 5b83145
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 119 deletions.
3 changes: 3 additions & 0 deletions requirements-server.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
nasse @ git+https://github.com/Animenosekai/nasse.git # the latest 'main' branch version
jinja2<3.1.0
itsdangerous==2.0.1
6 changes: 0 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,3 @@ beautifulsoup4
typing; python_version<"3.5" # for backward compatibility
pyuseragents
inquirer>=2.8.0


# server
git+https://github.com/Animenosekai/nasse # the latest 'main' branch version
jinja2<3.1.0
itsdangerous==2.0.1
50 changes: 39 additions & 11 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,15 @@

from setuptools import setup

with open(path.join(path.abspath(path.dirname(__file__)), 'README.md'), encoding='utf-8') as f:
with open(path.join(path.abspath(path.dirname(__file__)), "README.md"), encoding="utf-8") as f:
readme_description = f.read()


def read_requirements(filename):
    """
    Reads a pip-style requirements file and returns its requirement specifiers

    The result is meant to be passed to `install_requires` / `extras_require`,
    which only accept bare requirement strings: blank lines, comment-only lines
    and trailing `# ...` comments are therefore removed.

    Args:
        filename: path of the requirements file. A relative path is resolved
            against the directory containing this file (like the README), not
            against the current working directory.
    Returns:
        list --> the requirement specifiers, in file order
    """
    # Local imports keep this helper self-contained.
    import re
    from os import path

    # `path.join` returns `filename` unchanged when it is already absolute.
    location = path.join(path.dirname(path.abspath(__file__)), filename)

    requirements = []
    with open(location, "r", encoding="utf-8") as fp:
        for line in fp:
            # Same rule as pip: a comment starts with "#" at the beginning of the
            # line or after whitespace, so URL fragments like "...#egg=name" survive.
            requirement = re.sub(r"(^|\s)#.*$", "", line).strip()
            if requirement:
                requirements.append(requirement)
    return requirements


setup(
name="translatepy",
packages=["translatepy"],
Expand All @@ -15,19 +21,41 @@
author_email="[email protected]",
url="https://github.com/Animenosekai/translate",
download_url="https://github.com/Animenosekai/translate/archive/v2.4.tar.gz",
keywords=['python', 'translate', 'translation', 'google-translate', 'yandex-translate', 'bing-translate', 'reverso', 'transliteration', 'detect-language', 'text-to-speech', 'deepl', 'language'],
install_requires=['requests', 'safeIO>=1.2', 'beautifulsoup4', 'typing; python_version<"3.5"', 'pyuseragents', 'inquirer>=2.8.0'],
classifiers=['Development Status :: 5 - Production/Stable', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9'],
keywords=[
"python",
"translate",
"translation",
"google-translate",
"yandex-translate",
"bing-translate",
"reverso",
"transliteration",
"detect-language",
"text-to-speech",
"deepl",
"language",
],
install_requires=read_requirements("requirements.txt"),
extras_require={"server": read_requirements("requirements-server.txt")},
classifiers=[
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.2",
"Programming Language :: Python :: 3.3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
],
long_description=readme_description,
long_description_content_type="text/markdown",
include_package_data=True,
python_requires='>=3.2, <4',
entry_points={
'console_scripts': [
'translatepy = translatepy.__main__:main'
]
},
python_requires=">=3.2, <4",
entry_points={"console_scripts": ["translatepy = translatepy.__main__:main"]},
package_data={
'translatepy': ['LICENSE'],
"translatepy": ["LICENSE"],
},
)
5 changes: 5 additions & 0 deletions translatepy/translators/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,9 @@ def text_to_speech(self, text: str, speed: int = 100, gender: str = "female", so
if gender not in {"male", "female"}:
raise ParameterValueError("Gender {gender} not supported. Supported genders: male, female".format(gender=gender))

if not isinstance(speed, int):
raise ParameterTypeError("Parameter 'speed' must be an integer, {} was given".format(type(speed).__name__))

# Build cache key
_cache_key = str({"t": text, "sp": speed, "s": source_code, "g": gender})

Expand Down Expand Up @@ -554,6 +557,8 @@ def _detect_and_validate_lang(self, language: str) -> str:
"""
if isinstance(language, Language):
result = language
elif not isinstance(language, str):
raise ParameterTypeError("Parameter 'language' must be a string, {} was given".format(type(language).__name__))
else:
result = Language(language)

Expand Down
19 changes: 12 additions & 7 deletions translatepy/translators/bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,18 @@ def __init__(self, request: Request, captcha_callback: Callable[[str], str] = No
self._parse_authorization_data()

def _parse_authorization_data(self):
_request = self.session.get("https://www.bing.com/translator")
_page = _request.text
_parsed_IG = re.findall('IG:"(.*?)"', _page)
_parsed_IID = re.findall('data-iid="(.*?)"', _page)
_parsed_helper_info = re.findall("params_RichTranslateHelper = (.*?);", _page)

if not _parsed_helper_info:
for _ in range(3):
_request = self.session.get("https://www.bing.com/translator")
_page = _request.text
_parsed_IG = re.findall('IG:"(.*?)"', _page)
_parsed_IID = re.findall('data-iid="(.*?)"', _page)
_parsed_helper_info = re.findall("params_RichTranslateHelper = (.*?);", _page)

if not _parsed_helper_info:
continue

break
else:
raise BingTranslateException(message="Can't parse the authorization data, try again later or use MicrosoftTranslate")

_normalized_key = json.loads(_parsed_helper_info[0])[0]
Expand Down
157 changes: 62 additions & 95 deletions translatepy/translators/yandex.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from translatepy.exceptions import UnsupportedMethod
from translatepy.language import Language
from translatepy.translators.base import BaseTranslateException, BaseTranslator
from translatepy.utils.lru_cacher import timed_lru_cache
from translatepy.utils.request import Request


Expand All @@ -36,48 +35,54 @@ class YandexTranslate(BaseTranslator):
Yandex Translation Implementation
"""

_api_url = "https://translate.yandex.net/api/v1/tr.json/{endpoint}"
_api_url = "http://translate.yandex.net/api/v1/tr.json/{endpoint}"
_supported_languages = {'auto', 'af', 'sq', 'am', 'ar', 'hy', 'az', 'ba', 'eu', 'be', 'bn', 'bs', 'bg', 'my', 'ca', 'ca', 'ceb', 'zh', 'cv', 'cs', 'da', 'nl', 'nl', 'en', 'eo', 'et', 'fi', 'fr', 'ka', 'de', 'gd', 'gd', 'ga', 'gl', 'el', 'gu', 'ht', 'ht', 'he', 'hi', 'hr', 'hu', 'is', 'id', 'it', 'jv', 'ja', 'kn', 'kk', 'km', 'ky', 'ky', 'ko', 'lo', 'la', 'lv', 'lt', 'lb', 'lb', 'mk', 'ml', 'mi', 'mr', 'ms', 'mg', 'mt', 'mn', 'mrj', 'mhr', 'ne', 'no', 'pa', 'pa', 'pap', 'fa', 'pl', 'pt', 'ro', 'ro', 'ro', 'ru', 'sah', 'si', 'si', 'sk', 'sl', 'es', 'es', 'sr', 'sjn', 'su', 'sw', 'sv', 'ta', 'tt', 'te', 'tg', 'tl', 'th', 'tr', 'udm', 'uk', 'ur', 'uz', 'vi', 'cy', 'xh', 'yi', 'zu', 'kazlat', 'uzbcyr', 'emj'}
# _language_aliases = {"zho": "zh"} # TODO: Feat: instead of declaring the _language_normalize and _language_denormalize methods every time in each class, we can simply put the value of the language codes aliases used by service to the _language_aliases class attribute. Syntax: {official_iso639_lang_code: service used_code}

def __init__(self, request: Request = Request()):
self.session = request
self.session.header = {"User-Agent": "ru.yandex.translate/3.20.2024"}
self.session.header = {"User-Agent": "ru.yandex.translate/22.11.8.22364114 (samsung SM-A505GM; Android 12)"} # TODO: generate random telephone model

@timed_lru_cache(360) # Store UUID value within 360 seconds
def _ucid(self) -> str:
uuid_v4 = str(uuid.uuid4())
self.session_ucid = uuid_v4.replace("-", "")
self.session_request_id = 0

def _ucid(self, session_state: bool = False) -> str:
"""
Generates UUID (UCID) for Yandex Translate API requests (USID analogue)
Generates UUID (UCID / (U)SID) for Yandex Translate API requests
Args:
Returns:
str --> new generated UUID value
str --> Yandex UUID value
"""
# Yandex Translate generally generates UUID V5, but the API also accepts UUID V4 (bug or feature?)
_uuid = str(uuid.uuid4())
_ucid = _uuid.replace("-", "")
return _ucid

if session_state:
request_id = self.session_request_id
self.session_request_id += 1
return "{ucid}-{request_id}-0".format(ucid=self.session_ucid, request_id=request_id)

return self.session_ucid

def _translate(self, text: str, destination_language: str, source_language: str) -> str:
    """
    Translates `text` using the Yandex `translate` endpoint

    Args:
        text: the text to translate
        destination_language: code of the language to translate to
        source_language: code of the language of `text`, or "auto" to detect it with `_language`
    Returns:
        tuple --> (source language code, translated text)
    Raises:
        YandexTranslateException: when the HTTP status or the "code" field of the JSON answer is not 200
    """
    if source_language == "auto":
        source_language = self._language(text)

    url = self._api_url.format(endpoint="translate")
    # `session_state=True` asks `_ucid` for the "<ucid>-<request_id>-0" form of the session ID
    params = {"sid": self._ucid(session_state=True), "srv": "android", "format": "text"}
    data = {"text": text, "lang": source_language + "-" + destination_language}
    request = self.session.post(url, params=params, data=data)
    response = request.json()

    # A failure reported by EITHER the HTTP layer or the JSON body is an error;
    # the previous `and` only raised when both disagreed with 200 at once.
    if request.status_code != 200 or response["code"] != 200:
        raise YandexTranslateException(response["code"])

    # The language pair was built above as "<source>-<destination>"
    _detected_language = data["lang"].split("-")[0]

    return _detected_language, response["text"][0]

def _transliterate(self, text: str, destination_language: str, source_language: str) -> str:
if source_language == "auto":
source_language = self._language(text)
Expand All @@ -86,109 +91,84 @@ def _transliterate(self, text: str, destination_language: str, source_language:
data = {'text': text, 'lang': source_language + "-" + destination_language}
request = self.session.post(url, data=data)

if request.status_code < 400:
return source_language, request.text[1:-1]
else:
if request.status_code != 200:
raise YandexTranslateException(request.status_code, request.text)

return source_language, request.text[1:-1]

def _spellcheck(self, text: str, source_language: str) -> str:
if source_language == "auto":
source_language = self._language(text)

url = "https://speller.yandex.net/services/spellservice.json/checkText"
params = {"ucid": self._ucid(), "srv": "android"}
params = {"sid": self._ucid(), "srv": "android"}
data = {"text": text, "lang": source_language, "options": 8 + 4}
request = self.session.post(url, params=params, data=data)
response = request.json()

if request.status_code < 400:
for correction in response:
if correction["s"]:
word = correction['word']
suggestion = correction['s'][0]
text = text.replace(word, suggestion)
return source_language, text
else:
if request.status_code != 200:
raise YandexTranslateException(request.status_code, request.text)
for correction in response:
if correction["s"]:
word = correction['word']
suggestion = correction['s'][0]
text = text.replace(word, suggestion)
return source_language, text

def _language(self, text: str):
    """
    Detects the language of `text` using the Yandex `detect` endpoint

    Args:
        text: the text whose language should be detected
    Returns:
        the "lang" field of the JSON answer
    Raises:
        YandexTranslateException: when the HTTP status or the "code" field of the JSON answer is not 200
    """
    url = self._api_url.format(endpoint="detect")
    params = {"sid": self._ucid(), "srv": "android"}
    data = {'text': text, 'hint': "en"}
    request = self.session.get(url, params=params, data=data)
    response = request.json()

    # A failure reported by EITHER the HTTP layer or the JSON body is an error;
    # the previous `and` only raised when both disagreed with 200 at once.
    if request.status_code != 200 or response["code"] != 200:
        raise YandexTranslateException(response["code"])

    return response["lang"]

def _example(self, text: str, destination_language: str, source_language: str):
if source_language == "auto":
source_language = self._language(text)

url = "https://dictionary.yandex.net/dicservice.json/queryCorpus"
params = {"ucid": self._ucid(), "srv": "android", "src": text, "ui": "en", "lang": source_language + "-" + destination_language, "flags": 7}
params = {"sid": self._ucid(), "srv": "android", "src": text, "ui": "en", "lang": source_language + "-" + destination_language, "flags": 7}
request = self.session.get(url, params=params)

if request.status_code < 400:
response = request.json()

_result = []
for examples_group in response["result"]:
for sentense in examples_group["examples"]:
_sentense_result = sentense["dst"]
_sentense_result = _sentense_result.replace("<", "").replace(">", "")
_result.append(_sentense_result)
return source_language, _result
else:
if request.status_code != 200:
raise YandexTranslateException(request.status_code, request.text)

def _dictionary(self, text: str, destination_language: str, source_language: str):
if source_language == "auto":
source_language = self._language(text)

url = "https://dictionary.yandex.net/dicservice.json/lookupMultiple"
params = {"ucid": self._ucid(), "srv": "android", "text": text, "ui": "en", "dict": source_language + "-" + destination_language, "flags": 7, "dict_type": "regular"}
request = self.session.get(url, params=params)

if request.status_code < 400:
response = request.json()

_result = []
response = request.json()

for word in response["{}-{}".format(source_language, destination_language)]["regular"]:
_word_result = word["tr"][0]["text"]
_result.append(_word_result)
_result = []

return source_language, _result
else:
raise YandexTranslateException(request.status_code, request.text)
for examples_group in response["result"]:
for sentense in examples_group["examples"]:
_sentense_result = sentense["dst"]
_sentense_result = _sentense_result.replace("<", "").replace(">", "")
_result.append(_sentense_result)

def _text_to_speech(self, text: str, speed: int, gender: str, source_language: str):
# TODO: Use Yandex Alice text to speech (Premium voices)
return source_language, _result

def _dictionary(self, text: str, destination_language: str, source_language: str):
if source_language == "auto":
source_language = self._language(text)

speech_lang_voices = {
"male": {"ru": ["ru_RU", "filipp"], "tr": ["tr_TR", "erkanyavas"], "en": ["en_US", "nick"]},
"female": {"ru": ["ru_RU", "alena"], "tr": ["tr_TR", "silaerkan"], "en": ["en_US", "alyss"]}
}
url = "https://dictionary.yandex.net/dicservice.json/lookupMultiple"
params = {"sid": self._ucid(), "srv": "android", "text": text, "ui": "en", "dict": source_language + "-" + destination_language, "flags": 7, "dict_type": "regular"}
request = self.session.get(url, params=params)

lang = speech_lang_voices[gender].get(source_language)
if request.status_code != 200:
raise YandexTranslateException(request.status_code, request.text)
response = request.json()

if lang is None:
raise UnsupportedMethod("Yandex SpeechKit doesn't support {source_lang} language".format(source_lang=source_language))
_result = []

url = "https://tts.voicetech.yandex.net/tts"
params = {"format": "mp3", "quality": "hi", "chunked": 0, "platform": "web", "mock-ranges": 1, "application": "translate", "lang": lang[0], "text": text, "voice": lang[1], "speed": speed / 100}
response = self.session.get(url, params=params, headers={"Content-Type": None})
for word in response["{}-{}".format(source_language, destination_language)]["regular"]:
_word_result = word["tr"][0]["text"]
_result.append(_word_result)

if response.status_code < 400:
return source_language, response.content
else:
raise YandexTranslateException(response.status_code, response.text)
return source_language, _result

def _language_normalize(self, language):
if language.id == "zho":
Expand All @@ -204,18 +184,5 @@ def _language_denormalize(self, language_code):
return Language("srd")
return Language(language_code)

"""
def _language_normalize(self, language):
return self._language_aliases.get(language.id, language.alpha2)
# return language.alpha2
def _language_denormalize(self, language_code):
for _language_code, _service_code in self._language_aliases.items():
if _service_code.lower() == language_code.lower():
language_code = _language_code
break
return Language(language_code)
"""

def __str__(self) -> str:
return "Yandex"

1 comment on commit 5b83145

@vercel
Copy link

@vercel vercel bot commented on 5b83145 Jun 25, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.