Skip to content

Commit

Permalink
refactor: Refactored translation engines.
Browse files (browse the repository at this point in the history)
  • Loading branch information
bookfere committed Jun 16, 2024
1 parent 24bade1 commit 6fae2be
Show file tree
Hide file tree
Showing 13 changed files with 1,178 additions and 392 deletions.
8 changes: 7 additions & 1 deletion components/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,15 @@ def __init__(self, default=None):
self.refresh()

def layout(self):
for engine in builtin_engines:
engines = sorted(builtin_engines, key=lambda item: not item.free)
for engine in engines:
previous_index = engines.index(engine) - 1
if not engine.free and engines[previous_index].free:
self.insertSeparator(previous_index + 1)
self.addItem(_(engine.alias), engine.name)
custom_engines = get_config().get('custom_engines')
if len(custom_engines) > 0:
self.insertSeparator(len(builtin_engines) + 1)
for name in sorted(custom_engines.keys(), key=sorted_mixed_keys):
self.addItem(name, name)
self.default and self.setCurrentIndex(self.findData(self.default))
Expand Down
21 changes: 8 additions & 13 deletions engines/anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,37 +67,32 @@ def _get_prompt(self):
'{{id_\\d+}} in the content are retained.')
return prompt

def _get_headers(self):
def get_headers(self):
return {
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'x-api-key': self.api_key,
'User-Agent': 'Ebook-Translator/%s' % EbookTranslator.__version__,
}

def _get_data(self, text):
return {
def get_body(self, text):
body = {
'stream': self.stream,
'max_tokens': 4096,
'model': self.model,
'top_k': self.top_k,
'system': self._get_prompt(),
'messages': [{'role': 'user', 'content': text}]
}

def translate(self, text):
data = self._get_data(text)
sampling_value = getattr(self, self.sampling)
data.update({self.sampling: sampling_value})
body.update({self.sampling: sampling_value})

return self.get_result(
self.endpoint, json.dumps(data), self._get_headers(),
method='POST', stream=self.stream, callback=self._parse)
return json.dumps(body)

def _parse(self, data):
def get_result(self, response):
if self.stream:
return self._parse_stream(data)
return json.loads(data)['content'][0]['text']
return self._parse_stream(response)
return json.loads(response)['content'][0]['text']

def _parse_stream(self, data):
while True:
Expand Down
13 changes: 7 additions & 6 deletions engines/baidu.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@ class BaiduTranslate(Base):
api_key_pattern = r'^[^\s:\|]+?[:\|][^\s:\|]+$'
api_key_errors = ['54004']

def translate(self, text):
def get_headers(self):
return {'Content-Type': 'application/x-www-form-urlencoded'}

def get_body(self, text):
try:
app_id, app_key = re.split(r'[:\|]', self.api_key)
except Exception:
Expand All @@ -32,8 +35,7 @@ def translate(self, text):
sign_str = app_id + text + str(salt) + app_key
sign = hashlib.md5(sign_str.encode('utf-8')).hexdigest()

headers = {'Content-Type': 'application/x-www-form-urlencoded'}
data = {
return {
'appid': app_id,
'q': text,
'from': self._get_source_code(),
Expand All @@ -42,6 +44,5 @@ def translate(self, text):
'sign': sign
}

return self.get_result(
self.endpoint, data, headers, method='POST',
callback=lambda r: json.loads(r)['trans_result'][0]['dst'])
def get_result(self, response):
return json.loads(response)['trans_result'][0]['dst']
116 changes: 51 additions & 65 deletions engines/base.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,26 @@
import ssl
import os.path

from mechanize import Browser, Request, HTTPError
from calibre import get_proxies
from mechanize import HTTPError

from calibre.utils.localization import lang_as_iso639_1

from ..lib.utils import traceback_error
from ..lib.utils import traceback_error, request
from ..lib.exception import UnexpectedResult, NoAvailableApiKey


load_translations()


class Base:
name = 'Unknown'
alias = 'Unknown'
name = None
alias = None
free = False

lang_codes = {}
config = {}
endpoint = 'https://example.com'
endpoint = None
method = 'POST'
stream = False
need_api_key = True
api_key_hint = _('API Keys')
api_key_pattern = r'^[^\s]+$'
Expand All @@ -38,10 +42,9 @@ def __init__(self):
self.search_paths = []

self.merge_enabled = False

self.api_keys = self.config.get('api_keys', [])[:]
self.bad_api_keys = []
self.api_key = self._get_api_key()
self.api_key = self.get_api_key()

concurrency_limit = self.config.get('concurrency_limit')
if concurrency_limit is not None:
Expand Down Expand Up @@ -88,16 +91,24 @@ def api_key_error_message(cls):
return _('A correct key format "{}" is required.') \
.format(cls.api_key_hint)

def change_api_key(self):
def disable_stream(self):
self.stream = False

def get_api_key(self):
if self.need_api_key and self.api_keys:
return self.api_keys.pop(0)
return None

def swap_api_key(self):
"""Change the API key if the previous one cannot be used."""
if self.api_key not in self.bad_api_keys:
self.bad_api_keys.append(self.api_key)
self.api_key = self._get_api_key()
self.api_key = self.get_api_key()
if self.api_key is not None:
return True
return False

def need_change_api_key(self, error_message):
def need_swap_api_key(self, error_message):
if self.need_api_key and len(self.api_keys) > 0:
for error in self.api_key_errors:
if error in error_message:
Expand All @@ -115,9 +126,6 @@ def get_external_program(self, name, paths=[]):
return path
return None

def set_endpoint(self, endpoint):
self.endpoint = endpoint

def set_merge_enabled(self, enable):
self.merge_enabled = enable

Expand Down Expand Up @@ -157,62 +165,40 @@ def _get_target_code(self):
def _is_auto_lang(self):
return self._get_source_code() == 'auto'

def _get_api_key(self):
if self.need_api_key and self.api_keys:
return self.api_keys.pop(0)
return None

def get_browser(self):
br = Browser()
br.set_handle_robots(False)
# Do not verify SSL certificates
br.set_ca_data(
context=ssl._create_unverified_context(cert_reqs=ssl.CERT_NONE))

proxies = {}
if self.proxy_uri is not None:
proxies.update(http=self.proxy_uri, https=self.proxy_uri)
else:
http = get_proxies(False).get('http')
http and proxies.update(http=http, https=http)
https = get_proxies(False).get('https')
https and proxies.update(https=https)
proxies and br.set_proxies(proxies)

return br

def get_result(self, url, data=None, headers={}, method='GET',
stream=False, silence=False, callback=None):
# Compatible with mechanize 0.3.0 on Calibre 3.21.
try:
request = Request(
url, data, headers=headers, timeout=self.request_timeout,
method=method)
except Exception:
request = Request(
url, data, headers=headers, timeout=self.request_timeout)
def translate(self, text):
try:
result = ''
br = self.get_browser()
br.open(request)
response = br.response()
if not stream:
response = request(
self.get_endpoint(), self.get_body(text), self.get_headers(),
self.method, self.request_timeout, self.proxy_uri)
if not self.stream:
response = result = response.read().decode('utf-8').strip()
return response if callback is None else callback(response)
return self.get_result(response)
except Exception as e:
if silence:
return None
error = [traceback_error()]
# Combine the error messages for investigation.
error_message = traceback_error()
if isinstance(e, HTTPError):
error.append(e.read().decode('utf-8'))
elif result:
error.append(result)
raise Exception(
error_message += '\n\n' + e.read().decode('utf-8')
elif result != '':
error_message += '\n\n' + result
# If the error indicates the current API key is unusable, switch to the next available key and retry.
if self.need_swap_api_key(error_message) and self.swap_api_key():
return self.translate(text)
raise UnexpectedResult(
_('Can not parse returned response. Raw data: {}')
.format('\n\n' + '\n\n'.join(error)))
.format('\n\n' + error_message))

def get_usage(self):
def get_endpoint(self):
return self.endpoint

def get_headers(self):
return {}

def get_body(self, text):
return None

def translate(self, text):
raise NotImplementedError()
def get_result(self, response):
return response

def get_usage(self):
return None
35 changes: 18 additions & 17 deletions engines/custom.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,41 +95,42 @@ class CustomTranslate(Base):
@classmethod
def set_engine_data(cls, data):
cls.name = data.get('name') # rename custom engine
cls.engine_data = data
cls.request = data.get('request')
cls.response = data.get('response')
cls.lang_codes = cls.load_lang_codes(data.get('languages'))

def translate(self, text):
request = self.engine_data.get('request')
def __init__(self):
Base.__init__(self)
self.endpoint = self.request.get('url')
self.method = self.request.get('method') or 'GET'

endpoint = request.get('url')
method = request.get('method') or 'GET'
headers = request.get('headers') or {}
def get_headers(self):
return self.request.get('headers') or {}

data = request.get('data')
need_restore = isinstance(data, dict)
data = json.dumps(data)
def get_body(self, text):
body = self.request.get('data')
need_restore = isinstance(body, dict)
body = json.dumps(body)
# The substituted values may contain non-ASCII characters, so the body is
# encoded as UTF-8 bytes to keep the payload Latin-1 (ISO-8859-1) compliant.
data = data.replace('<source>', self._get_source_code()) \
body = body.replace('<source>', self._get_source_code()) \
.replace('<target>', self._get_target_code()) \
.replace('<text>', json.dumps(text)[1:-1]).encode('utf-8')
headers = self.get_headers()
is_json = headers and 'application/json' in headers.values()
if need_restore and not is_json:
data = json.loads(data)
return json.loads(body)
return body

return self.get_result(
endpoint, data, headers, method=method, callback=self._parse)

def _parse(self, response):
def get_result(self, response):
try:
response = json.loads(response)
except Exception:
try:
response = etree.fromstring(response)
except Exception:
return response
result = eval(
self.engine_data.get('response'), {"response": response})
result = eval(self.response, {"response": response})
if not is_str(result):
raise Exception(_('Response was parsed incorrectly.'))
return result
Loading

0 comments on commit 6fae2be

Please sign in to comment.