baidu.py 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. #from https://pypi.org/project/baidu-trans/
  2. import hashlib
  3. import urllib.parse
  4. import random
  5. import re
  6. import aiohttp
  7. from .common import CommonTranslator, InvalidServerResponse, MissingAPIKeyException
  8. from .keys import BAIDU_APP_ID, BAIDU_SECRET_KEY
  9. # base api url
  10. BASE_URL = 'api.fanyi.baidu.com'
  11. API_URL = '/api/trans/vip/translate'
  12. class BaiduTranslator(CommonTranslator):
  13. _LANGUAGE_CODE_MAP = {
  14. 'CHS': 'zh',
  15. 'CHT': 'cht',
  16. 'JPN': 'ja',
  17. 'ENG': 'en',
  18. 'KOR': 'kor',
  19. 'VIN': 'vie',
  20. 'CSY': 'cs',
  21. 'NLD': 'nl',
  22. 'FRA': 'fra',
  23. 'DEU': 'de',
  24. 'HUN': 'hu',
  25. 'ITA': 'it',
  26. 'PLK': 'pl',
  27. 'PTB': 'pt',
  28. 'ROM': 'rom',
  29. 'RUS': 'ru',
  30. 'ESP': 'spa',
  31. 'SRP': 'srp',
  32. 'HRV': 'hrv',
  33. 'THA': 'th'
  34. }
  35. _INVALID_REPEAT_COUNT = 1
  36. def __init__(self) -> None:
  37. super().__init__()
  38. if not BAIDU_APP_ID or not BAIDU_SECRET_KEY:
  39. raise MissingAPIKeyException('Please set the BAIDU_APP_ID and BAIDU_SECRET_KEY environment variables before using the baidu translator.')
  40. async def _translate(self, from_lang, to_lang, queries):
  41. # Split queries with \n up
  42. n_queries = []
  43. query_split_sizes = []
  44. for query in queries:
  45. batch = query.split('\n')
  46. query_split_sizes.append(len(batch))
  47. n_queries.extend(batch)
  48. url = self.get_url(from_lang, to_lang, '\n'.join(n_queries))
  49. async with aiohttp.ClientSession() as session:
  50. async with session.get('https://'+BASE_URL+url) as resp:
  51. result = await resp.json()
  52. result_list = []
  53. if "trans_result" not in result:
  54. raise InvalidServerResponse(f'Baidu returned invalid response: {result}\nAre the API keys set correctly?')
  55. for ret in result["trans_result"]:
  56. for v in ret["dst"].split('\n'):
  57. result_list.append(v)
  58. # Join queries that had \n back together
  59. translations = []
  60. i = 0
  61. for size in query_split_sizes:
  62. translations.append('\n'.join(result_list[i:i+size]))
  63. i += size
  64. return translations
  65. def _modify_invalid_translation_query(self, query: str, trans: str) -> str:
  66. query = re.sub(r'(.)\1{2}', r'\g<0>\n', query)
  67. return query
  68. @staticmethod
  69. def get_url(from_lang, to_lang, query_text):
  70. # 随机数据
  71. salt = random.randint(32768, 65536)
  72. # MD5生成签名
  73. sign = BAIDU_APP_ID + query_text + str(salt) + BAIDU_SECRET_KEY
  74. m1 = hashlib.md5()
  75. m1.update(sign.encode('utf-8'))
  76. sign = m1.hexdigest()
  77. # 拼接URL
  78. url = API_URL +'?appid=' + BAIDU_APP_ID + '&q=' + urllib.parse.quote(query_text) + '&from=' + from_lang + '&to=' + to_lang + '&salt=' + str(salt) + '&sign=' + sign
  79. return url