azure_voice.py 4.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. """
  2. azure voice service
  3. """
  4. import json
  5. import os
  6. import time
  7. import azure.cognitiveservices.speech as speechsdk
  8. from langid import classify
  9. from bridge.reply import Reply, ReplyType
  10. from common.log import logger
  11. from common.tmp_dir import TmpDir
  12. from config import conf
  13. from voice.voice import Voice
"""
Azure voice
The main configuration file must provide azure_voice_api_key and azure_voice_region.
Available voices: https://speech.microsoft.com/portal/voicegallery
"""
  19. class AzureVoice(Voice):
  20. def __init__(self):
  21. try:
  22. curdir = os.path.dirname(__file__)
  23. config_path = os.path.join(curdir, "config.json")
  24. config = None
  25. if not os.path.exists(config_path): # 如果没有配置文件,创建本地配置文件
  26. config = {
  27. "speech_synthesis_voice_name": "zh-CN-XiaoxiaoNeural", # 识别不出时的默认语音
  28. "auto_detect": True, # 是否自动检测语言
  29. "speech_synthesis_zh": "zh-CN-XiaozhenNeural",
  30. "speech_synthesis_en": "en-US-JacobNeural",
  31. "speech_synthesis_ja": "ja-JP-AoiNeural",
  32. "speech_synthesis_ko": "ko-KR-SoonBokNeural",
  33. "speech_synthesis_de": "de-DE-LouisaNeural",
  34. "speech_synthesis_fr": "fr-FR-BrigitteNeural",
  35. "speech_synthesis_es": "es-ES-LaiaNeural",
  36. "speech_recognition_language": "zh-CN",
  37. }
  38. with open(config_path, "w") as fw:
  39. json.dump(config, fw, indent=4)
  40. else:
  41. with open(config_path, "r") as fr:
  42. config = json.load(fr)
  43. self.config = config
  44. self.api_key = conf().get("azure_voice_api_key")
  45. self.api_region = conf().get("azure_voice_region")
  46. self.speech_config = speechsdk.SpeechConfig(subscription=self.api_key, region=self.api_region)
  47. self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
  48. self.speech_config.speech_recognition_language = self.config["speech_recognition_language"]
  49. except Exception as e:
  50. logger.warn("AzureVoice init failed: %s, ignore " % e)
  51. def voiceToText(self, voice_file):
  52. audio_config = speechsdk.AudioConfig(filename=voice_file)
  53. speech_recognizer = speechsdk.SpeechRecognizer(speech_config=self.speech_config, audio_config=audio_config)
  54. result = speech_recognizer.recognize_once()
  55. if result.reason == speechsdk.ResultReason.RecognizedSpeech:
  56. logger.info("[Azure] voiceToText voice file name={} text={}".format(voice_file, result.text))
  57. reply = Reply(ReplyType.TEXT, result.text)
  58. else:
  59. cancel_details = result.cancellation_details
  60. logger.error("[Azure] voiceToText error, result={}, errordetails={}".format(result, cancel_details.error_details))
  61. reply = Reply(ReplyType.ERROR, "抱歉,语音识别失败")
  62. return reply
  63. def textToVoice(self, text):
  64. if self.config.get("auto_detect"):
  65. lang = classify(text)[0]
  66. key = "speech_synthesis_" + lang
  67. if key in self.config:
  68. logger.info("[Azure] textToVoice auto detect language={}, voice={}".format(lang, self.config[key]))
  69. self.speech_config.speech_synthesis_voice_name = self.config[key]
  70. else:
  71. self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
  72. else:
  73. self.speech_config.speech_synthesis_voice_name = self.config["speech_synthesis_voice_name"]
  74. # Avoid the same filename under multithreading
  75. fileName = TmpDir().path() + "reply-" + str(int(time.time())) + "-" + str(hash(text) & 0x7FFFFFFF) + ".wav"
  76. audio_config = speechsdk.AudioConfig(filename=fileName)
  77. speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=self.speech_config, audio_config=audio_config)
  78. result = speech_synthesizer.speak_text(text)
  79. if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
  80. logger.info("[Azure] textToVoice text={} voice file name={}".format(text, fileName))
  81. reply = Reply(ReplyType.VOICE, fileName)
  82. else:
  83. cancel_details = result.cancellation_details
  84. logger.error("[Azure] textToVoice error, result={}, errordetails={}".format(result, cancel_details.error_details))
  85. reply = Reply(ReplyType.ERROR, "抱歉,语音合成失败")
  86. return reply