audio_convert.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. import shutil
  2. import wave
  3. from common.log import logger
  4. try:
  5. import pysilk
  6. except ImportError:
  7. logger.debug("import pysilk failed, wechaty voice message will not be supported.")
  8. from pydub import AudioSegment
  9. sil_supports = [8000, 12000, 16000, 24000, 32000, 44100, 48000] # slk转wav时,支持的采样率
  10. def find_closest_sil_supports(sample_rate):
  11. """
  12. 找到最接近的支持的采样率
  13. """
  14. if sample_rate in sil_supports:
  15. return sample_rate
  16. closest = 0
  17. mindiff = 9999999
  18. for rate in sil_supports:
  19. diff = abs(rate - sample_rate)
  20. if diff < mindiff:
  21. closest = rate
  22. mindiff = diff
  23. return closest
  24. def get_pcm_from_wav(wav_path):
  25. """
  26. 从 wav 文件中读取 pcm
  27. :param wav_path: wav 文件路径
  28. :returns: pcm 数据
  29. """
  30. wav = wave.open(wav_path, "rb")
  31. return wav.readframes(wav.getnframes())
  32. def any_to_mp3(any_path, mp3_path):
  33. """
  34. 把任意格式转成mp3文件
  35. """
  36. if any_path.endswith(".mp3"):
  37. shutil.copy2(any_path, mp3_path)
  38. return
  39. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  40. sil_to_wav(any_path, any_path)
  41. any_path = mp3_path
  42. audio = AudioSegment.from_file(any_path)
  43. audio.export(mp3_path, format="mp3")
  44. def any_to_wav(any_path, wav_path):
  45. """
  46. 把任意格式转成wav文件
  47. """
  48. if any_path.endswith(".wav"):
  49. shutil.copy2(any_path, wav_path)
  50. return
  51. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  52. return sil_to_wav(any_path, wav_path)
  53. audio = AudioSegment.from_file(any_path)
  54. audio.set_frame_rate(8000) # 百度语音转写支持8000采样率, pcm_s16le, 单通道语音识别
  55. audio.set_channels(1)
  56. audio.export(wav_path, format="wav", codec='pcm_s16le')
  57. def any_to_sil(any_path, sil_path):
  58. """
  59. 把任意格式转成sil文件
  60. """
  61. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  62. shutil.copy2(any_path, sil_path)
  63. return 10000
  64. audio = AudioSegment.from_file(any_path)
  65. rate = find_closest_sil_supports(audio.frame_rate)
  66. # Convert to PCM_s16
  67. pcm_s16 = audio.set_sample_width(2)
  68. pcm_s16 = pcm_s16.set_frame_rate(rate)
  69. wav_data = pcm_s16.raw_data
  70. silk_data = pysilk.encode(wav_data, data_rate=rate, sample_rate=rate)
  71. with open(sil_path, "wb") as f:
  72. f.write(silk_data)
  73. return audio.duration_seconds * 1000
  74. def any_to_amr(any_path, amr_path):
  75. """
  76. 把任意格式转成amr文件
  77. """
  78. if any_path.endswith(".amr"):
  79. shutil.copy2(any_path, amr_path)
  80. return
  81. if any_path.endswith(".sil") or any_path.endswith(".silk") or any_path.endswith(".slk"):
  82. raise NotImplementedError("Not support file type: {}".format(any_path))
  83. audio = AudioSegment.from_file(any_path)
  84. audio = audio.set_frame_rate(8000) # only support 8000
  85. audio.export(amr_path, format="amr")
  86. return audio.duration_seconds * 1000
  87. def sil_to_wav(silk_path, wav_path, rate: int = 24000):
  88. """
  89. silk 文件转 wav
  90. """
  91. wav_data = pysilk.decode_file(silk_path, to_wav=True, sample_rate=rate)
  92. with open(wav_path, "wb") as f:
  93. f.write(wav_data)
  94. def split_audio(file_path, max_segment_length_ms=60000):
  95. """
  96. 分割音频文件
  97. """
  98. audio = AudioSegment.from_file(file_path)
  99. audio_length_ms = len(audio)
  100. if audio_length_ms <= max_segment_length_ms:
  101. return audio_length_ms, [file_path]
  102. segments = []
  103. for start_ms in range(0, audio_length_ms, max_segment_length_ms):
  104. end_ms = min(audio_length_ms, start_ms + max_segment_length_ms)
  105. segment = audio[start_ms:end_ms]
  106. segments.append(segment)
  107. file_prefix = file_path[: file_path.rindex(".")]
  108. format = file_path[file_path.rindex(".") + 1 :]
  109. files = []
  110. for i, segment in enumerate(segments):
  111. path = f"{file_prefix}_{i+1}" + f".{format}"
  112. segment.export(path, format=format)
  113. files.append(path)
  114. return audio_length_ms, files