总结音视频.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. from toolbox import CatchException, report_exception, select_api_key, update_ui, get_conf
  2. from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive
  3. from toolbox import write_history_to_file, promote_file_to_downloadzone, get_log_folder
  4. def split_audio_file(filename, split_duration=1000):
  5. """
  6. 根据给定的切割时长将音频文件切割成多个片段。
  7. Args:
  8. filename (str): 需要被切割的音频文件名。
  9. split_duration (int, optional): 每个切割音频片段的时长(以秒为单位)。默认值为1000。
  10. Returns:
  11. filelist (list): 一个包含所有切割音频片段文件路径的列表。
  12. """
  13. from moviepy.editor import AudioFileClip
  14. import os
  15. os.makedirs(f"{get_log_folder(plugin_name='audio')}/mp3/cut/", exist_ok=True) # 创建存储切割音频的文件夹
  16. # 读取音频文件
  17. audio = AudioFileClip(filename)
  18. # 计算文件总时长和切割点
  19. total_duration = audio.duration
  20. split_points = list(range(0, int(total_duration), split_duration))
  21. split_points.append(int(total_duration))
  22. filelist = []
  23. # 切割音频文件
  24. for i in range(len(split_points) - 1):
  25. start_time = split_points[i]
  26. end_time = split_points[i + 1]
  27. split_audio = audio.subclip(start_time, end_time)
  28. split_audio.write_audiofile(f"{get_log_folder(plugin_name='audio')}/mp3/cut/{filename[0]}_{i}.mp3")
  29. filelist.append(f"{get_log_folder(plugin_name='audio')}/mp3/cut/{filename[0]}_{i}.mp3")
  30. audio.close()
  31. return filelist
  32. def AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history):
  33. import os, requests
  34. from moviepy.editor import AudioFileClip
  35. from request_llms.bridge_all import model_info
  36. # 设置OpenAI密钥和模型
  37. api_key = select_api_key(llm_kwargs['api_key'], llm_kwargs['llm_model'])
  38. chat_endpoint = model_info[llm_kwargs['llm_model']]['endpoint']
  39. whisper_endpoint = chat_endpoint.replace('chat/completions', 'audio/transcriptions')
  40. url = whisper_endpoint
  41. headers = {
  42. 'Authorization': f"Bearer {api_key}"
  43. }
  44. os.makedirs(f"{get_log_folder(plugin_name='audio')}/mp3/", exist_ok=True)
  45. for index, fp in enumerate(file_manifest):
  46. audio_history = []
  47. # 提取文件扩展名
  48. ext = os.path.splitext(fp)[1]
  49. # 提取视频中的音频
  50. if ext not in [".mp3", ".wav", ".m4a", ".mpga"]:
  51. audio_clip = AudioFileClip(fp)
  52. audio_clip.write_audiofile(f"{get_log_folder(plugin_name='audio')}/mp3/output{index}.mp3")
  53. fp = f"{get_log_folder(plugin_name='audio')}/mp3/output{index}.mp3"
  54. # 调用whisper模型音频转文字
  55. voice = split_audio_file(fp)
  56. for j, i in enumerate(voice):
  57. with open(i, 'rb') as f:
  58. file_content = f.read() # 读取文件内容到内存
  59. files = {
  60. 'file': (os.path.basename(i), file_content),
  61. }
  62. data = {
  63. "model": "whisper-1",
  64. "prompt": parse_prompt,
  65. 'response_format': "text"
  66. }
  67. chatbot.append([f"将 {i} 发送到openai音频解析终端 (whisper),当前参数:{parse_prompt}", "正在处理 ..."])
  68. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  69. proxies = get_conf('proxies')
  70. response = requests.post(url, headers=headers, files=files, data=data, proxies=proxies).text
  71. chatbot.append(["音频解析结果", response])
  72. history.extend(["音频解析结果", response])
  73. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  74. i_say = f'请对下面的音频片段做概述,音频内容是 ```{response}```'
  75. i_say_show_user = f'第{index + 1}段音频的第{j + 1} / {len(voice)}片段。'
  76. gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
  77. inputs=i_say,
  78. inputs_show_user=i_say_show_user,
  79. llm_kwargs=llm_kwargs,
  80. chatbot=chatbot,
  81. history=[],
  82. sys_prompt=f"总结音频。音频文件名{fp}"
  83. )
  84. chatbot[-1] = (i_say_show_user, gpt_say)
  85. history.extend([i_say_show_user, gpt_say])
  86. audio_history.extend([i_say_show_user, gpt_say])
  87. # 已经对该文章的所有片段总结完毕,如果文章被切分了
  88. result = "".join(audio_history)
  89. if len(audio_history) > 1:
  90. i_say = f"根据以上的对话,使用中文总结音频“{result}”的主要内容。"
  91. i_say_show_user = f'第{index + 1}段音频的主要内容:'
  92. gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
  93. inputs=i_say,
  94. inputs_show_user=i_say_show_user,
  95. llm_kwargs=llm_kwargs,
  96. chatbot=chatbot,
  97. history=audio_history,
  98. sys_prompt="总结文章。"
  99. )
  100. history.extend([i_say, gpt_say])
  101. audio_history.extend([i_say, gpt_say])
  102. res = write_history_to_file(history)
  103. promote_file_to_downloadzone(res, chatbot=chatbot)
  104. chatbot.append((f"第{index + 1}段音频完成了吗?", res))
  105. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  106. # 删除中间文件夹
  107. import shutil
  108. shutil.rmtree(f"{get_log_folder(plugin_name='audio')}/mp3")
  109. res = write_history_to_file(history)
  110. promote_file_to_downloadzone(res, chatbot=chatbot)
  111. chatbot.append(("所有音频都总结完成了吗?", res))
  112. yield from update_ui(chatbot=chatbot, history=history)
  113. @CatchException
  114. def 总结音视频(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, WEB_PORT):
  115. import glob, os
  116. # 基本信息:功能、贡献者
  117. chatbot.append([
  118. "函数插件功能?",
  119. "总结音视频内容,函数插件贡献者: dalvqw & BinaryHusky"])
  120. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  121. try:
  122. from moviepy.editor import AudioFileClip
  123. except:
  124. report_exception(chatbot, history,
  125. a=f"解析项目: {txt}",
  126. b=f"导入软件依赖失败。使用该模块需要额外依赖,安装方法```pip install --upgrade moviepy```。")
  127. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  128. return
  129. # 清空历史,以免输入溢出
  130. history = []
  131. # 检测输入参数,如没有给定输入参数,直接退出
  132. if os.path.exists(txt):
  133. project_folder = txt
  134. else:
  135. if txt == "": txt = '空空如也的输入栏'
  136. report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
  137. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  138. return
  139. # 搜索需要处理的文件清单
  140. extensions = ['.mp4', '.m4a', '.wav', '.mpga', '.mpeg', '.mp3', '.avi', '.mkv', '.flac', '.aac']
  141. if txt.endswith(tuple(extensions)):
  142. file_manifest = [txt]
  143. else:
  144. file_manifest = []
  145. for extension in extensions:
  146. file_manifest.extend(glob.glob(f'{project_folder}/**/*{extension}', recursive=True))
  147. # 如果没找到任何文件
  148. if len(file_manifest) == 0:
  149. report_exception(chatbot, history, a=f"解析项目: {txt}", b=f"找不到任何音频或视频文件: {txt}")
  150. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  151. return
  152. # 开始正式执行任务
  153. if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
  154. parse_prompt = plugin_kwargs.get("advanced_arg", '将音频解析为简体中文')
  155. yield from AnalyAudio(parse_prompt, file_manifest, llm_kwargs, chatbot, history)
  156. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面