解析JupyterNotebook.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. from toolbox import update_ui
  2. from toolbox import CatchException, report_exception
  3. from toolbox import write_history_to_file, promote_file_to_downloadzone
# Module-level flag; nothing in the visible part of this file reads it.
fast_debug = True
  5. class PaperFileGroup():
  6. def __init__(self):
  7. self.file_paths = []
  8. self.file_contents = []
  9. self.sp_file_contents = []
  10. self.sp_file_index = []
  11. self.sp_file_tag = []
  12. def run_file_split(self, max_token_limit=1900):
  13. """
  14. 将长文本分离开来
  15. """
  16. for index, file_content in enumerate(self.file_contents):
  17. if self.get_token_num(file_content) < max_token_limit:
  18. self.sp_file_contents.append(file_content)
  19. self.sp_file_index.append(index)
  20. self.sp_file_tag.append(self.file_paths[index])
  21. else:
  22. from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
  23. segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
  24. for j, segment in enumerate(segments):
  25. self.sp_file_contents.append(segment)
  26. self.sp_file_index.append(index)
  27. self.sp_file_tag.append(
  28. self.file_paths[index] + f".part-{j}.txt")
  29. def parseNotebook(filename, enable_markdown=1):
  30. import json
  31. CodeBlocks = []
  32. with open(filename, 'r', encoding='utf-8', errors='replace') as f:
  33. notebook = json.load(f)
  34. for cell in notebook['cells']:
  35. if cell['cell_type'] == 'code' and cell['source']:
  36. # remove blank lines
  37. cell['source'] = [line for line in cell['source'] if line.strip()
  38. != '']
  39. CodeBlocks.append("".join(cell['source']))
  40. elif enable_markdown and cell['cell_type'] == 'markdown' and cell['source']:
  41. cell['source'] = [line for line in cell['source'] if line.strip()
  42. != '']
  43. CodeBlocks.append("Markdown:"+"".join(cell['source']))
  44. Code = ""
  45. for idx, code in enumerate(CodeBlocks):
  46. Code += f"This is {idx+1}th code block: \n"
  47. Code += code+"\n"
  48. return Code
  49. def ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt):
  50. from .crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
  51. if ("advanced_arg" in plugin_kwargs) and (plugin_kwargs["advanced_arg"] == ""): plugin_kwargs.pop("advanced_arg")
  52. enable_markdown = plugin_kwargs.get("advanced_arg", "1")
  53. try:
  54. enable_markdown = int(enable_markdown)
  55. except ValueError:
  56. enable_markdown = 1
  57. pfg = PaperFileGroup()
  58. for fp in file_manifest:
  59. file_content = parseNotebook(fp, enable_markdown=enable_markdown)
  60. pfg.file_paths.append(fp)
  61. pfg.file_contents.append(file_content)
  62. # <-------- 拆分过长的IPynb文件 ---------->
  63. pfg.run_file_split(max_token_limit=1024)
  64. n_split = len(pfg.sp_file_contents)
  65. inputs_array = [r"This is a Jupyter Notebook file, tell me about Each Block in Chinese. Focus Just On Code." +
  66. r"If a block starts with `Markdown` which means it's a markdown block in ipynbipynb. " +
  67. r"Start a new line for a block and block num use Chinese." +
  68. f"\n\n{frag}" for frag in pfg.sp_file_contents]
  69. inputs_show_user_array = [f"{f}的分析如下" for f in pfg.sp_file_tag]
  70. sys_prompt_array = ["You are a professional programmer."] * n_split
  71. gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
  72. inputs_array=inputs_array,
  73. inputs_show_user_array=inputs_show_user_array,
  74. llm_kwargs=llm_kwargs,
  75. chatbot=chatbot,
  76. history_array=[[""] for _ in range(n_split)],
  77. sys_prompt_array=sys_prompt_array,
  78. # max_workers=5, # OpenAI所允许的最大并行过载
  79. scroller_max_len=80
  80. )
  81. # <-------- 整理结果,退出 ---------->
  82. block_result = " \n".join(gpt_response_collection)
  83. chatbot.append(("解析的结果如下", block_result))
  84. history.extend(["解析的结果如下", block_result])
  85. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  86. # <-------- 写入文件,退出 ---------->
  87. res = write_history_to_file(history)
  88. promote_file_to_downloadzone(res, chatbot=chatbot)
  89. chatbot.append(("完成了吗?", res))
  90. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  91. @CatchException
  92. def 解析ipynb文件(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
  93. chatbot.append([
  94. "函数插件功能?",
  95. "对IPynb文件进行解析。Contributor: codycjy."])
  96. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  97. history = [] # 清空历史
  98. import glob
  99. import os
  100. if os.path.exists(txt):
  101. project_folder = txt
  102. else:
  103. if txt == "":
  104. txt = '空空如也的输入栏'
  105. report_exception(chatbot, history,
  106. a=f"解析项目: {txt}", b=f"找不到本地项目或无权访问: {txt}")
  107. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  108. return
  109. if txt.endswith('.ipynb'):
  110. file_manifest = [txt]
  111. else:
  112. file_manifest = [f for f in glob.glob(
  113. f'{project_folder}/**/*.ipynb', recursive=True)]
  114. if len(file_manifest) == 0:
  115. report_exception(chatbot, history,
  116. a=f"解析项目: {txt}", b=f"找不到任何.ipynb文件: {txt}")
  117. yield from update_ui(chatbot=chatbot, history=history) # 刷新界面
  118. return
  119. yield from ipynb解释(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, )