latex_actions.py 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. from toolbox import update_ui, update_ui_lastest_msg, get_log_folder
  2. from toolbox import get_conf, objdump, objload, promote_file_to_downloadzone
  3. from .latex_toolbox import PRESERVE, TRANSFORM
  4. from .latex_toolbox import set_forbidden_text, set_forbidden_text_begin_end, set_forbidden_text_careful_brace
  5. from .latex_toolbox import reverse_forbidden_text_careful_brace, reverse_forbidden_text, convert_to_linklist, post_process
  6. from .latex_toolbox import fix_content, find_main_tex_file, merge_tex_files, compile_latex_with_timeout
  7. from .latex_toolbox import find_title_and_abs
  8. import os, shutil
  9. import re
  10. import numpy as np
  11. pj = os.path.join
def split_subprocess(txt, project_folder, return_dict, opts):
    """
    Break down a latex file into a linked list of nodes; each node carries a
    ``preserve`` flag indicating whether it should be processed by GPT
    (TRANSFORM) or copied through verbatim (PRESERVE).

    Runs as a worker process (see ``LatexPaperSplit.split``); the node list and
    the GPT-bound text segments are handed back through ``return_dict``.

    txt: full text of the merged latex project.
    project_folder: folder where a ``debug_log.html`` visualization is written.
    return_dict: multiprocessing manager dict used as the output channel.
    opts: extra options (not used directly in this function).
    """
    text = txt
    # Start with every character marked TRANSFORM; the calls below flip
    # protected regions of the mask to PRESERVE.
    mask = np.zeros(len(txt), dtype=np.uint8) + TRANSFORM
    # Absorb everything above \maketitle / \begin{document} (preamble, title, authors)
    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\maketitle", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"^(.*?)\\begin{document}", re.DOTALL)
    # Absorb \iffalse ... \fi (commented-out regions)
    text, mask = set_forbidden_text(text, mask, r"\\iffalse(.*?)\\fi", re.DOTALL)
    # Absorb begin-end environment pairs spanning at most 42 lines
    text, mask = set_forbidden_text_begin_end(text, mask, r"\\begin\{([a-z\*]*)\}(.*?)\\end\{\1\}", re.DOTALL, limit_n_lines=42)
    # Absorb anonymous (unnumbered display-math) formulas
    text, mask = set_forbidden_text(text, mask, [ r"\$\$([^$]+)\$\$", r"\\\[.*?\\\]" ], re.DOTALL)
    # Absorb miscellaneous structural commands and environments
    text, mask = set_forbidden_text(text, mask, [ r"\\section\{(.*?)\}", r"\\section\*\{(.*?)\}", r"\\subsection\{(.*?)\}", r"\\subsubsection\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, [ r"\\bibliography\{(.*?)\}", r"\\bibliographystyle\{(.*?)\}" ])
    text, mask = set_forbidden_text(text, mask, r"\\begin\{thebibliography\}.*?\\end\{thebibliography\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{lstlisting\}(.*?)\\end\{lstlisting\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{wraptable\}(.*?)\\end\{wraptable\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, r"\\begin\{algorithm\}(.*?)\\end\{algorithm\}", re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{wrapfigure\}(.*?)\\end\{wrapfigure\}", r"\\begin\{wrapfigure\*\}(.*?)\\end\{wrapfigure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{figure\}(.*?)\\end\{figure\}", r"\\begin\{figure\*\}(.*?)\\end\{figure\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{multline\}(.*?)\\end\{multline\}", r"\\begin\{multline\*\}(.*?)\\end\{multline\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{table\}(.*?)\\end\{table\}", r"\\begin\{table\*\}(.*?)\\end\{table\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{minipage\}(.*?)\\end\{minipage\}", r"\\begin\{minipage\*\}(.*?)\\end\{minipage\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{align\*\}(.*?)\\end\{align\*\}", r"\\begin\{align\}(.*?)\\end\{align\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\begin\{equation\}(.*?)\\end\{equation\}", r"\\begin\{equation\*\}(.*?)\\end\{equation\*\}"], re.DOTALL)
    text, mask = set_forbidden_text(text, mask, [r"\\includepdf\[(.*?)\]\{(.*?)\}", r"\\clearpage", r"\\newpage", r"\\appendix", r"\\tableofcontents", r"\\include\{(.*?)\}"])
    text, mask = set_forbidden_text(text, mask, [r"\\vspace\{(.*?)\}", r"\\hspace\{(.*?)\}", r"\\label\{(.*?)\}", r"\\begin\{(.*?)\}", r"\\end\{(.*?)\}", r"\\item "])
    text, mask = set_forbidden_text_careful_brace(text, mask, r"\\hl\{(.*?)\}", re.DOTALL)
    # Reverse operations (re-exposing text that SHOULD be translated, such as
    # captions and the abstract) must come last, after all masking above.
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\caption\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text_careful_brace(text, mask, r"\\abstract\{(.*?)\}", re.DOTALL, forbid_wrapper=True)
    text, mask = reverse_forbidden_text(text, mask, r"\\begin\{abstract\}(.*?)\\end\{abstract\}", re.DOTALL, forbid_wrapper=True)
    root = convert_to_linklist(text, mask)
    # Final clean-up pass for robustness
    root = post_process(root)
    # Write an html debug file: PRESERVE segments in red, TRANSFORM segments in black
    with open(pj(project_folder, 'debug_log.html'), 'w', encoding='utf8') as f:
        segment_parts_for_gpt = []
        nodes = []
        node = root
        while True:
            nodes.append(node)
            show_html = node.string.replace('\n','<br/>')
            if not node.preserve:
                segment_parts_for_gpt.append(node.string)
                f.write(f'<p style="color:black;">#{node.range}{show_html}#</p>')
            else:
                f.write(f'<p style="color:red;">{show_html}</p>')
            node = node.next
            if node is None: break
    for n in nodes: n.next = None # break the links so the nodes can be pickled back to the parent process
    return_dict['nodes'] = nodes
    return_dict['segment_parts_for_gpt'] = segment_parts_for_gpt
    return return_dict
  72. class LatexPaperSplit():
  73. """
  74. break down latex file to a linked list,
  75. each node use a preserve flag to indicate whether it should
  76. be proccessed by GPT.
  77. """
  78. def __init__(self) -> None:
  79. self.nodes = None
  80. self.msg = "*{\\scriptsize\\textbf{警告:该PDF由GPT-Academic开源项目调用大语言模型+Latex翻译插件一键生成," + \
  81. "版权归原文作者所有。翻译内容可靠性无保障,请仔细鉴别并以原文为准。" + \
  82. "项目Github地址 \\url{https://github.com/binary-husky/gpt_academic/}。"
  83. # 请您不要删除或修改这行警告,除非您是论文的原作者(如果您是论文原作者,欢迎加REAME中的QQ联系开发者)
  84. self.msg_declare = "为了防止大语言模型的意外谬误产生扩散影响,禁止移除或修改此警告。}}\\\\"
  85. self.title = "unknown"
  86. self.abstract = "unknown"
  87. def read_title_and_abstract(self, txt):
  88. try:
  89. title, abstract = find_title_and_abs(txt)
  90. if title is not None:
  91. self.title = title.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
  92. if abstract is not None:
  93. self.abstract = abstract.replace('\n', ' ').replace('\\\\', ' ').replace(' ', '').replace(' ', '')
  94. except:
  95. pass
  96. def merge_result(self, arr, mode, msg, buggy_lines=[], buggy_line_surgery_n_lines=10):
  97. """
  98. Merge the result after the GPT process completed
  99. """
  100. result_string = ""
  101. node_cnt = 0
  102. line_cnt = 0
  103. for node in self.nodes:
  104. if node.preserve:
  105. line_cnt += node.string.count('\n')
  106. result_string += node.string
  107. else:
  108. translated_txt = fix_content(arr[node_cnt], node.string)
  109. begin_line = line_cnt
  110. end_line = line_cnt + translated_txt.count('\n')
  111. # reverse translation if any error
  112. if any([begin_line-buggy_line_surgery_n_lines <= b_line <= end_line+buggy_line_surgery_n_lines for b_line in buggy_lines]):
  113. translated_txt = node.string
  114. result_string += translated_txt
  115. node_cnt += 1
  116. line_cnt += translated_txt.count('\n')
  117. if mode == 'translate_zh':
  118. pattern = re.compile(r'\\begin\{abstract\}.*\n')
  119. match = pattern.search(result_string)
  120. if not match:
  121. # match \abstract{xxxx}
  122. pattern_compile = re.compile(r"\\abstract\{(.*?)\}", flags=re.DOTALL)
  123. match = pattern_compile.search(result_string)
  124. position = match.regs[1][0]
  125. else:
  126. # match \begin{abstract}xxxx\end{abstract}
  127. position = match.end()
  128. result_string = result_string[:position] + self.msg + msg + self.msg_declare + result_string[position:]
  129. return result_string
  130. def split(self, txt, project_folder, opts):
  131. """
  132. break down latex file to a linked list,
  133. each node use a preserve flag to indicate whether it should
  134. be proccessed by GPT.
  135. P.S. use multiprocessing to avoid timeout error
  136. """
  137. import multiprocessing
  138. manager = multiprocessing.Manager()
  139. return_dict = manager.dict()
  140. p = multiprocessing.Process(
  141. target=split_subprocess,
  142. args=(txt, project_folder, return_dict, opts))
  143. p.start()
  144. p.join()
  145. p.close()
  146. self.nodes = return_dict['nodes']
  147. self.sp = return_dict['segment_parts_for_gpt']
  148. return self.sp
  149. class LatexPaperFileGroup():
  150. """
  151. use tokenizer to break down text according to max_token_limit
  152. """
  153. def __init__(self):
  154. self.file_paths = []
  155. self.file_contents = []
  156. self.sp_file_contents = []
  157. self.sp_file_index = []
  158. self.sp_file_tag = []
  159. # count_token
  160. from request_llms.bridge_all import model_info
  161. enc = model_info["gpt-3.5-turbo"]['tokenizer']
  162. def get_token_num(txt): return len(enc.encode(txt, disallowed_special=()))
  163. self.get_token_num = get_token_num
  164. def run_file_split(self, max_token_limit=1900):
  165. """
  166. use tokenizer to break down text according to max_token_limit
  167. """
  168. for index, file_content in enumerate(self.file_contents):
  169. if self.get_token_num(file_content) < max_token_limit:
  170. self.sp_file_contents.append(file_content)
  171. self.sp_file_index.append(index)
  172. self.sp_file_tag.append(self.file_paths[index])
  173. else:
  174. from crazy_functions.pdf_fns.breakdown_txt import breakdown_text_to_satisfy_token_limit
  175. segments = breakdown_text_to_satisfy_token_limit(file_content, max_token_limit)
  176. for j, segment in enumerate(segments):
  177. self.sp_file_contents.append(segment)
  178. self.sp_file_index.append(index)
  179. self.sp_file_tag.append(self.file_paths[index] + f".part-{j}.tex")
  180. def merge_result(self):
  181. self.file_result = ["" for _ in range(len(self.file_paths))]
  182. for r, k in zip(self.sp_file_result, self.sp_file_index):
  183. self.file_result[k] += r
  184. def write_result(self):
  185. manifest = []
  186. for path, res in zip(self.file_paths, self.file_result):
  187. with open(path + '.polish.tex', 'w', encoding='utf8') as f:
  188. manifest.append(path + '.polish.tex')
  189. f.write(res)
  190. return manifest
def Latex精细分解与转化(file_manifest, project_folder, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, mode='proofread', switch_prompt=None, opts=[]):
    """
    Fine-grained decomposition and transformation of a latex project.

    Merges the multi-file tex project into one big tex file, splits it into
    GPT-sized segments, runs the LLM over every segment, then reassembles
    the output and writes it as ``merge_<mode>.tex``.

    mode: e.g. 'proofread' or 'translate_zh'.
    switch_prompt: callback building (inputs_array, sys_prompt_array) from the
        file group and mode.
    NOTE(review): ``opts=[]`` is a mutable default; it is only passed through
        here, but callers should not rely on mutating it.
    Returns (as the generator's return value) the path of the produced tex file.
    """
    import time, os, re
    from ..crazy_utils import request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency
    from .latex_actions import LatexPaperFileGroup, LatexPaperSplit
    # <-------- locate the main tex file ---------->
    maintex = find_main_tex_file(file_manifest, mode)
    chatbot.append((f"定位主Latex文件", f'[Local Message] 分析结果:该项目的Latex主文件是{maintex}, 如果分析错误, 请立即终止程序, 删除或修改歧义文件, 然后重试。主程序即将开始, 请稍候。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh UI
    time.sleep(3)
    # <-------- read the latex files, merge the multi-file tex project into one giant tex ---------->
    main_tex_basename = os.path.basename(maintex)
    assert main_tex_basename.endswith('.tex')
    main_tex_basename_bare = main_tex_basename[:-4]
    may_exist_bbl = pj(project_folder, f'{main_tex_basename_bare}.bbl')
    if os.path.exists(may_exist_bbl):
        # Reuse a pre-built bibliography for every variant compiled later
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_{mode}.bbl'))
        shutil.copyfile(may_exist_bbl, pj(project_folder, f'merge_diff.bbl'))
    with open(maintex, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()
        merged_content = merge_tex_files(project_folder, content, mode)
    with open(project_folder + '/merge.tex', 'w', encoding='utf-8', errors='replace') as f:
        f.write(merged_content)
    # <-------- fine-grained split of the latex file ---------->
    chatbot.append((f"Latex文件融合完成", f'[Local Message] 正在精细切分latex文件,这需要一段时间计算,文档越长耗时越长,请耐心等待。'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh UI
    lps = LatexPaperSplit()
    lps.read_title_and_abstract(merged_content)
    res = lps.split(merged_content, project_folder, opts) # time-consuming call
    # <-------- further split latex segments that exceed the token limit ---------->
    pfg = LatexPaperFileGroup()
    for index, r in enumerate(res):
        pfg.file_paths.append('segment-' + str(index))
        pfg.file_contents.append(r)
    pfg.run_file_split(max_token_limit=1024)
    n_split = len(pfg.sp_file_contents)
    # <-------- switch the prompt according to the mode ---------->
    inputs_array, sys_prompt_array = switch_prompt(pfg, mode)
    inputs_show_user_array = [f"{mode} {f}" for f in pfg.sp_file_tag]
    if os.path.exists(pj(project_folder,'temp.pkl')):
        # <-------- [debug only] skip the GPT request stage when a cache file exists ---------->
        pfg = objload(file=pj(project_folder,'temp.pkl'))
    else:
        # <-------- multithreaded GPT requests ---------->
        history_array = [[""] for _ in range(n_split)]
        # LATEX_EXPERIMENTAL, = get_conf('LATEX_EXPERIMENTAL')
        # if LATEX_EXPERIMENTAL:
        #     paper_meta = f"The paper you processing is `{lps.title}`, a part of the abstraction is `{lps.abstract}`"
        #     paper_meta_max_len = 888
        #     history_array = [[ paper_meta[:paper_meta_max_len] + '...', "Understand, what should I do?"] for _ in range(n_split)]
        gpt_response_collection = yield from request_gpt_model_multi_threads_with_very_awesome_ui_and_high_efficiency(
            inputs_array=inputs_array,
            inputs_show_user_array=inputs_show_user_array,
            llm_kwargs=llm_kwargs,
            chatbot=chatbot,
            history_array=history_array,
            sys_prompt_array=sys_prompt_array,
            # max_workers=5,  # parallel task limit: at most 5 run at once, the rest queue up
            scroller_max_len = 40
        )
        # <-------- reassemble the text fragments into complete tex pieces ---------->
        pfg.sp_file_result = []
        for i_say, gpt_say, orig_content in zip(gpt_response_collection[0::2], gpt_response_collection[1::2], pfg.sp_file_contents):
            pfg.sp_file_result.append(gpt_say)
        pfg.merge_result()
        # <-------- temporary dump for debugging ---------->
        pfg.get_token_num = None # drop the tokenizer closure so the object can be pickled
        objdump(pfg, file=pj(project_folder,'temp.pkl'))
    write_html(pfg.sp_file_contents, pfg.sp_file_result, chatbot=chatbot, project_folder=project_folder)
    # <-------- write out the result file ---------->
    msg = f"当前大语言模型: {llm_kwargs['llm_model']},当前语言模型温度设定: {llm_kwargs['temperature']}。"
    final_tex = lps.merge_result(pfg.file_result, mode, msg)
    objdump((lps, pfg.file_result, mode, msg), file=pj(project_folder,'merge_result.pkl'))
    with open(project_folder + f'/merge_{mode}.tex', 'w', encoding='utf-8', errors='replace') as f:
        # For translate_zh, only write when "binary" is still present — presumably
        # checking that the warning watermark (which contains the binary-husky
        # project URL) survived the translation; TODO confirm intent.
        if mode != 'translate_zh' or "binary" in final_tex: f.write(final_tex)
    # <-------- wrap up ---------->
    chatbot.append((f"完成了吗?", 'GPT结果已输出, 即将编译PDF'))
    yield from update_ui(chatbot=chatbot, history=history) # refresh UI
    # <-------- return ---------->
    return project_folder + f'/merge_{mode}.tex'
  271. def remove_buggy_lines(file_path, log_path, tex_name, tex_name_pure, n_fix, work_folder_modified, fixed_line=[]):
  272. try:
  273. with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
  274. log = f.read()
  275. import re
  276. buggy_lines = re.findall(tex_name+':([0-9]{1,5}):', log)
  277. buggy_lines = [int(l) for l in buggy_lines]
  278. buggy_lines = sorted(buggy_lines)
  279. buggy_line = buggy_lines[0]-1
  280. print("reversing tex line that has errors", buggy_line)
  281. # 重组,逆转出错的段落
  282. if buggy_line not in fixed_line:
  283. fixed_line.append(buggy_line)
  284. lps, file_result, mode, msg = objload(file=pj(work_folder_modified,'merge_result.pkl'))
  285. final_tex = lps.merge_result(file_result, mode, msg, buggy_lines=fixed_line, buggy_line_surgery_n_lines=5*n_fix)
  286. with open(pj(work_folder_modified, f"{tex_name_pure}_fix_{n_fix}.tex"), 'w', encoding='utf-8', errors='replace') as f:
  287. f.write(final_tex)
  288. return True, f"{tex_name_pure}_fix_{n_fix}", buggy_lines
  289. except:
  290. print("Fatal error occurred, but we cannot identify error, please download zip, read latex log, and compile manually.")
  291. return False, -1, [-1]
  292. def 编译Latex(chatbot, history, main_file_original, main_file_modified, work_folder_original, work_folder_modified, work_folder, mode='default'):
  293. import os, time
  294. n_fix = 1
  295. fixed_line = []
  296. max_try = 32
  297. chatbot.append([f"正在编译PDF文档", f'编译已经开始。当前工作路径为{work_folder},如果程序停顿5分钟以上,请直接去该路径下取回翻译结果,或者重启之后再度尝试 ...']); yield from update_ui(chatbot=chatbot, history=history)
  298. chatbot.append([f"正在编译PDF文档", '...']); yield from update_ui(chatbot=chatbot, history=history); time.sleep(1); chatbot[-1] = list(chatbot[-1]) # 刷新界面
  299. yield from update_ui_lastest_msg('编译已经开始...', chatbot, history) # 刷新Gradio前端界面
  300. while True:
  301. import os
  302. may_exist_bbl = pj(work_folder_modified, f'merge.bbl')
  303. target_bbl = pj(work_folder_modified, f'{main_file_modified}.bbl')
  304. if os.path.exists(may_exist_bbl) and not os.path.exists(target_bbl):
  305. shutil.copyfile(may_exist_bbl, target_bbl)
  306. # https://stackoverflow.com/questions/738755/dont-make-me-manually-abort-a-latex-compile-when-theres-an-error
  307. yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译原始PDF ...', chatbot, history) # 刷新Gradio前端界面
  308. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
  309. yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译转化后的PDF ...', chatbot, history) # 刷新Gradio前端界面
  310. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
  311. if ok and os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf')):
  312. # 只有第二步成功,才能继续下面的步骤
  313. yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译BibTex ...', chatbot, history) # 刷新Gradio前端界面
  314. if not os.path.exists(pj(work_folder_original, f'{main_file_original}.bbl')):
  315. ok = compile_latex_with_timeout(f'bibtex {main_file_original}.aux', work_folder_original)
  316. if not os.path.exists(pj(work_folder_modified, f'{main_file_modified}.bbl')):
  317. ok = compile_latex_with_timeout(f'bibtex {main_file_modified}.aux', work_folder_modified)
  318. yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 编译文献交叉引用 ...', chatbot, history) # 刷新Gradio前端界面
  319. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
  320. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
  321. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_original}.tex', work_folder_original)
  322. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error {main_file_modified}.tex', work_folder_modified)
  323. if mode!='translate_zh':
  324. yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 使用latexdiff生成论文转化前后对比 ...', chatbot, history) # 刷新Gradio前端界面
  325. print( f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex')
  326. ok = compile_latex_with_timeout(f'latexdiff --encoding=utf8 --append-safecmd=subfile {work_folder_original}/{main_file_original}.tex {work_folder_modified}/{main_file_modified}.tex --flatten > {work_folder}/merge_diff.tex', os.getcwd())
  327. yield from update_ui_lastest_msg(f'尝试第 {n_fix}/{max_try} 次编译, 正在编译对比PDF ...', chatbot, history) # 刷新Gradio前端界面
  328. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
  329. ok = compile_latex_with_timeout(f'bibtex merge_diff.aux', work_folder)
  330. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
  331. ok = compile_latex_with_timeout(f'pdflatex -interaction=batchmode -file-line-error merge_diff.tex', work_folder)
  332. # <---------- 检查结果 ----------->
  333. results_ = ""
  334. original_pdf_success = os.path.exists(pj(work_folder_original, f'{main_file_original}.pdf'))
  335. modified_pdf_success = os.path.exists(pj(work_folder_modified, f'{main_file_modified}.pdf'))
  336. diff_pdf_success = os.path.exists(pj(work_folder, f'merge_diff.pdf'))
  337. results_ += f"原始PDF编译是否成功: {original_pdf_success};"
  338. results_ += f"转化PDF编译是否成功: {modified_pdf_success};"
  339. results_ += f"对比PDF编译是否成功: {diff_pdf_success};"
  340. yield from update_ui_lastest_msg(f'第{n_fix}编译结束:<br/>{results_}...', chatbot, history) # 刷新Gradio前端界面
  341. if diff_pdf_success:
  342. result_pdf = pj(work_folder_modified, f'merge_diff.pdf') # get pdf path
  343. promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
  344. if modified_pdf_success:
  345. yield from update_ui_lastest_msg(f'转化PDF编译已经成功, 正在尝试生成对比PDF, 请稍候 ...', chatbot, history) # 刷新Gradio前端界面
  346. result_pdf = pj(work_folder_modified, f'{main_file_modified}.pdf') # get pdf path
  347. origin_pdf = pj(work_folder_original, f'{main_file_original}.pdf') # get pdf path
  348. if os.path.exists(pj(work_folder, '..', 'translation')):
  349. shutil.copyfile(result_pdf, pj(work_folder, '..', 'translation', 'translate_zh.pdf'))
  350. promote_file_to_downloadzone(result_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
  351. # 将两个PDF拼接
  352. if original_pdf_success:
  353. try:
  354. from .latex_toolbox import merge_pdfs
  355. concat_pdf = pj(work_folder_modified, f'comparison.pdf')
  356. merge_pdfs(origin_pdf, result_pdf, concat_pdf)
  357. if os.path.exists(pj(work_folder, '..', 'translation')):
  358. shutil.copyfile(concat_pdf, pj(work_folder, '..', 'translation', 'comparison.pdf'))
  359. promote_file_to_downloadzone(concat_pdf, rename_file=None, chatbot=chatbot) # promote file to web UI
  360. except Exception as e:
  361. print(e)
  362. pass
  363. return True # 成功啦
  364. else:
  365. if n_fix>=max_try: break
  366. n_fix += 1
  367. can_retry, main_file_modified, buggy_lines = remove_buggy_lines(
  368. file_path=pj(work_folder_modified, f'{main_file_modified}.tex'),
  369. log_path=pj(work_folder_modified, f'{main_file_modified}.log'),
  370. tex_name=f'{main_file_modified}.tex',
  371. tex_name_pure=f'{main_file_modified}',
  372. n_fix=n_fix,
  373. work_folder_modified=work_folder_modified,
  374. fixed_line=fixed_line
  375. )
  376. yield from update_ui_lastest_msg(f'由于最为关键的转化PDF编译失败, 将根据报错信息修正tex源文件并重试, 当前报错的latex代码处于第{buggy_lines}行 ...', chatbot, history) # 刷新Gradio前端界面
  377. if not can_retry: break
  378. return False # 失败啦
  379. def write_html(sp_file_contents, sp_file_result, chatbot, project_folder):
  380. # write html
  381. try:
  382. import shutil
  383. from crazy_functions.pdf_fns.report_gen_html import construct_html
  384. from toolbox import gen_time_str
  385. ch = construct_html()
  386. orig = ""
  387. trans = ""
  388. final = []
  389. for c,r in zip(sp_file_contents, sp_file_result):
  390. final.append(c)
  391. final.append(r)
  392. for i, k in enumerate(final):
  393. if i%2==0:
  394. orig = k
  395. if i%2==1:
  396. trans = k
  397. ch.add_row(a=orig, b=trans)
  398. create_report_file_name = f"{gen_time_str()}.trans.html"
  399. res = ch.save_file(create_report_file_name)
  400. shutil.copyfile(res, pj(project_folder, create_report_file_name))
  401. promote_file_to_downloadzone(file=res, chatbot=chatbot)
  402. except:
  403. from toolbox import trimmed_format_exc
  404. print('writing html result failed:', trimmed_format_exc())