联网的ChatGPT_bing版.py

from toolbox import CatchException, update_ui
from .crazy_utils import request_gpt_model_in_new_thread_with_ui_alive, input_clipping
import requests
from bs4 import BeautifulSoup
from request_llms.bridge_all import model_info

def bing_search(query, proxies=None):
    """Query cn.bing.com and return a list of {'title', 'link'} dicts."""
    url = "https://cn.bing.com/search"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'}
    # Pass the query via params so requests URL-encodes it, instead of
    # splicing raw user text into the URL
    response = requests.get(url, params={'q': query}, headers=headers, proxies=proxies, timeout=8)
    soup = BeautifulSoup(response.content, 'html.parser')
    results = []
    for g in soup.find_all('li', class_='b_algo'):  # each organic search hit
        anchors = g.find_all('a')
        if not anchors:
            continue
        link = anchors[0]['href']
        if not link.startswith('http'):
            continue
        title_tag = g.find('h2')
        if title_tag is None:  # skip malformed result blocks
            continue
        results.append({'title': title_tag.text, 'link': link})
    for r in results:
        print(r['link'])  # debug: log the harvested links
    return results
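
# A minimal standalone check of bing_search (a sketch; assumes direct network
# access, so proxies is left as None):
#
#   for hit in bing_search("python requests"):
#       print(hit['title'], '->', hit['link'])
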
def scrape_text(url, proxies) -> str:
    """Scrape text from a webpage

    Args:
        url (str): The URL to scrape text from

    Returns:
        str: The scraped text
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
        'Content-Type': 'text/plain',
    }
    try:
        response = requests.get(url, headers=headers, proxies=proxies, timeout=8)
        if response.encoding == "ISO-8859-1":
            # requests falls back to ISO-8859-1 when the server omits a charset;
            # use the detected encoding so CJK pages decode correctly
            response.encoding = response.apparent_encoding
    except Exception:
        return "无法连接到该网页"
    soup = BeautifulSoup(response.text, "html.parser")
    for script in soup(["script", "style"]):
        script.extract()  # drop non-visible content
    text = soup.get_text()
    lines = (line.strip() for line in text.splitlines())
    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
    text = "\n".join(chunk for chunk in chunks if chunk)
    return text
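
# Note on the whitespace pipeline at the end of scrape_text: stripping lines,
# splitting on double spaces, and rejoining the non-empty chunks collapses
# blank lines and layout padding, e.g. "  Foo \n\n Bar  Baz " -> "Foo\nBar\nBaz".
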
@CatchException
def 连接bing搜索回答问题(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, web_port):
    """
    txt             text the user typed into the input box, e.g. a passage to translate or a path to files awaiting processing
    llm_kwargs      GPT model parameters such as temperature and top_p; usually passed through unchanged
    plugin_kwargs   plugin-specific parameters; unused for now
    chatbot         handle of the chat display box, used to show output to the user
    history         chat history, i.e. the preceding context
    system_prompt   silent system prompt for GPT
    web_port        port the application is currently running on
    """
    history = []    # clear history to prevent input overflow
    chatbot.append((f"请结合互联网信息回答以下问题:{txt}",
                    "[Local Message] 请注意,您正在调用一个[函数插件]的模板,该模板可以实现ChatGPT联网信息综合。该函数面向希望实现更多有趣功能的开发者,它可以作为创建新功能函数的模板。您若希望分享新的功能模组,请不吝PR!"))
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI promptly, since the GPT request will take a while
    # ------------- < Step 1: scrape the search engine results > -------------
    from toolbox import get_conf
    proxies = get_conf('proxies')
    urls = bing_search(txt, proxies)
    history = []
    if len(urls) == 0:
        chatbot.append((f"结论:{txt}",
                        "[Local Message] 受到bing限制,无法从bing获取信息!"))
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
        return
    # ------------- < Step 2: visit the pages one by one > -------------
    max_search_result = 8   # at most this many pages are collected
    for index, url in enumerate(urls[:max_search_result]):
        res = scrape_text(url['link'], proxies)
        history.extend([f"第{index}份搜索结果:", res])
        chatbot.append([f"第{index}份搜索结果:", res[:500]+"......"])
        yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
    # ------------- < Step 3: let ChatGPT synthesize > -------------
    i_say = f"从以上搜索结果中抽取信息,然后回答问题:{txt}"
    i_say, history = input_clipping(    # clip the input, trimming the longest entries first, to avoid blowing the token budget
        inputs=i_say,
        history=history,
        max_token_limit=model_info[llm_kwargs['llm_model']]['max_token']*3//4
    )
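    # Worked example of the clipping budget (illustrative numbers, not from
    # this file): for a model with max_token = 4096, the limit is
    # 4096*3//4 = 3072 tokens, leaving the remaining quarter of the context
    # window for the model's reply.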
    gpt_say = yield from request_gpt_model_in_new_thread_with_ui_alive(
        inputs=i_say, inputs_show_user=i_say,
        llm_kwargs=llm_kwargs, chatbot=chatbot, history=history,
        sys_prompt="请从给定的若干条搜索结果中抽取信息,对最相关的两个搜索结果进行总结,然后回答问题。"
    )
    chatbot[-1] = (i_say, gpt_say)
    history.append(i_say)
    history.append(gpt_say)
    yield from update_ui(chatbot=chatbot, history=history)  # refresh the UI
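
# A sketch of how a plugin like this is typically wired up (the registration
# actually lives elsewhere in the repo, e.g. crazy_functional.py in
# gpt_academic; the display-name key below is an assumption):
#
#   from crazy_functions.联网的ChatGPT_bing版 import 连接bing搜索回答问题
#   "连接网络回答问题(Bing版)": {"Function": HotReload(连接bing搜索回答问题)}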