server.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. import http.server
  2. import json
  3. import os
  4. import socketserver
  5. import traceback
  6. import yaml
  7. from pathlib import Path
  8. from argparse import ArgumentParser
  9. from functools import partial
  10. def append_exit(content):
  11. last_entry = content["history"][-1]
  12. if last_entry["role"] == "system":
  13. return content
  14. exit_status = content.get("info", {}).get("exit_status", None)
  15. if exit_status is None:
  16. return content
  17. if exit_status.startswith("submitted"):
  18. if "submission" in content["info"]:
  19. submission = content["info"]["submission"]
  20. content["history"].append({
  21. "role": "model_patch",
  22. "content": submission,
  23. })
  24. # else submission should be in history already
  25. else:
  26. raise ValueError("No submission in history or info")
  27. # elif content.get("info", {}).get("exit_status", None) is not None:
  28. # content["history"].append({
  29. # "role": "system",
  30. # "content": f"Exited - {content['info']['exit_status']}",
  31. # })
  32. return content
  33. def append_patch(instance_id, content, patches, patch_type):
  34. if content.get("info", {}).get("exit_status", None) is not None:
  35. if instance_id in patches:
  36. content["history"].append({
  37. "role": f"{patch_type} Patch",
  38. "content": patches[instance_id],
  39. })
  40. return content
  41. def append_results(traj_path, instance_id, content, results, results_file, scorecards, scorecards_file):
  42. stats = []
  43. model_stats = {}
  44. if traj_path.exists():
  45. data = json.loads(traj_path.read_text())
  46. info = data.get("info", {})
  47. model_stats = info.get("model_stats", {})
  48. instance_cost = model_stats.get("instance_cost", None)
  49. instance_cost = f'{instance_cost:.2f}' if instance_cost is not None else 'N/A'
  50. tokens_sent = model_stats.get("tokens_sent", None)
  51. tokens_sent = f'{tokens_sent:,}' if tokens_sent is not None else 'N/A'
  52. tokens_received = model_stats.get("tokens_received", None)
  53. tokens_received = f'{tokens_received:,}' if tokens_received is not None else 'N/A'
  54. api_calls = model_stats.get("api_calls", None)
  55. api_calls = f'{api_calls:,}' if api_calls is not None else 'N/A'
  56. stats.append(f"**** Run Stats ****")
  57. stats.append(f"Instance Cost: ${instance_cost}")
  58. stats.append(f"Tokens Sent: {tokens_sent}")
  59. stats.append(f"Tokens Received: {tokens_received}")
  60. stats.append(f"API Calls: {api_calls}\n")
  61. status = []
  62. if results is None:
  63. status.append("Evaluation results not found")
  64. elif "not_generated" in results and "generated" in results and "applied" in results and "resolved" in results:
  65. is_generated = instance_id in results["generated"]
  66. is_applied = instance_id in results["applied"]
  67. is_resolved = instance_id in results["resolved"]
  68. status.append("**** Statuses ****")
  69. status.append(
  70. f" {'✅' if is_generated else '❌'} Generated (The agent was {'' if is_generated else 'not '}"
  71. "able to generate a pull request to address this issue)")
  72. status.append(
  73. f" {'✅' if is_applied else '❌'} Applied (The pull request was {'' if is_applied else 'not '}"
  74. "successfully applied to the repo during eval)")
  75. status.append(
  76. f" {'✅' if is_resolved else '❌'} Resolved (The pull request {'' if is_resolved else 'not '}"
  77. "successfully resolved the issue during eval)")
  78. else:
  79. status.append("Results format not recognized")
  80. if scorecards is not None:
  81. scorecard = [x for x in scorecards if x["instance_id"] == instance_id][0]
  82. if "test_results" in scorecard and "failure" in scorecard["test_results"] and (
  83. len(scorecard["test_results"]["failure"]["FAIL_TO_PASS"]) > 0 or
  84. len(scorecard["test_results"]["failure"]["PASS_TO_PASS"]) > 0
  85. ):
  86. tests_failing = [
  87. f" - {x}" for x in scorecard["test_results"]["failure"]["FAIL_TO_PASS"]
  88. ] + [
  89. f" - {x}" for x in scorecard["test_results"]["failure"]["PASS_TO_PASS"]
  90. ]
  91. status.extend(["", "**** Test Results ****", "🧪 Tests Failed"] + tests_failing[:7])
  92. if len(tests_failing) > 7:
  93. status.append(f" ... and {len(tests_failing) - 7} more")
  94. status.append("")
  95. if status == []:
  96. status.append("Instance not found in results")
  97. else:
  98. status.append("---------------------------")
  99. status.append("Note that the evaluation results here may not be accurate or up to date, since they are computed separately from the agent run itself.")
  100. results_relative = results_file.resolve().relative_to(Path(__file__).resolve().parent.parent)
  101. status.append(f"Check {results_relative} for the most accurate evaluation results.")
  102. status.append("")
  103. status.append(f"Instance ID: {instance_id}")
  104. status.append("Based on results:")
  105. status.append(json.dumps(results, indent=4))
  106. eval_report = {
  107. "role": "Evaluation Report",
  108. "content": "\n".join([*stats, *status]),
  109. }
  110. content["history"].insert(0, eval_report)
  111. content["history"].append(eval_report)
  112. return content
  113. def load_content(file_name, gold_patches, test_patches):
  114. with open(file_name) as infile:
  115. content = json.load(infile)
  116. results_file = Path(file_name).parent / "results.json"
  117. results = None
  118. if results_file.exists():
  119. with open(results_file) as infile:
  120. results = json.load(infile)
  121. scorecards_file = Path(file_name).parent / "scorecards.json"
  122. scorecards = None
  123. if scorecards_file.exists():
  124. with open(scorecards_file) as infile:
  125. scorecards = json.load(infile)
  126. content = append_exit(content) # accomodate new and old format
  127. content = append_patch(Path(file_name).stem, content, gold_patches, "Gold")
  128. content = append_patch(Path(file_name).stem, content, test_patches, "Test")
  129. content = append_results(
  130. Path(file_name),
  131. Path(file_name).stem,
  132. content,
  133. results,
  134. results_file,
  135. scorecards,
  136. scorecards_file,
  137. )
  138. return content
  139. def load_results(traj_path):
  140. results_file = Path(traj_path).parent / "results.json"
  141. if results_file.exists():
  142. with open(results_file) as infile:
  143. return json.load(infile)
  144. return None
  145. def get_status(traj_path):
  146. results = load_results(traj_path)
  147. instance_id = Path(traj_path).stem
  148. if results is None:
  149. return "❓"
  150. elif "not_generated" in results and "generated" in results and "applied" in results and "resolved" in results:
  151. if instance_id in results["not_generated"]:
  152. return "❓"
  153. if instance_id in results["generated"]:
  154. if instance_id in results["resolved"]:
  155. return "✅"
  156. else:
  157. return "❌"
  158. return "❓"
  159. class Handler(http.server.SimpleHTTPRequestHandler):
  160. file_mod_times = {} # Dictionary to keep track of file modification times
  161. def __init__(self, *args, **kwargs):
  162. self.gold_patches = {}
  163. self.test_patches = {}
  164. if "gold_patches" in kwargs:
  165. self.gold_patches = kwargs.pop("gold_patches")
  166. if "test_patches" in kwargs:
  167. self.test_patches = kwargs.pop("test_patches")
  168. self.traj_dir = kwargs.pop('directory', '.') # Extract directory
  169. super().__init__(*args, **kwargs)
  170. def serve_directory_info(self):
  171. self.send_response(200)
  172. self.send_header('Content-type', 'application/json')
  173. self.end_headers()
  174. self.wfile.write(json.dumps({"directory": self.traj_dir}).encode())
  175. def serve_file_content(self, file_path):
  176. try:
  177. content = load_content(
  178. Path(self.traj_dir) / file_path,
  179. self.gold_patches,
  180. self.test_patches,
  181. )
  182. self.send_response(200)
  183. self.send_header('Content-type', 'text/plain')
  184. self.end_headers()
  185. self.wfile.write(json.dumps(content).encode())
  186. except FileNotFoundError:
  187. self.send_error(404, f"File {file_path} not found")
  188. def do_GET(self):
  189. if self.path == '/directory_info':
  190. self.serve_directory_info()
  191. elif self.path.startswith('/files'):
  192. self.handle_files_request()
  193. elif self.path.startswith('/trajectory/'):
  194. file_path = self.path[len('/trajectory/'):]
  195. self.serve_file_content(file_path)
  196. elif self.path.startswith('/check_update'):
  197. self.check_for_updates()
  198. else:
  199. super().do_GET()
  200. def handle_files_request(self):
  201. self.send_response(200)
  202. self.send_header('Content-type', 'application/json')
  203. self.end_headers()
  204. files = sorted(
  205. [
  206. str(file.relative_to(Path(self.traj_dir))) + " " * 4 + get_status(file)
  207. for file in Path(self.traj_dir).glob('**/*.traj')
  208. ],
  209. key=lambda x: str(Path(self.traj_dir) / x), reverse=True
  210. )
  211. self.wfile.write(json.dumps(files).encode())
  212. def check_for_updates(self):
  213. current_mod_times = {str(file): os.path.getmtime(file) for file in Path(self.traj_dir).glob('**/*.traj')}
  214. if current_mod_times != Handler.file_mod_times:
  215. Handler.file_mod_times = current_mod_times
  216. self.send_response(200) # Send response that there's an update
  217. else:
  218. self.send_response(204) # Send no content response if no update
  219. self.end_headers()
  220. def end_headers(self):
  221. self.send_header('Access-Control-Allow-Origin', '*')
  222. super().end_headers()
  223. def main(data_path, directory, port):
  224. data = []
  225. if data_path is not None:
  226. if data_path.endswith(".jsonl"):
  227. data = [json.loads(x) for x in open(data_path).readlines()]
  228. elif data_path.endswith(".json"):
  229. data = json.load(open(data_path))
  230. elif "args.yaml" in os.listdir(directory):
  231. args = yaml.safe_load(open(os.path.join(directory, "args.yaml")))
  232. if "environment" in args and "data_path" in args["environment"]:
  233. data_path = os.path.join(
  234. Path(__file__).parent, "..",
  235. args["environment"]["data_path"]
  236. )
  237. if os.path.exists(data_path):
  238. data = json.load(open(data_path, "r"))
  239. gold_patches = {
  240. d["instance_id"]: d["patch"]
  241. if "patch" in d else None for d in data
  242. }
  243. test_patches = {
  244. d["instance_id"]: d["test_patch"]
  245. if "test_patch" in d else None for d in data
  246. }
  247. handler_with_directory = partial(
  248. Handler,
  249. directory=directory,
  250. gold_patches=gold_patches,
  251. test_patches=test_patches,
  252. )
  253. try:
  254. with socketserver.TCPServer(("", port), handler_with_directory) as httpd:
  255. print(f"Serving at http://localhost:{port}")
  256. httpd.serve_forever()
  257. except OSError as e:
  258. if e.errno == 48:
  259. print(f"ERROR: Port ({port}) is already in use. Try another port with the --port flag.")
  260. else:
  261. raise e
  262. if __name__ == "__main__":
  263. parser = ArgumentParser()
  264. parser.add_argument("--data_path", type=str, help="Path to dataset that was used for the trajectories")
  265. parser.add_argument("--directory", type=str, help="Directory to serve", default="./trajectories", nargs='?')
  266. parser.add_argument("--port", type=int, help="Port to serve", default=8000)
  267. args = parser.parse_args()
  268. main(**vars(args))