server.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. import http.server
  2. import json
  3. import os
  4. import socketserver
  5. from typing import Any, Dict, Optional, List
  6. import yaml
  7. from pathlib import Path
  8. from argparse import ArgumentParser
  9. from functools import partial
  def append_exit(content):
      """Append the submitted patch to the history of a finished trajectory.

      Nothing is added when the last history entry already has the role
      ``"system"``, when ``info.exit_status`` is missing or ``None``, or when
      the exit status does not start with ``"submitted"``.

      Args:
          content: Parsed trajectory dict with a ``"history"`` list and an
              optional ``"info"`` dict. The history list is modified in place.

      Returns:
          The same ``content`` object.

      Raises:
          ValueError: If the exit status starts with ``"submitted"`` but
              ``info`` holds no ``"submission"`` entry.
      """
      history = content["history"]
      # An empty history has no last entry to inspect; treat it like a history
      # that does not end with a system message instead of raising IndexError.
      if history and history[-1]["role"] == "system":
          return content

      info = content.get("info", {})
      exit_status = info.get("exit_status", None)
      if exit_status is None:
          return content

      if exit_status.startswith("submitted"):
          if "submission" not in info:
              raise ValueError("No submission in history or info")
          history.append({
              "role": "model_patch",
              "content": info["submission"],
          })
      return content
  def append_patch(instance_id, content, patches, patch_type):
      """Append a reference patch entry to a finished trajectory's history.

      An entry with role ``"<patch_type> Patch"`` is added only when
      ``info.exit_status`` is set (not ``None``) and ``instance_id`` is a key
      of ``patches``. The history list is modified in place.

      Args:
          instance_id: Key to look up in ``patches``.
          content: Parsed trajectory dict.
          patches: Mapping from instance id to patch text.
          patch_type: Label used as the prefix of the entry's role.

      Returns:
          The same ``content`` object.
      """
      has_exited = content.get("info", {}).get("exit_status", None) is not None
      if has_exited and instance_id in patches:
          entry = {
              "role": f"{patch_type} Patch",
              "content": patches[instance_id],
          }
          content["history"].append(entry)
      return content
  def append_results(traj_path: Path, instance_id: str, content, results, results_file, scorecards, scorecards_file):
      """Attach an "Evaluation Report" entry to the start and end of the history.

      The report combines run statistics read from ``info.model_stats`` in the
      trajectory file with the evaluation outcome of ``instance_id`` taken from
      ``results`` and, when available, the failing tests listed for that
      instance in ``scorecards``.

      Args:
          traj_path: Trajectory file the model statistics are read from. If it
              does not exist, every statistic is reported as ``N/A``.
          instance_id: Identifier looked up in ``results`` and ``scorecards``.
          content: Parsed trajectory dict; its ``"history"`` list is modified
              in place.
          results: Parsed evaluation results, or ``None`` when unavailable.
          results_file: Path shown in the note telling the reader where the
              authoritative results live.
          scorecards: List of scorecard dicts, or ``None`` when unavailable.
          scorecards_file: Not used by this function; kept so existing callers
              keep working.

      Returns:
          The same ``content`` object. The identical report dict is inserted at
          index 0 and appended at the end of the history.
      """
      model_stats = {}
      if traj_path.exists():
          data = json.loads(traj_path.read_text())
          model_stats = data.get("info", {}).get("model_stats", {})

      def describe(key: str, spec: str) -> str:
          # Missing statistics are shown as "N/A" instead of being formatted.
          value = model_stats.get(key, None)
          return format(value, spec) if value is not None else 'N/A'

      stats: List[str] = [
          "**** Run Stats ****",
          f"Instance Cost: ${describe('instance_cost', '.2f')}",
          f"Tokens Sent: {describe('tokens_sent', ',')}",
          f"Tokens Received: {describe('tokens_received', ',')}",
          f"API Calls: {describe('api_calls', ',')}\n",
      ]

      status = []
      if results is None:
          status.append("Evaluation results not found")
      elif "not_generated" in results and "generated" in results and "applied" in results and "resolved" in results:
          is_generated = instance_id in results["generated"]
          is_applied = instance_id in results["applied"]
          is_resolved = instance_id in results["resolved"]
          status.append("**** Statuses ****")
          status.append(
              f" {'✅' if is_generated else '❌'} Generated (The agent was {'' if is_generated else 'not '}"
              "able to generate a pull request to address this issue)")
          status.append(
              f" {'✅' if is_applied else '❌'} Applied (The pull request was {'' if is_applied else 'not '}"
              "successfully applied to the repo during eval)")
          status.append(
              f" {'✅' if is_resolved else '❌'} Resolved (The pull request {'' if is_resolved else 'not '}"
              "successfully resolved the issue during eval)")
      else:
          status.append("Results format not recognized")

      if scorecards is not None:
          # The instance may have no scorecard (e.g. it was never evaluated);
          # skip the test section instead of failing on an empty lookup.
          scorecard = next(
              (card for card in scorecards if card.get("instance_id") == instance_id),
              None,
          )
          failures = {}
          if scorecard is not None:
              failures = scorecard.get("test_results", {}).get("failure", {})
          tests_failing = [
              f" - {x}" for x in failures.get("FAIL_TO_PASS", [])
          ] + [
              f" - {x}" for x in failures.get("PASS_TO_PASS", [])
          ]
          if tests_failing:
              # Only the first seven failing tests are listed; the rest are counted.
              status.extend(["", "**** Test Results ****", "🧪 Tests Failed"] + tests_failing[:7])
              if len(tests_failing) > 7:
                  status.append(f" ... and {len(tests_failing) - 7} more")
              status.append("")

      # Every branch above adds at least one line, so the footer always applies.
      status.append("---------------------------")
      status.append("Note that the evaluation results here may not be accurate or up to date, since they are computed separately from the agent run itself.")
      status.append(f"Check {results_file} for the most accurate evaluation results.")
      status.append("")
      status.append(f"Instance ID: {instance_id}")
      status.append("Based on results:")
      status.append(json.dumps(results, indent=4))

      eval_report = {
          "role": "Evaluation Report",
          "content": "\n".join([*stats, *status]),
      }
      content["history"].insert(0, eval_report)
      content["history"].append(eval_report)
      return content
  def load_content(file_name, gold_patches, test_patches) -> Dict[str, Any]:
      """Load a trajectory file and enrich its history for display.

      The trajectory JSON is read from ``file_name``; ``results.json`` and
      ``scorecards.json`` are looked up in the same directory. The file stem is
      used as the instance id for the patch and result lookups.

      Args:
          file_name: Path of the trajectory file.
          gold_patches: Mapping passed to ``append_patch`` with label "Gold".
          test_patches: Mapping passed to ``append_patch`` with label "Test".

      Returns:
          The parsed trajectory after ``append_exit``, both ``append_patch``
          calls and ``append_results`` have been applied.
      """
      with open(file_name) as infile:
          content = json.load(infile)

      traj_path = Path(file_name)
      instance_id = traj_path.stem
      run_dir = traj_path.parent

      results_file = run_dir / "results.json"
      results = load_results(results_file)

      # Scorecards are optional; None tells append_results there are none.
      scorecards_file = run_dir / "scorecards.json"
      scorecards = None
      if scorecards_file.exists():
          with open(scorecards_file) as infile:
              scorecards = json.load(infile)

      content = append_exit(content)  # accommodate new and old format
      for patches, label in ((gold_patches, "Gold"), (test_patches, "Test")):
          content = append_patch(instance_id, content, patches, label)
      return append_results(
          traj_path,
          instance_id,
          content,
          results,
          results_file,
          scorecards,
          scorecards_file,
      )
  def load_results(results_path: Path) -> Optional[Dict[str, Any]]:
      """Read the evaluation results stored at ``results_path``.

      Returns ``None`` when the file does not exist. Otherwise the parsed JSON
      is returned, with a ``"no_generation"`` key renamed to
      ``"not_generated"`` so both spellings end up under one name.
      """
      if results_path.exists():
          with open(results_path) as infile:
              loaded = json.load(infile)
          # Different versions of the code used "not_generated" or
          # "no_generation"; standardize on the former.
          if "no_generation" in loaded:
              loaded["not_generated"] = loaded.pop("no_generation")
          return loaded
      return None
  149. # todo: shouldn't reload results for every status
  def get_status(traj_path) -> str:
      """Return the result emoji for a single trajectory file.

      The instance id is the file stem and the results are loaded from
      ``results.json`` next to the trajectory. "✅" means the instance is listed
      as generated and resolved, "❌" generated but not resolved, and "❓" covers
      every other case (no results file, unrecognized format, not generated, or
      not listed at all).
      """
      traj = Path(traj_path)
      results = load_results(traj.parent / "results.json")
      instance_id = traj.stem

      if results is None:
          return "❓"

      expected_keys = ("not_generated", "generated", "applied", "resolved")
      if not all(key in results for key in expected_keys):
          return "❓"

      if instance_id in results["not_generated"]:
          return "❓"
      if instance_id not in results["generated"]:
          return "❓"
      return "✅" if instance_id in results["resolved"] else "❌"
  class Handler(http.server.SimpleHTTPRequestHandler):
      """Request handler adding trajectory JSON endpoints to the static file server.

      Endpoints handled in ``do_GET``:
          /directory_info   -> JSON object with the served trajectory directory
          /files...         -> JSON list of ``*.traj`` files with a status emoji
          /trajectory/<p>   -> enriched trajectory ``<p>`` (relative to the
                               trajectory directory) as JSON text
          /check_update...  -> 200 if any ``*.traj`` mtime changed, else 204
      Any other path falls through to ``SimpleHTTPRequestHandler``.
      """

      # Snapshot of {path: mtime} from the previous /check_update call. Stored on
      # the class, so it is shared by every handler instance.
      file_mod_times = {}

      def __init__(self, *args, **kwargs):
          """Pull the custom keyword arguments out before delegating to the base class.

          The attributes are assigned before ``super().__init__`` because the
          base request handler processes the request inside its constructor.
          ``directory`` is consumed here and not forwarded to the base class.
          """
          self.gold_patches = kwargs.pop("gold_patches", {})
          self.test_patches = kwargs.pop("test_patches", {})
          self.traj_dir = kwargs.pop('directory', '.')  # Extract directory
          super().__init__(*args, **kwargs)

      def serve_directory_info(self):
          """Respond with a JSON object naming the trajectory directory."""
          self.send_response(200)
          self.send_header('Content-type', 'application/json')
          self.end_headers()
          self.wfile.write(json.dumps({"directory": self.traj_dir}).encode())

      def serve_file_content(self, file_path):
          """Respond with the enriched trajectory stored at ``file_path``.

          Args:
              file_path: Path taken from the request URL, relative to the
                  trajectory directory. It may be percent-encoded.

          Sends 403 when the path resolves outside the trajectory directory,
          404 when the file does not exist, 500 when it cannot be parsed or
          enriched, and otherwise 200 with the JSON document as ``text/plain``.
          """
          from urllib.parse import unquote

          # The path comes straight from the URL: decode it, then refuse
          # anything (".." segments, absolute paths) that escapes traj_dir.
          target = Path(self.traj_dir) / unquote(file_path)
          base_dir = Path(self.traj_dir).resolve()
          if base_dir not in target.resolve().parents:
              self.send_error(403, "Access outside the trajectory directory is not allowed")
              return

          try:
              content = load_content(
                  target,
                  self.gold_patches,
                  self.test_patches,
              )
          except FileNotFoundError:
              self.send_error(404, f"File {file_path} not found")
              return
          except ValueError:
              # Covers invalid JSON (e.g. a file that is still being written)
              # and trajectories load_content rejects; answer instead of
              # dropping the connection without a response.
              self.send_error(500, f"File {file_path} could not be loaded")
              return

          self.send_response(200)
          self.send_header('Content-type', 'text/plain')
          self.end_headers()
          self.wfile.write(json.dumps(content).encode())

      def do_GET(self):
          """Dispatch GET requests to the custom endpoints or the static server."""
          if self.path == '/directory_info':
              self.serve_directory_info()
          elif self.path.startswith('/files'):
              self.handle_files_request()
          elif self.path.startswith('/trajectory/'):
              file_path = self.path[len('/trajectory/'):]
              self.serve_file_content(file_path)
          elif self.path.startswith('/check_update'):
              self.check_for_updates()
          else:
              super().do_GET()

      def handle_files_request(self):
          """Respond with a JSON list of all ``*.traj`` files and their status.

          Each entry is the path relative to the trajectory directory, four
          spaces, and the emoji from ``get_status``. Entries are sorted in
          reverse order of their full path string.
          """
          self.send_response(200)
          self.send_header('Content-type', 'application/json')
          self.end_headers()
          root = Path(self.traj_dir)
          files = sorted(
              [
                  str(file.relative_to(root)) + " " * 4 + get_status(file)
                  for file in root.glob('**/*.traj')
              ],
              key=lambda x: str(root / x), reverse=True
          )
          self.wfile.write(json.dumps(files).encode())

      def check_for_updates(self):
          """Report whether any ``*.traj`` modification time changed since the last check."""
          current_mod_times = {str(file): os.path.getmtime(file) for file in Path(self.traj_dir).glob('**/*.traj')}
          if current_mod_times != Handler.file_mod_times:
              Handler.file_mod_times = current_mod_times
              self.send_response(200)  # Send response that there's an update
          else:
              self.send_response(204)  # Send no content response if no update
          self.end_headers()

      def end_headers(self):
          """Add a permissive CORS header to every response before finishing headers."""
          self.send_header('Access-Control-Allow-Origin', '*')
          super().end_headers()
  def main(data_path, directory, port):
      """Load the reference patches and serve ``directory`` over HTTP.

      Args:
          data_path: Optional path to the dataset (``.jsonl`` or ``.json``)
              used for the trajectories. When ``None`` and ``directory``
              contains an ``args.yaml``, the dataset path is taken from its
              ``environment.data_path`` entry, resolved relative to the parent
              of this file's directory.
          directory: Directory with the trajectory files to serve.
          port: TCP port to listen on.

      The dataset entries provide the "patch" and "test_patch" values handed
      to ``Handler``, keyed by "instance_id". The call blocks in
      ``serve_forever``. If the port is already in use, an error message is
      printed and the function returns; any other ``OSError`` propagates.
      """
      import errno

      data = []
      if data_path is not None:
          if data_path.endswith(".jsonl"):
              with open(data_path) as infile:
                  # Skip blank lines (such as a trailing newline at end of file).
                  data = [json.loads(line) for line in infile if line.strip()]
          elif data_path.endswith(".json"):
              with open(data_path) as infile:
                  data = json.load(infile)
      elif "args.yaml" in os.listdir(directory):
          with open(os.path.join(directory, "args.yaml")) as infile:
              # An empty YAML file parses to None; treat it as having no settings.
              run_args = yaml.safe_load(infile) or {}
          if "environment" in run_args and "data_path" in run_args["environment"]:
              data_path = os.path.join(
                  Path(__file__).parent, "..",
                  run_args["environment"]["data_path"]
              )
              if os.path.exists(data_path):
                  with open(data_path, "r") as infile:
                      data = json.load(infile)

      # Entries without the field map to None rather than being left out.
      gold_patches = {d["instance_id"]: d.get("patch") for d in data}
      test_patches = {d["instance_id"]: d.get("test_patch") for d in data}

      handler_with_directory = partial(
          Handler,
          directory=directory,
          gold_patches=gold_patches,
          test_patches=test_patches,
      )
      try:
          with socketserver.TCPServer(("", port), handler_with_directory) as httpd:
              print(f"Serving at http://localhost:{port}")
              httpd.serve_forever()
      except OSError as e:
          # The numeric value of "address already in use" differs between
          # platforms, so compare against the symbolic constant.
          if e.errno != errno.EADDRINUSE:
              raise
          print(f"ERROR: Port ({port}) is already in use. Try another port with the --port flag.")
  if __name__ == "__main__":
      # Command-line entry point: each option maps one-to-one onto a keyword
      # argument of main().
      cli = ArgumentParser()
      option_specs = (
          ("--data_path", {"type": str, "help": "Path to dataset that was used for the trajectories"}),
          ("--directory", {"type": str, "help": "Directory to serve", "default": "./trajectories", "nargs": '?'}),
          ("--port", {"type": int, "help": "Port to serve", "default": 8000}),
      )
      for flag, spec in option_specs:
          cli.add_argument(flag, **spec)
      main(**vars(cli.parse_args()))