"""HTTP server for browsing ``.traj`` trajectory files together with their evaluation results."""
import errno
import http.server
import json
import os
import socketserver
import traceback
from argparse import ArgumentParser
from functools import partial
from pathlib import Path

import yaml
def append_exit(content):
    """Append the submitted patch to the trajectory history when the run ended in a submission.

    The trajectory is returned untouched when its last history entry already
    has the ``"system"`` role, when no exit status was recorded, or when the
    exit status does not start with ``"submitted"``.

    Args:
        content: Parsed trajectory dict with a ``"history"`` list and an
            optional ``"info"`` dict.

    Returns:
        The same ``content`` object, possibly with a ``"model_patch"`` entry
        appended to ``content["history"]``.

    Raises:
        ValueError: If the exit status starts with ``"submitted"`` but
            ``content["info"]`` holds no ``"submission"``.
    """
    history = content["history"]
    # Guard against an empty history instead of failing on history[-1].
    if history and history[-1]["role"] == "system":
        return content

    info = content.get("info", {})
    exit_status = info.get("exit_status")
    if exit_status is None or not exit_status.startswith("submitted"):
        return content

    if "submission" not in info:
        raise ValueError("No submission in history or info")

    history.append({
        "role": "model_patch",
        "content": info["submission"],
    })
    return content
def append_patch(instance_id, content, patches, patch_type):
    """Append a reference patch for ``instance_id`` to the trajectory history.

    Nothing is appended unless the trajectory recorded an exit status and
    ``patches`` holds a non-``None`` patch for the instance.

    Args:
        instance_id: Key used to look the patch up in ``patches``.
        content: Parsed trajectory dict with a ``"history"`` list and an
            optional ``"info"`` dict.
        patches: Mapping from instance id to patch text; a value may be
            ``None`` when the dataset entry had no such patch.
        patch_type: Label used in the entry's role, e.g. ``"Gold"`` yields
            the role ``"Gold Patch"``.

    Returns:
        The same ``content`` object, possibly with one entry appended.
    """
    if content.get("info", {}).get("exit_status") is None:
        return content
    if instance_id not in patches:
        return content

    patch = patches[instance_id]
    # A missing patch is stored as None; an entry without content is useless.
    if patch is None:
        return content

    content["history"].append({
        "role": f"{patch_type} Patch",
        "content": patch,
    })
    return content
def _format_stat(value, spec):
    """Format ``value`` with format spec ``spec``, or return ``'N/A'`` when it is ``None``."""
    return format(value, spec) if value is not None else "N/A"


def _run_stats_lines(traj_path):
    """Build the "Run Stats" lines from the ``info.model_stats`` of the trajectory file."""
    model_stats = {}
    if traj_path.exists():
        info = json.loads(traj_path.read_text()).get("info", {})
        model_stats = info.get("model_stats", {})
    return [
        "**** Run Stats ****",
        f"Instance Cost: ${_format_stat(model_stats.get('instance_cost'), '.2f')}",
        f"Tokens Sent: {_format_stat(model_stats.get('tokens_sent'), ',')}",
        f"Tokens Received: {_format_stat(model_stats.get('tokens_received'), ',')}",
        f"API Calls: {_format_stat(model_stats.get('api_calls'), ',')}\n",
    ]


def _status_lines(instance_id, results):
    """Build the generated/applied/resolved status lines for ``instance_id``."""
    if results is None:
        return ["Evaluation results not found"]
    required_keys = ("not_generated", "generated", "applied", "resolved")
    if not all(key in results for key in required_keys):
        return ["Results format not recognized"]

    def mark(flag):
        return "✅" if flag else "❌"

    def negation(flag):
        return "" if flag else "not "

    is_generated = instance_id in results["generated"]
    is_applied = instance_id in results["applied"]
    is_resolved = instance_id in results["resolved"]
    return [
        "**** Statuses ****",
        f" {mark(is_generated)} Generated (The agent was {negation(is_generated)}"
        "able to generate a pull request to address this issue)",
        f" {mark(is_applied)} Applied (The pull request was {negation(is_applied)}"
        "successfully applied to the repo during eval)",
        f" {mark(is_resolved)} Resolved (The pull request {negation(is_resolved)}"
        "successfully resolved the issue during eval)",
    ]


def _failed_test_lines(instance_id, scorecards, limit=7):
    """Build the "Test Results" lines listing at most ``limit`` failed tests for ``instance_id``."""
    # Tolerate a scorecards file that has no entry for this instance.
    scorecard = next(
        (card for card in scorecards if card["instance_id"] == instance_id), None
    )
    if scorecard is None:
        return []
    failure = scorecard.get("test_results", {}).get("failure")
    if failure is None:
        return []
    failing = [
        f" - {name}"
        for name in [*failure.get("FAIL_TO_PASS", []), *failure.get("PASS_TO_PASS", [])]
    ]
    if not failing:
        return []
    lines = ["", "**** Test Results ****", "🧪 Tests Failed", *failing[:limit]]
    if len(failing) > limit:
        lines.append(f" ... and {len(failing) - limit} more")
    return lines


def append_results(traj_path, instance_id, content, results, results_file, scorecards, scorecards_file):
    """Add an "Evaluation Report" entry to both ends of the trajectory history.

    The report combines run statistics read from the trajectory file, the
    evaluation status of the instance taken from ``results``, and failed
    tests taken from ``scorecards`` when available.

    Args:
        traj_path: ``Path`` of the trajectory file; its ``info.model_stats``
            supplies the run statistics when the file exists.
        instance_id: Identifier looked up in ``results`` and ``scorecards``.
        content: Parsed trajectory dict whose ``"history"`` list is extended.
        results: Parsed ``results.json`` content, or ``None`` when missing.
        results_file: ``Path`` of the results file, shown in the report.
        scorecards: Parsed ``scorecards.json`` content (a list of dicts with
            an ``"instance_id"`` key), or ``None`` when missing.
        scorecards_file: Unused; kept for interface compatibility.

    Returns:
        The same ``content`` object with the report inserted at index 0 and
        appended at the end of ``content["history"]``.
    """
    stats = _run_stats_lines(traj_path)

    status = _status_lines(instance_id, results)
    if scorecards is not None:
        status.extend(_failed_test_lines(instance_id, scorecards))
        status.append("")

    # ``status`` always holds at least one line here, so the footer is
    # unconditional.
    resolved_results = results_file.resolve()
    try:
        results_display = resolved_results.relative_to(Path(__file__).resolve().parent.parent)
    except ValueError:
        # The results file lives outside the repository root; show the full path.
        results_display = resolved_results
    status.extend([
        "---------------------------",
        "Note that the evaluation results here may not be accurate or up to date, since they are computed seperately from the agent run itself.",
        f"Check {results_display} for the most accurate evaluation results.",
        "",
        f"Instance ID: {instance_id}",
        "Based on results:",
        json.dumps(results, indent=4),
    ])

    eval_report = {
        "role": "Evaluation Report",
        "content": "\n".join([*stats, *status]),
    }
    content["history"].insert(0, eval_report)
    content["history"].append(eval_report)
    return content
def _load_json_if_exists(path):
    """Return the parsed JSON content of ``path``, or ``None`` when the file does not exist."""
    if not path.exists():
        return None
    with open(path) as infile:
        return json.load(infile)


def load_content(file_name, gold_patches, test_patches):
    """Load a trajectory file and enrich its history for display.

    The trajectory is extended with the submitted patch, the gold and test
    patches for the instance, and an evaluation report. The report is built
    from ``results.json`` and ``scorecards.json`` located next to the
    trajectory file, when those files exist.

    Args:
        file_name: Path to the trajectory file; its stem is used as the
            instance id.
        gold_patches: Mapping from instance id to gold patch.
        test_patches: Mapping from instance id to test patch.

    Returns:
        The parsed and enriched trajectory dict.

    Raises:
        FileNotFoundError: If ``file_name`` does not exist.
    """
    traj_path = Path(file_name)
    instance_id = traj_path.stem
    with open(traj_path) as infile:
        content = json.load(infile)

    results_file = traj_path.parent / "results.json"
    results = _load_json_if_exists(results_file)
    scorecards_file = traj_path.parent / "scorecards.json"
    scorecards = _load_json_if_exists(scorecards_file)

    content = append_exit(content)  # accommodate new and old format
    content = append_patch(instance_id, content, gold_patches, "Gold")
    content = append_patch(instance_id, content, test_patches, "Test")
    content = append_results(
        traj_path,
        instance_id,
        content,
        results,
        results_file,
        scorecards,
        scorecards_file,
    )
    return content
def load_results(traj_path):
    """Load the ``results.json`` file that sits next to a trajectory file.

    Args:
        traj_path: Path to the trajectory file.

    Returns:
        The parsed JSON content, or ``None`` when no ``results.json`` exists
        in the trajectory's directory.
    """
    results_file = Path(traj_path).parent / "results.json"
    if not results_file.exists():
        return None
    with open(results_file) as infile:
        return json.load(infile)
def get_status(traj_path):
    """Return a one-character status icon for a trajectory.

    The instance id is the stem of ``traj_path`` and is looked up in the
    ``results.json`` next to it.

    Args:
        traj_path: Path to the trajectory file.

    Returns:
        ``"✅"`` when the instance is listed as generated and resolved,
        ``"❌"`` when it is generated but not resolved, and ``"❓"`` in every
        other case (no results, unrecognized results format, or instance not
        generated).
    """
    unknown = "❓"
    results = load_results(traj_path)
    if results is None:
        return unknown

    required_keys = ("not_generated", "generated", "applied", "resolved")
    if not all(key in results for key in required_keys):
        return unknown

    instance_id = Path(traj_path).stem
    if instance_id in results["not_generated"]:
        return unknown
    if instance_id not in results["generated"]:
        return unknown
    return "✅" if instance_id in results["resolved"] else "❌"
class Handler(http.server.SimpleHTTPRequestHandler):
    """Request handler that serves trajectory listings and contents as JSON.

    Besides the static-file behaviour inherited from
    ``SimpleHTTPRequestHandler``, it answers ``/directory_info``, ``/files``,
    ``/trajectory/<path>`` and ``/check_update``.
    """

    # Modification times of the trajectory files seen by the last update
    # check; shared by all handler instances.
    file_mod_times = {}

    def __init__(self, *args, **kwargs):
        """Store the patches and trajectory directory, then handle the request.

        Keyword Args:
            gold_patches: Mapping from instance id to gold patch (default ``{}``).
            test_patches: Mapping from instance id to test patch (default ``{}``).
            directory: Directory containing the trajectories (default ``"."``).
                It is not forwarded to the base class.
        """
        # The attributes are set before calling the base constructor so they
        # are available to every method it may invoke.
        self.gold_patches = kwargs.pop("gold_patches", {})
        self.test_patches = kwargs.pop("test_patches", {})
        self.traj_dir = kwargs.pop('directory', '.')  # Extract directory
        super().__init__(*args, **kwargs)

    def serve_directory_info(self):
        """Respond with a JSON object naming the trajectory directory."""
        self.send_response(200)
        self.send_header('Content-type', 'application/json')
        self.end_headers()
        self.wfile.write(json.dumps({"directory": self.traj_dir}).encode())

    def serve_file_content(self, file_path):
        """Respond with the enriched trajectory stored at ``file_path``.

        Args:
            file_path: Path of the trajectory relative to the trajectory
                directory, taken from the request URL.

        Sends 404 when the file does not exist and 500 when it cannot be
        loaded for any other reason.
        """
        # NOTE(review): file_path comes straight from the URL and is neither
        # URL-decoded nor checked to stay inside traj_dir.
        try:
            content = load_content(
                Path(self.traj_dir) / file_path,
                self.gold_patches,
                self.test_patches,
            )
            # Serialize before any header is sent so that a failure can still
            # be reported with a proper status code.
            body = json.dumps(content).encode()
        except FileNotFoundError:
            self.send_error(404, f"File {file_path} not found")
            return
        except Exception:
            # Request boundary: log the failure and answer instead of
            # dropping the connection without a response.
            traceback.print_exc()
            self.send_error(500, f"Failed to load {file_path}")
            return
        self.send_response(200)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write(body)

    def do_GET(self):
        """Dispatch GET requests to the matching endpoint or to static serving."""
        if self.path == '/directory_info':
            self.serve_directory_info()
        elif self.path.startswith('/files'):
            self.handle_files_request()
        elif self.path.startswith('/trajectory/'):
            file_path = self.path[len('/trajectory/'):]
            self.serve_file_content(file_path)
        elif self.path.startswith('/check_update'):
            self.check_for_updates()
        else:
            super().do_GET()

    def handle_files_request(self):
        """Respond with the JSON list of trajectory files, each followed by its status icon."""
        self.send_response(200)
        self.send_header('Content-type', 'application/json')
        self.end_headers()
        traj_root = Path(self.traj_dir)
        files = sorted(
            [
                str(file.relative_to(traj_root)) + " " * 4 + get_status(file)
                for file in traj_root.glob('**/*.traj')
            ],
            key=lambda x: str(traj_root / x), reverse=True
        )
        self.wfile.write(json.dumps(files).encode())

    def check_for_updates(self):
        """Respond 200 when any trajectory file changed since the last check, else 204."""
        current_mod_times = {
            str(file): os.path.getmtime(file)
            for file in Path(self.traj_dir).glob('**/*.traj')
        }
        if current_mod_times != Handler.file_mod_times:
            Handler.file_mod_times = current_mod_times
            self.send_response(200)  # Send response that there's an update
        else:
            self.send_response(204)  # Send no content response if no update
        self.end_headers()

    def end_headers(self):
        """Add a permissive CORS header to every response before ending the headers."""
        self.send_header('Access-Control-Allow-Origin', '*')
        super().end_headers()
def _read_dataset(path):
    """Load dataset records from ``path``: one JSON object per line for ``.jsonl``, else one JSON document."""
    with open(path) as infile:
        if str(path).endswith(".jsonl"):
            return [json.loads(line) for line in infile if line.strip()]
        return json.load(infile)


def main(data_path, directory, port):
    """Serve the trajectories in ``directory`` over HTTP until interrupted.

    Gold and test patches are taken from the dataset at ``data_path``. When
    ``data_path`` is ``None`` and ``directory`` contains an ``args.yaml``, the
    dataset path is read from its ``environment.data_path`` entry, resolved
    relative to the parent of this file's directory.

    Args:
        data_path: Path to a ``.json`` or ``.jsonl`` dataset, or ``None``.
            Other extensions are ignored.
        directory: Directory containing the trajectory files.
        port: TCP port to listen on.

    Raises:
        OSError: If the server cannot be started for a reason other than the
            port already being in use (that case only prints a message).
    """
    data = []
    if data_path is not None:
        if data_path.endswith((".jsonl", ".json")):
            data = _read_dataset(data_path)
    elif "args.yaml" in os.listdir(directory):
        with open(os.path.join(directory, "args.yaml")) as infile:
            # An empty YAML file parses to None.
            args = yaml.safe_load(infile) or {}
        if "environment" in args and "data_path" in args["environment"]:
            data_path = os.path.join(
                Path(__file__).parent, "..",
                args["environment"]["data_path"]
            )
            if os.path.exists(data_path):
                data = _read_dataset(data_path)

    # Entries without a patch are kept with a None value.
    gold_patches = {d["instance_id"]: d.get("patch") for d in data}
    test_patches = {d["instance_id"]: d.get("test_patch") for d in data}

    handler_with_directory = partial(
        Handler,
        directory=directory,
        gold_patches=gold_patches,
        test_patches=test_patches,
    )
    try:
        with socketserver.TCPServer(("", port), handler_with_directory) as httpd:
            print(f"Serving at http://localhost:{port}")
            httpd.serve_forever()
    except OSError as e:
        # errno.EADDRINUSE is the portable spelling; the numeric value
        # differs between platforms.
        if e.errno != errno.EADDRINUSE:
            raise
        print(f"ERROR: Port ({port}) is already in use. Try another port with the --port flag.")
# Script entry point: parse the command-line options and start the server.
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--data_path", type=str, help="Path to dataset that was used for the trajectories")
    parser.add_argument("--directory", type=str, help="Directory to serve", default="./trajectories", nargs='?')
    parser.add_argument("--port", type=int, help="Port to serve", default=8000)
    args = parser.parse_args()
    main(**vars(args))