123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
- import json
- import os
- import subprocess
- import yaml
- from argparse import ArgumentParser
def process_synthetic_trajs(action_trajs_path: str, config_file: str, suffix: str):
    """Replay a batch of synthetic action trajectories through ``run.py``.

    Every non-blank line of ``action_trajs_path`` is parsed as a JSON object
    that provides a ``task_instance`` (which carries an ``instance_id``) and
    its ``actions``. Two scratch files are written to the current working
    directory and handed to ``run.py``:

    * the task instances, stored under the input file's base name and passed
      as ``--data_path``;
    * one ``{instance_id: actions}`` object per line, stored in
      ``temp_actions.jsonl`` and passed as ``--replay_path``.

    Both scratch files are removed afterwards, even when writing them or
    launching ``run.py`` raises.

    Args:
        action_trajs_path: Path to the JSON-lines file of trajectories.
        config_file: Forwarded to ``run.py`` as ``--config_file``.
        suffix: Forwarded as ``--suffix``; the flag is omitted when ``None``.

    Raises:
        ValueError: If a scratch file would be the input file itself (the
            input lives in the working directory) or if both scratch files
            would share one name. Previously the input was silently
            overwritten and then deleted in that situation.
    """
    # Load action trajectories, task instances
    with open(action_trajs_path, "r") as f:
        action_trajs = [json.loads(line) for line in f if line.strip()]
    task_instances = [t["task_instance"] for t in action_trajs]

    # Temporary file names (both are relative to the working directory)
    replay_action_trajs_path = "temp_actions.jsonl"
    replay_task_instances_path = os.path.basename(action_trajs_path)
    scratch_paths = (replay_task_instances_path, replay_action_trajs_path)

    # Refuse to run when a scratch file would clobber the caller's input or
    # the other scratch file: everything in scratch_paths is deleted below.
    if replay_task_instances_path == replay_action_trajs_path:
        raise ValueError(
            f"Input file may not be named {replay_action_trajs_path!r}; "
            "that name is reserved for the temporary replay file"
        )
    input_abs = os.path.abspath(action_trajs_path)
    if any(os.path.abspath(p) == input_abs for p in scratch_paths):
        raise ValueError(
            f"{action_trajs_path!r} would be overwritten by a temporary file; "
            "run from a different working directory or move the input file"
        )

    try:
        # Write task_instances to file for data_path
        with open(replay_task_instances_path, "w") as f:
            f.writelines(json.dumps(t) + "\n" for t in task_instances)

        # Write action trajectories to a file
        with open(replay_action_trajs_path, "w") as f:
            f.writelines(
                json.dumps({t["task_instance"]["instance_id"]: t["actions"]}) + "\n"
                for t in action_trajs
            )

        # Call run.py via subprocess
        command = [
            "python",
            "run.py",
            "--config_file", config_file,
            "--data_path", replay_task_instances_path,
            "--install_environment", "True",
            "--model_name", "replay",
            "--replay_path", replay_action_trajs_path,
        ]
        if suffix is not None:
            command.extend(["--suffix", suffix])
        subprocess.run(command)
    finally:
        # Always clean up, including after a failed write or launch.
        for path in scratch_paths:
            if os.path.exists(path):
                os.remove(path)
def process_single_traj(traj_path: str, config_file: str, data_path: str, suffix: str):
    """Replay one recorded trajectory through ``run.py``.

    The assistant turns of the trajectory's history become the actions to
    replay. For a ``.yaml`` trajectory the parsed document itself is used as
    the history; any other file is parsed as JSON and its ``history`` key is
    used. The instance id is the trajectory's file name up to its first dot.

    Two scratch files are written to the current working directory and handed
    to ``run.py``: ``temp_replay.jsonl`` (``--replay_path``) holding
    ``{instance_id: actions}``, and ``<instance_id>.jsonl`` (``--data_path``)
    holding the task instances from ``data_path`` whose ``instance_id``
    matches. All inputs are loaded and validated before anything is written,
    and both scratch files are removed afterwards even if ``run.py`` cannot
    be launched.

    Args:
        traj_path: Path to the trajectory file.
        config_file: Forwarded to ``run.py`` as ``--config_file``.
        data_path: ``.json`` or ``.jsonl`` file of task instances. When
            ``None`` it is read from ``environment.data_path`` in the
            ``args.yaml`` located next to the trajectory.
        suffix: Forwarded as ``--suffix``; the flag is omitted when ``None``.

    Raises:
        ValueError: If ``data_path`` is neither ``.json`` nor ``.jsonl``, or
            if a scratch file would overwrite ``traj_path``/``data_path`` or
            the other scratch file.
    """
    replay_action_trajs_path = "temp_replay.jsonl"

    # Open trajectory file, extract responses as actions
    if traj_path.endswith(".yaml"):
        with open(traj_path, "r") as f:
            traj_data = {"history": yaml.safe_load(f)}
    else:
        with open(traj_path, "r") as f:
            traj_data = json.load(f)
    actions = [x["content"] for x in traj_data["history"] if x["role"] == "assistant"]
    instance_id = traj_path.split("/")[-1].split(".")[0]
    replay_task_instances_path = instance_id + ".jsonl"

    # Get data_path from args.yaml
    if data_path is None:
        args_path = os.path.join(os.path.dirname(traj_path), "args.yaml")
        with open(args_path) as f:
            args = yaml.safe_load(f)
        data_path = args["environment"]["data_path"]

    # Identify the relevant task instance
    if data_path.endswith(".jsonl"):
        with open(data_path, "r") as f:
            data = [json.loads(line) for line in f if line.strip()]
    elif data_path.endswith(".json"):
        with open(data_path) as f:
            data = json.load(f)
    else:
        raise ValueError("--data_path must be a .json or .jsonl")
    data = [d for d in data if d["instance_id"] == instance_id]

    # Refuse to run when a scratch file would clobber one of the caller's
    # files or the other scratch file: both scratch files are deleted below.
    scratch_paths = (replay_action_trajs_path, replay_task_instances_path)
    if replay_task_instances_path == replay_action_trajs_path:
        raise ValueError(
            f"Instance file name {replay_task_instances_path!r} collides with "
            "the temporary replay file"
        )
    protected = {os.path.abspath(traj_path), os.path.abspath(data_path)}
    for path in scratch_paths:
        if os.path.abspath(path) in protected:
            raise ValueError(
                f"Temporary file {path!r} would overwrite an input file; "
                "run from a different working directory or move the input"
            )

    try:
        with open(replay_action_trajs_path, "w") as f:
            f.write(json.dumps({instance_id: actions}) + "\n")
        with open(replay_task_instances_path, "w") as f:
            f.writelines(json.dumps(d) + "\n" for d in data)

        # Call run.py via subprocess
        command = [
            "python",
            "run.py",
            "--config_file", config_file,
            "--data_path", replay_task_instances_path,
            "--install_environment", "True",
            "--model_name", "replay",
            "--replay_path", replay_action_trajs_path,
        ]
        if suffix is not None:
            command.extend(["--suffix", suffix])
        subprocess.run(command)
    finally:
        # Always clean up, including after a failed write or launch.
        for path in scratch_paths:
            if os.path.exists(path):
                os.remove(path)
def main(
    action_trajs_path: str,
    traj_path: str,
    config_file: str,
    data_path: str,
    suffix: str,
):
    """Dispatch to the replay routine matching the supplied arguments.

    ``action_trajs_path`` takes precedence: when it is given, the synthetic
    trajectories are replayed and ``traj_path`` is ignored. Otherwise a given
    ``traj_path`` is replayed as a single trajectory. When neither is given,
    a usage hint is printed and nothing is replayed.
    """
    if action_trajs_path is not None:
        process_synthetic_trajs(action_trajs_path, config_file, suffix)
        return

    if traj_path is not None:
        process_single_traj(traj_path, config_file, data_path, suffix)
        return

    # Neither input was supplied: tell the user which flags are accepted.
    usage_hint = (
        "No replays generated.\n"
        "You must either provide one of the following. Either...\n"
        "\t* --action_trajs_path for replaying synthetic trajectories\n"
        "\t* --traj_path for replaying SWE-agent style trajectories (from ./trajectories folder)\n"
    )
    print(usage_hint)
if __name__ == "__main__":
    # Command-line entry point: declare the flags in display order, parse
    # them, and forward every parsed value to main() by keyword.
    flag_table = [
        ("--action_trajs_path", {"help": "Path to action trajectories to replay", "default": None}),
        ("--traj_path", {"help": "Path to trajectory to replay", "default": None}),
        ("--config_file", {"help": "Path to template", "required": True}),
        (
            "--data_path",
            {
                "help": "(Optional) Path to data file containing task instances ref'ed by replay trajectories",
                "default": None,
            },
        ),
        ("--suffix", {"help": "(Optional) Suffix argument appended to end of traj path", "default": None}),
    ]
    cli = ArgumentParser()
    for flag, options in flag_table:
        cli.add_argument(flag, **options)
    cli_args = cli.parse_args()
    main(**vars(cli_args))
|