#!/bin/bash
#
# Runs the agent and then evaluates its predictions, N times in a row.
#
# Usage:
#   bash run_and_eval.sh <suffix> <template> <dataset_path> <num_runs>
#
# Example:
#   bash run_and_eval.sh '' default_with_inclusive_edit_demo_v2 \
#     data/dev-easy/swe-bench-dev-easy-med.json 3
#
# Arguments:
#   suffix        Prefix for the per-run suffix "<suffix>run<i>" (may be '').
#   template      Config name; resolved to config/configs/<template>.yaml.
#   dataset_path  Path to the dataset file passed to both commands.
#   num_runs      Number of run + evaluate iterations (non-negative integer).

#######################################
# Print a short usage message to stderr.
#######################################
usage() {
  printf 'Usage: %s <suffix> <template> <dataset_path> <num_runs>\n' \
    "${0##*/}" >&2
}

#######################################
# Build the path of the predictions file for one run.
# Globals:   USER (read)
# Arguments: $1 - dataset file name (basename of the dataset path)
#            $2 - template name
#            $3 - run suffix, e.g. "run1"
# Outputs:   the predictions path on stdout
#######################################
predictions_path() {
  local dataset_name=$1
  local template=$2
  local run_suffix=$3

  # The t-/p-/c-/install- tags mirror the flags passed to run.py in main().
  # Braces around each expansion are required: a bare "$template__t" would be
  # read as the (unset) variable "template__t".
  # NOTE(review): assumes run.py names its output directory with the full
  # dataset basename (extension included) - confirm against run.py.
  printf '%s\n' \
    "trajectories/${USER}/gpt4__${dataset_name}__${template}__t-0.20__p-0.95__c-3.00__install-1__${run_suffix}/all_preds.jsonl"
}

#######################################
# Validate the arguments, then run and evaluate the agent num_runs times.
# Arguments: suffix template dataset_path num_runs (see file header)
# Returns:   2 on bad arguments; otherwise the status of the last command run.
#######################################
main() {
  if (( $# < 4 )); then
    usage
    return 2
  fi

  local suffix=${1:-}
  local template=$2
  local dataset_path=$3
  local num_runs=$4

  if [[ -z "$template" || -z "$dataset_path" ]]; then
    printf 'error: template and dataset_path must be non-empty\n' >&2
    usage
    return 2
  fi
  if [[ ! "$num_runs" =~ ^[0-9]+$ ]]; then
    printf 'error: num_runs must be a non-negative integer, got: %s\n' \
      "$num_runs" >&2
    usage
    return 2
  fi
  # Force base 10 so a value such as "08" is not rejected as invalid octal.
  num_runs=$(( 10#$num_runs ))

  # Extract the file name from the dataset path.
  local dataset_name
  dataset_name=$(basename -- "$dataset_path")

  local i run_suffix
  for (( i = 1; i <= num_runs; i++ )); do
    run_suffix="${suffix}run${i}"

    # Command 1: run the agent. A failure here does not stop the evaluation
    # or the remaining runs.
    python run.py \
      --model_name gpt4 \
      --data_path "$dataset_path" \
      --config_file "config/configs/${template}.yaml" \
      --suffix "$run_suffix" \
      --temperature 0.2 \
      --top_p 0.95 \
      --per_instance_cost_limit 3.00 \
      --install_environment 1

    # Command 2: evaluate the predictions written by command 1.
    python evaluation/evaluation.py \
      --predictions_path "$(predictions_path "$dataset_name" "$template" "$run_suffix")" \
      --swe_bench_tasks "$dataset_path" \
      --log_dir ./results \
      --testbed ./testbed \
      --skip_existing \
      --timeout 900 \
      --verbose
  done
}

main "$@"