run_eval.sh

#!/bin/bash
# The first positional argument
predictions_path=$1

# Check if predictions_path is not provided
if [ -z "$predictions_path" ]; then
    echo "Usage: $0 <predictions_path> [dataset_name_or_path] [results_dir] [testbed_dir]"
    exit 1
fi

# Default values for the optional arguments
dataset_name_or_path="${2:-princeton-nlp/SWE-bench}"
results_dir="${3:-results}"
testbed_dir="${4:-testbed}"

# If results or testbed directories do not exist, create them
if [ ! -d "$results_dir" ]; then
    mkdir -p "$results_dir"
    echo "Created results directory at $results_dir"
fi

if [ ! -d "$testbed_dir" ]; then
    mkdir -p "$testbed_dir"
    echo "Created testbed directory at $testbed_dir"
fi

# Run the Python script with the specified arguments
python evaluation.py \
    --predictions_path "$predictions_path" \
    --swe_bench_tasks "$dataset_name_or_path" \
    --log_dir "$results_dir" \
    --testbed "$testbed_dir" \
    --skip_existing \
    --timeout 900 \
    --verbose
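
For reference, the script above takes the predictions path as its only required argument and fills in the dataset, results, and testbed locations from the defaults when they are omitted. A minimal invocation sketch follows; the predictions file name and the custom directory names are placeholders, not paths taken from this repository.

# Use the defaults for dataset, results, and testbed
./run_eval.sh predictions.jsonl

# Override every optional argument explicitly (placeholder names)
./run_eval.sh predictions.jsonl princeton-nlp/SWE-bench my_results my_testbed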