| #!/bin/bash |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
# Pull in API keys from the user's home directory when such a file exists.
if [[ -f ~/.apikeys.sh ]]; then
  . ~/.apikeys.sh
fi

# Step selection flags: everything is off until the command line says otherwise.
RUN_STEP1=false RUN_STEP2=false RUN_STEP3=false RUN_STEP4=false RUN_STEP5=false

# When "true", each successful step is followed by a validation run.
VALIDATE_STEPS=false

# Defaults for the Snakemake config file and the output directory.
CONFIG="config.yml"
OUTPUT_DIR="results"

# Directory the script was launched from; relative config paths are resolved
# against it later on.
PROJECT_ROOT="$(pwd)"
|
|
|
|
#######################################
# Print the command-line help text.
# Outputs: usage information on stdout
#######################################
print_usage() {
  echo "Usage: $0 [OPTIONS]"
  echo ""
  echo "Run Snakemake workflows for ATLAS analysis"
  echo ""
  echo "Options:"
  echo " --step1 Run summarize_root workflow (both rules: data generation + prompt processing)"
  echo " --step2 Run create_numpy workflow"
  echo " --step3 Run preprocess workflow"
  echo " --step4 Run scores workflow"
  echo " --step5 Run categorization workflow"
  echo " --validate Run validation after each successful step"
  echo " --out-dir DIR Custom output directory (default: results)"
  echo " --job-id ID Create unique directory: results_job_ID"
  echo " --auto-dir Create unique directory with timestamp: results_YYYYMMDD_HHMMSS"
  echo " --config FILE Config file passed to Snakemake (default: config.yml)"
  echo " --help Show this help message"
  echo ""
  echo "Examples:"
  echo " $0 --step1 --auto-dir # results_20250916_143052/"
  echo " $0 --step1 --job-id 12345 # results_job_12345/"
  echo " $0 --step1 --out-dir my_run_1 # my_run_1/"
  echo ""
  echo "If no options are provided, all steps are run sequentially."
}

#######################################
# Abort unless the option at $1 is followed by a value.
# Arguments: the remaining command-line words, option first
# Outputs:   error message on stderr when the value is missing
# Returns:   exits the shell with status 1 on failure
#######################################
require_option_value() {
  if (($# < 2)); then
    echo "Option $1 requires an argument" >&2
    echo "Use --help for usage information" >&2
    exit 1
  fi
}

#######################################
# Translate command-line options into the script's global settings.
# Globals (written): RUN_STEP1..RUN_STEP5, VALIDATE_STEPS, OUTPUT_DIR,
#                    CONFIG, TIMESTAMP (only for --auto-dir)
# Arguments: the script's command-line words
# Returns:   exits 0 after --help, exits 1 on an unknown option or a
#            missing option value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --step1)
        RUN_STEP1=true
        shift
        ;;
      --step2)
        RUN_STEP2=true
        shift
        ;;
      --step3)
        RUN_STEP3=true
        shift
        ;;
      --step4)
        RUN_STEP4=true
        shift
        ;;
      --step5)
        RUN_STEP5=true
        shift
        ;;
      --validate)
        VALIDATE_STEPS=true
        shift
        ;;
      --out-dir)
        require_option_value "$@"
        # An empty directory name would make later file operations resolve
        # against the filesystem root, so refuse it up front.
        if [[ -z "$2" ]]; then
          echo "Option $1 requires a non-empty directory name" >&2
          echo "Use --help for usage information" >&2
          exit 1
        fi
        OUTPUT_DIR="$2"
        shift 2
        ;;
      --job-id)
        require_option_value "$@"
        # Derive a per-job directory name from the given identifier.
        OUTPUT_DIR="results_job_$2"
        shift 2
        ;;
      --auto-dir)
        # Derive a unique directory name from the current date and time.
        TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
        OUTPUT_DIR="results_${TIMESTAMP}"
        shift
        ;;
      --config)
        require_option_value "$@"
        CONFIG="$2"
        shift 2
        ;;
      --help | -h)
        print_usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
|
| |
# Decide whether the caller picked individual steps. A flag counts as
# "requested" as soon as it holds anything other than the literal "false".
step_requested=false
for step_flag in RUN_STEP1 RUN_STEP2 RUN_STEP3 RUN_STEP4 RUN_STEP5; do
  if [[ "${!step_flag}" != "false" ]]; then
    step_requested=true
    break
  fi
done

if [[ "${step_requested}" == "true" ]]; then
  echo "=== Running Selected Snakemake Workflows (Output: ${OUTPUT_DIR}) ==="
else
  # Nothing was selected explicitly, so switch every step on.
  RUN_STEP1=true RUN_STEP2=true RUN_STEP3=true RUN_STEP4=true RUN_STEP5=true
  echo "=== Running All Snakemake Workflows Sequentially (Output: ${OUTPUT_DIR}) ==="
fi
echo ""
|
|
| |
# Activate the Python environment used by the rest of the script.
module load python
conda activate llm_env

# Resolve the config file to an absolute path. Snakemake is later started
# from inside the output directory, where a relative path would no longer
# point at the same file.
if [[ "${CONFIG}" = /* ]]; then
  CONFIG_PATH="${CONFIG}"
else
  CONFIG_PATH="${PROJECT_ROOT}/${CONFIG}"
fi

if [[ ! -f "${CONFIG_PATH}" ]]; then
  echo "❌ Config file not found at ${CONFIG_PATH}" >&2
  exit 1
fi

# Normalise the output directory by dropping every trailing slash.
while [[ "${OUTPUT_DIR}" == */ ]]; do
  OUTPUT_DIR="${OUTPUT_DIR%/}"
done

# An empty value (nothing given, or just "/") would turn every later
# "${OUTPUT_DIR}/..." path into a path under the filesystem root.
if [[ -z "${OUTPUT_DIR}" ]]; then
  echo "❌ Output directory must not be empty or the filesystem root" >&2
  exit 1
fi

# BASE_DIR is the absolute form of OUTPUT_DIR. $PWD is used rather than
# PROJECT_ROOT so it matches the directory that relative OUTPUT_DIR paths
# are actually resolved against.
if [[ "${OUTPUT_DIR}" = /* ]]; then
  BASE_DIR="${OUTPUT_DIR}"
else
  BASE_DIR="$PWD/${OUTPUT_DIR}"
fi
|
|
# Refuse to stage anything when the output directory is unset or empty:
# every path below would otherwise resolve against the filesystem root.
: "${OUTPUT_DIR:?output directory must be set before preparing workflow files}"

#######################################
# Escape a string for use as the replacement part of a sed 's#...#...#'
# command: backslash, '&' and the '#' delimiter are each prefixed with '\'.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}

base_dir_sed=$(sed_escape_replacement "${BASE_DIR}")
config_path_sed=$(sed_escape_replacement "${CONFIG_PATH}")

echo "Preparing workflow files..."

# Work on a private copy of the prompts and fill in the {BASE_DIR} placeholder.
mkdir -p -- "${OUTPUT_DIR}/prompts_temp"
cp -r -- prompts/* "${OUTPUT_DIR}/prompts_temp/"
sed -i -e "s#{BASE_DIR}#${base_dir_sed}#g" -- "${OUTPUT_DIR}"/prompts_temp/*.txt

# Copy each Snakefile under a *_temp.smk name so the originals stay untouched.
for workflow in summarize_root create_numpy preprocess scores categorization; do
  cp -- "workflow/${workflow}.smk" "${OUTPUT_DIR}/${workflow}_temp.smk"
done

# Helper scripts are copied next to the Snakefiles in the output directory.
for helper in supervisor_coder.py write_prompt.py check_soln.py; do
  cp -- "${helper}" "${OUTPUT_DIR}/${helper}"
done

# Fill in the {BASE_DIR} and {CONFIG} placeholders in the copied Snakefiles.
sed -i -e "s#{BASE_DIR}#${base_dir_sed}#g" -- "${OUTPUT_DIR}"/*_temp.smk
sed -i -e "s#{CONFIG}#${config_path_sed}#g" -- "${OUTPUT_DIR}"/*_temp.smk

# Replace any reference arrays already in the output directory with a fresh
# copy of the ones shipped in solution/arrays.
echo "Copying reference solution arrays for validation..."
mkdir -p -- "${OUTPUT_DIR}/solution/arrays"
rm -f -- "${OUTPUT_DIR:?}"/solution/arrays/*
cp -- solution/arrays/* "${OUTPUT_DIR}/solution/arrays/"

# Directories for generated code and logs, plus the shared utils module.
mkdir -p -- "${OUTPUT_DIR}/generated_code"
mkdir -p -- "${OUTPUT_DIR}/logs"
cp -- utils.py "${OUTPUT_DIR}/generated_code/utils.py"

# Remove bookkeeping arrays that may be left in this directory
# (presumably from an earlier run, so this run starts clean).
rm -f -- \
  "${OUTPUT_DIR}/logs/success.npy" \
  "${OUTPUT_DIR}/logs/calls.npy" \
  "${OUTPUT_DIR}/logs/input_tokens.npy" \
  "${OUTPUT_DIR}/logs/output_tokens.npy"

echo "Starting sequential execution..."
echo ""
|
|
| |
#######################################
# Run one Snakemake invocation from inside the output directory and, when
# validation is enabled, run check_soln.py for the step afterwards.
# Globals:
#   OUTPUT_DIR     (read) directory to run in
#   BASE_DIR       (read) absolute form of OUTPUT_DIR, used for validation paths
#   CONFIG_PATH    (read) config file handed to --configfile
#   VALIDATE_STEPS (read) "true" enables per-step validation
# Arguments:
#   $1 - workflow name; also names logs/<name>.log
#   $2 - Snakefile, relative to OUTPUT_DIR
#   $3 - value passed to --forcerun
#   $4 - step number passed to check_soln.py --step
# Outputs:
#   progress messages on stdout; Snakemake output goes to logs/<name>.log and
#   validation output is appended to logs/<name>_validation.log
# Returns:
#   0 when Snakemake succeeded (whatever the validation verdict),
#   1 when it failed or the output directory could not be entered
#######################################
run_workflow() {
  local workflow_name=$1
  local smk_file=$2
  local target=$3
  local step_number=$4
  local log_file="logs/${workflow_name}.log"
  # Absolute on purpose: by the time this file is read the working directory
  # is OUTPUT_DIR itself, so a path relative to the launch directory would
  # point at the wrong place.
  local success_file="${BASE_DIR}/logs/success.npy"
  local start_time duration validation_result

  echo "========================================="
  echo "Running: $workflow_name"
  echo "Target: $target"
  echo "Time: $(date)"
  echo "========================================="

  # The Snakefile and logs/ paths below are relative to the output directory.
  if ! pushd "$OUTPUT_DIR" > /dev/null; then
    echo "❌ Failed to cd into $OUTPUT_DIR"
    return 1
  fi

  # Show the command exactly as it is executed below.
  echo "Command: snakemake -s \"$smk_file\" -j 1 --forcerun \"$target\" --rerun-incomplete --configfile \"${CONFIG_PATH}\" --latency-wait 120 --verbose > ${log_file} 2>&1"
  echo ""

  start_time=$SECONDS

  if ! snakemake -s "$smk_file" -j 1 --forcerun "$target" --rerun-incomplete --configfile "${CONFIG_PATH}" --latency-wait 120 --verbose > "$log_file" 2>&1; then
    duration=$((SECONDS - start_time))
    echo ""
    echo "❌ $workflow_name failed after ${duration}s"
    echo ""
    popd > /dev/null
    return 1
  fi

  duration=$((SECONDS - start_time))
  echo ""
  echo "✅ $workflow_name completed successfully in ${duration}s"
  echo ""

  if [[ "$VALIDATE_STEPS" == "true" ]]; then
    echo "Running validation for Step $step_number..."
    if python check_soln.py --out_dir "${BASE_DIR}" --step "$step_number" >> "logs/${workflow_name}_validation.log" 2>&1; then
      echo "✅ Step $step_number validation completed"

      # Report the verdict stored for this step (entry step_number - 1).
      if [[ -f "$success_file" ]]; then
        # The path and index travel as arguments rather than being pasted
        # into the Python source, so unusual characters cannot break it.
        validation_result=$(python -c 'import sys, numpy as np; print(np.load(sys.argv[1])[int(sys.argv[2]) - 1])' "$success_file" "$step_number")
        if [[ "$validation_result" == "1" ]]; then
          echo "✅ Step $step_number validation: PASS"
        else
          echo "❌ Step $step_number validation: FAIL"
        fi
      fi
    else
      echo "❌ Step $step_number validation failed to run"
    fi
    echo ""
  fi

  popd > /dev/null
  return 0
}
|
|
| |
# step_counter numbers the stages in the order they are actually started, so
# the printed index can differ from the fixed step number handed to
# run_workflow. The status of run_workflow is not inspected: later stages are
# started regardless of how earlier ones ended.
step_counter=1

if [[ "${RUN_STEP1}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running summarize_root workflow (both rules)..."
  # Step 1 consists of two invocations that share one Snakefile.
  for step1_rule in summarize_root insert_root_summary; do
    run_workflow "${step1_rule}" "summarize_root_temp.smk" "${step1_rule}" 1
  done
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP2}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running create_numpy workflow..."
  run_workflow "create_numpy" "create_numpy_temp.smk" "create_numpy" 2
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP3}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running preprocess workflow..."
  run_workflow "preprocess" "preprocess_temp.smk" "preprocess" 3
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP4}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running scores workflow..."
  run_workflow "scores" "scores_temp.smk" "scores" 4
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP5}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running categorization workflow..."
  run_workflow "categorization" "categorization_temp.smk" "categorization" 5
  step_counter=$((step_counter + 1))
fi
|
|
# Tell the user where the results and logs of this run can be found.
echo ""
echo "=== Sequential Execution Complete ==="
echo "Check ${OUTPUT_DIR}/ for output files"
echo "Check ${OUTPUT_DIR}/logs/*.log files for detailed logs"
if [[ "$VALIDATE_STEPS" == "true" ]]; then
  echo "Check ${OUTPUT_DIR}/logs/*_validation.log files for validation results"
fi

# The comprehensive validation is only offered when all five steps were run.
if [[ "$RUN_STEP1" == "true" && "$RUN_STEP2" == "true" && "$RUN_STEP3" == "true" && "$RUN_STEP4" == "true" && "$RUN_STEP5" == "true" ]]; then
  echo ""
  run_final_validation=true
  if [[ "$VALIDATE_STEPS" == "false" ]]; then
    # Per-step validation was off, so ask before running the final check.
    # Any reply other than a single y/Y (including no reply at all) skips it.
    read -p "Run final comprehensive validation? (y/n): " -n 1 -r
    echo ""
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
      run_final_validation=false
    fi
  fi

  if [[ "$run_final_validation" == "true" ]]; then
    echo "Running final comprehensive validation..."
    # Quoted so an output directory containing spaces or glob characters
    # reaches check_soln.py as a single argument.
    python check_soln.py --out_dir "${OUTPUT_DIR}"
  fi
else
  echo ""
  echo "Note: Final comprehensive validation skipped (not all steps were run)"
fi

echo ""

printf 'Done!\n\n'
|
|