#!/bin/bash
#
# Pipeline driver.
#
# Reads `name`, `model` and `out_dir` from the config file, makes temporary
# copies of the prompts and Snakemake workflows with the `{BASE_DIR}`
# placeholder replaced by `out_dir`, runs the five workflows concurrently
# through GNU parallel, then checks the results and records per-task
# statistics (time, success, calls, input/output tokens).
#
# Usage: run from the directory that contains config.yml, prompts/,
#        workflow/, utils.py, check_soln.py, get_arr.py and update_stats.py.
#
# External tools invoked: snakemake, parallel, python, sed -i, grep, awk, tr.
# NOTE(review): `sed -i` without a suffix argument is the GNU form; confirm
# the target platform before running on BSD/macOS.
#
# NOTE: `set -e` is deliberately not enabled. The flow below keeps going to
# the stats-collection step after check_soln.py regardless of its exit
# status, and critical steps are checked explicitly instead.

# Config file used for both value lookup and `snakemake --configfile`.
export CONFIG_FILE=config.yml

# Task names, in the order their statistics are reported (index 0 -> "1").
# Each task has a workflow at workflow/<task>.smk.
readonly -a TASK_NAMES=(summarize_root create_numpy preprocess scores categorization)

#######################################
# Print the value of a top-level `key: value` line of the config file.
# Only the second whitespace-separated field is returned, with single and
# double quote characters removed. Prints nothing if the key is absent.
# Globals:   CONFIG_FILE (read)
# Arguments: $1 - key name (without the trailing colon)
# Outputs:   the value on stdout
#######################################
config_value() {
  grep "^${1}:" "$CONFIG_FILE" | awk '{print $2}' | tr -d "'\""
}

#######################################
# Escape a string for use as the replacement part of `sed "s#...#...#"`.
# Backslash, `&` and the `#` delimiter are prefixed with a backslash so the
# text is inserted literally.
# Arguments: $1 - raw replacement text
# Outputs:   escaped text on stdout
#######################################
escape_sed_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}

#######################################
# Remove the temporary prompt/workflow copies created in the working dir.
# Safe to call when nothing exists. Registered as the EXIT trap so the
# copies are removed on failure paths too.
# Globals:   TASK_NAMES (read)
#######################################
cleanup_temp_files() {
  local task
  rm -rf -- prompts_temp
  for task in "${TASK_NAMES[@]}"; do
    rm -f -- "${task}_temp.smk"
  done
}

#######################################
# Run one task: force-run one or two Snakemake targets in sequence.
# All Snakemake output goes to $OUT_DIR/<task>.log; the elapsed whole
# seconds are written to $OUT_DIR/<task>.time on success and on failure.
# Globals:   OUT_DIR (read), CONFIG_FILE (read, defaults to config.yml)
# Arguments: $1 - task name (used for the log/time file names and messages)
#            $2 - Snakefile to run
#            $3 - first target passed to --forcerun
#            $4 - optional second target, run only if the first succeeded
# Outputs:   progress and error messages on stdout
# Returns:   0 if every stage succeeded, 1 as soon as a stage fails
#######################################
run_task() {
  local task_name=$1
  local smk_file=$2
  local -a targets=("$3")
  if [[ -n "${4:-}" ]]; then
    targets+=("$4")
  fi
  local log_file="$OUT_DIR/${task_name}.log"
  local time_file="$OUT_DIR/${task_name}.time"
  local config_file=${CONFIG_FILE:-config.yml}
  local target task_time
  local stage=1

  echo "Starting $task_name..."
  local task_start=$SECONDS

  # Start from an empty log; every stage appends to it.
  : > "$log_file"

  for target in "${targets[@]}"; do
    if (( stage == 2 )); then
      echo "Running $task_name second stage: $target..."
    fi
    if ! snakemake -s "$smk_file" -j 1 --forcerun "$target" \
        --rerun-incomplete --configfile "$config_file" \
        --latency-wait 120 --verbose >> "$log_file" 2>&1; then
      task_time=$((SECONDS - task_start))
      echo "$task_time" > "$time_file"
      echo "ERROR: $task_name failed on $target after $task_time seconds"
      return 1
    fi
    stage=$((stage + 1))
  done

  task_time=$((SECONDS - task_start))
  echo "$task_time" > "$time_file"
  echo "$task_name completed successfully in $task_time seconds"
  return 0
}
# run_task is invoked by name from the commands handed to GNU parallel, so
# the function itself must be exported.
# NOTE(review): this presumably requires parallel to spawn bash; confirm.
export -f run_task

#######################################
# Entry point: prepare inputs, run all tasks, check results, write stats.
# Globals:   NAME, MODEL, OUT_DIR (written), PYTHONPATH (extended),
#            CONFIG_FILE, TASK_NAMES (read)
# Returns:   0 on success, 1 on invalid config, setup failure or task failure
#######################################
main() {
  # ---- Configuration -----------------------------------------------------
  if [[ ! -r "$CONFIG_FILE" ]]; then
    echo "ERROR: cannot read $CONFIG_FILE" >&2
    return 1
  fi

  NAME=$(config_value name)
  # shellcheck disable=SC2034 — parsed as before; not referenced in this script
  MODEL=$(config_value model)
  OUT_DIR=$(config_value out_dir)

  # An empty OUT_DIR would turn the paths below into /generated_code,
  # /logs/... and so on, so refuse to continue.
  if [[ -z "$NAME" || -z "$OUT_DIR" ]]; then
    echo "ERROR: 'name' and 'out_dir' must be set in $CONFIG_FILE" >&2
    return 1
  fi
  export OUT_DIR

  # ---- Temporary copies with {BASE_DIR} substituted ----------------------
  # Remove the copies on every exit path, and clear leftovers of an earlier
  # interrupted run first: `cp -r` into an existing prompts_temp would nest
  # the new copy and leave stale *.txt files in place.
  trap cleanup_temp_files EXIT
  cleanup_temp_files

  local base_dir_repl
  base_dir_repl=$(escape_sed_replacement "$OUT_DIR")

  cp -r prompts prompts_temp || return 1
  sed -i "s#{BASE_DIR}#${base_dir_repl}#g" prompts_temp/*.txt

  local task
  local -a smk_copies=()
  for task in "${TASK_NAMES[@]}"; do
    cp "workflow/${task}.smk" "${task}_temp.smk" || return 1
    smk_copies+=("${task}_temp.smk")
  done
  sed -i "s#{BASE_DIR}#${base_dir_repl}#g" "${smk_copies[@]}"

  # ---- Output directory --------------------------------------------------
  mkdir -p "$OUT_DIR/generated_code" || return 1
  cp utils.py "$OUT_DIR/generated_code/utils.py" || return 1

  # Drop the statistic arrays of any previous run.
  local metric
  for metric in success calls input_tokens output_tokens; do
    rm -f "$OUT_DIR/logs/${metric}.npy"
  done

  # ---- Run all tasks -----------------------------------------------------
  echo "Starting all tasks in parallel using GNU parallel..."
  echo "Running 5 independent tasks concurrently (no job limits)"
  echo "Tasks: summarize_root (2 steps), create_numpy, preprocess, scores, categorization"
  echo ""

  local start_time=$SECONDS

  # Prepend OUT_DIR; skip the separator when PYTHONPATH is unset/empty so no
  # empty path entry is introduced.
  export PYTHONPATH="$OUT_DIR${PYTHONPATH:+:$PYTHONPATH}"

  local -a jobs=(
    "run_task summarize_root summarize_root_temp.smk summarize_root insert_root_summary"
    "run_task create_numpy create_numpy_temp.smk create_numpy"
    "run_task preprocess preprocess_temp.smk preprocess"
    "run_task scores scores_temp.smk scores"
    "run_task categorization categorization_temp.smk categorization"
  )

  # --jobs is set to the number of tasks so all of them really start at once;
  # parallel's default is one job per CPU core.
  # --halt soon,fail=1: after the first failure start no new jobs, let the
  # running ones finish, then exit non-zero.
  if ! parallel --no-notice --jobs "${#jobs[@]}" --halt soon,fail=1 \
      --line-buffer ::: "${jobs[@]}"; then
    echo "ERROR: One or more tasks failed!"
    local log
    for log in "$OUT_DIR"/*.log; do
      if [[ -f "$log" ]] && grep -Eq 'ERROR|failed' "$log"; then
        echo "=== Failed task log: $(basename "$log") ==="
        tail -n 20 "$log"
      fi
    done
    return 1
  fi
  echo "All tasks completed successfully"

  local total_time=$((SECONDS - start_time))
  echo "Total time: $total_time seconds"

  # ---- Check results -----------------------------------------------------
  echo "Checking results"
  # Exit status intentionally not checked: stats are written either way.
  python check_soln.py --out_dir "$OUT_DIR"

  # ---- Collect and write statistics --------------------------------------
  echo "Writing stats"

  # Per-task wall time as written by run_task; "0" if the file is missing
  # or empty.
  local -a times=()
  local elapsed
  for task in "${TASK_NAMES[@]}"; do
    elapsed=$(cat "$OUT_DIR/${task}.time" 2>/dev/null || echo "0")
    times+=("${elapsed:-0}")
  done

  echo "Task times: summarize_root=${times[0]}s, create_numpy=${times[1]}s, preprocess=${times[2]}s, scores=${times[3]}s, categorization=${times[4]}s"

  # get_arr.py output is split on whitespace into one element per task;
  # missing elements fall back to 0 below.
  local -a success_arr=() call_arr=() input_token_arr=() output_token_arr=()
  read -r -a success_arr < <(python get_arr.py --name success --out_dir "$OUT_DIR")
  read -r -a call_arr < <(python get_arr.py --name calls --out_dir "$OUT_DIR")
  read -r -a input_token_arr < <(python get_arr.py --name input_tokens --out_dir "$OUT_DIR")
  read -r -a output_token_arr < <(python get_arr.py --name output_tokens --out_dir "$OUT_DIR")

  # Arguments are emitted in the same order as before: per-task
  # success/time/calls/input_tokens groups first, then all output_tokens.
  local -a stats_args=(--name "$NAME")
  local i n
  for i in 0 1 2 3 4; do
    n=$((i + 1))
    stats_args+=(
      "--success${n}" "${success_arr[i]:-0}"
      "--time${n}" "${times[i]}"
      "--calls${n}" "${call_arr[i]:-0}"
      "--input_tokens${n}" "${input_token_arr[i]:-0}"
    )
  done
  for i in 0 1 2 3 4; do
    stats_args+=("--output_tokens$((i + 1))" "${output_token_arr[i]:-0}")
  done
  python update_stats.py "${stats_args[@]}"

  # ---- Cleanup -----------------------------------------------------------
  # The temporary prompt/workflow copies are removed by the EXIT trap; the
  # per-task .time files are removed only after a complete run.
  rm -f "$OUT_DIR"/*.time

  echo "Finished!"
}

# Last command on purpose: main's return status becomes the exit status.
main "$@"