Tsukihjy/testcase / testcase-data /eval_all_wrong_code.sh
download
raw
956 Bytes
# Requires bash (indexed arrays are used below).
# Switch to the evaluation directory first: parallel_exe_all_wrong_code.py is
# invoked by relative path further down. Abort if the directory is missing
# instead of silently running from whatever directory the caller was in.
cd /home/luoxianzhen/yang/eval_wrong_code || {
  echo "cannot cd to /home/luoxianzhen/yang/eval_wrong_code" >&2
  exit 1
}

# Test-case algorithms to evaluate (passed as --testcase_alg).
# Full list kept for reference:
# test_als=("lcb" "ht" "algo" "crux" "predo")
test_als=("lcb")

# Versions to evaluate (passed as --version).
# Full list kept for reference:
# version=("v1" "v2" "v3" "v4" "v5")
version=("v3" "v4" "v5")

# Models to evaluate (passed as --model_name); uncomment entries to enable them.
model_name_list=(
  "claude-sonnet-4-20250514-thinking"
  # "deepseek-v3"
  # "qwen3-nothink"
  # "claude4"
  # "gpt-4o"
  # "qwen-coder-plus"
  # "Qwen2.5-7B-Instruct"
  # "Qwen2.5-14B-Instruct"
  # "Qwen2.5-32B-Instruct"
  # "Qwen2.5-Coder-7B-Instruct"
  # "Qwen2.5-Coder-14B-Instruct"
  # "Qwen2.5-Coder-32B-Instruct"
)
# Run every (model, algorithm, version) combination sequentially.
for model in "${model_name_list[@]}"; do
  for alg in "${test_als[@]}"; do
    # Exactly one `in` here: a second `in` would be taken as a list element
    # and the driver would be launched once with --version "in".
    for ver in "${version[@]}"; do
      echo "Running testcase_alg=$alg model_name=$model"
      python parallel_exe_all_wrong_code.py \
        --testcase_alg "$alg" \
        --model_name "$model" \
        --version "$ver"
    done
  done
done

Xet Storage Details

Size:
956 Bytes
·
Xet hash:
8e0ba91ee508336d659551bd163b69559f718f878085282a390010afec0eb0e0

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.