Buckets:

Tsukihjy
/

testcase

956 Bytes

	# Switch to the evaluation project directory; later commands use paths
	# relative to it. Abort if the directory cannot be entered instead of
	# continuing from whatever directory the script was started in.
	cd /home/luoxianzhen/yang/eval_wrong_code || exit 1

	# Define the algorithm list
	# test_als=("lcb" "ht" "algo" "crux" "predo")
	test_als=("lcb")

	# Define the version list
	# version=("v1" "v2" "v3" "v4" "v5")

	version=("v3" "v4" "v5")

	# Define the model list (commented-out entries are currently disabled)
	model_name_list=(
	"claude-sonnet-4-20250514-thinking"
	# "deepseek-v3"
	# "qwen3-nothink"
	# "claude4"
	# "gpt-4o"
	# "qwen-coder-plus"
	# "Qwen2.5-7B-Instruct"
	# "Qwen2.5-14B-Instruct"
	# "Qwen2.5-32B-Instruct"
	# "Qwen2.5-Coder-7B-Instruct"
	# "Qwen2.5-Coder-14B-Instruct"
	# "Qwen2.5-Coder-32B-Instruct"
	)

	# Three nested loops: invoke parallel_exe_all_wrong_code.py once for every
	# (model, algorithm, version) combination.
	for model in "${model_name_list[@]}"; do
		for alg in "${test_als[@]}"; do
			# Exactly one `in` keyword: any extra word after it is taken as a
			# list item and would be passed to the script as a version.
			for ver in "${version[@]}"; do
				echo "Running testcase_alg=$alg model_name=$model"
				python parallel_exe_all_wrong_code.py \
					--testcase_alg "$alg" \
					--model_name "$model" \
					--version "$ver"
			done
		done
	done

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.