Buckets:

Tsukihjy
/

testcase

Tsukihjy/testcase / testcase-data /eval /testcase_length.py

3.04 kB

	import os
	import json
	from datetime import datetime
	from load_data import get_data, save_back_results, get_tests
	from excute_tool_linux import run_cpp_code_linux, cal_length
	from multiprocessing import Pool, cpu_count
	from tqdm import tqdm
	import logging

	def setup_logging():
	    """Configure root logging to a timestamped file and to the console.

	    Creates the ``logs`` directory if it does not exist, then calls
	    ``logging.basicConfig`` with INFO level and two handlers: a file handler
	    writing to ``logs/execution_<YYYYmmdd_HHMMSS>.log`` and a stream handler.
	    Note that ``basicConfig`` leaves the root logger untouched when it
	    already has handlers installed.

	    Returns:
	        The root logger.
	    """
	    os.makedirs("logs", exist_ok=True)
	    log_file = f"logs/execution_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

	    logging.basicConfig(
	        level=logging.INFO,
	        format='%(asctime)s - %(levelname)s - %(message)s',
	        handlers=[
	            # The messages logged by this script are non-ASCII, so pin the
	            # file encoding instead of depending on the locale default.
	            logging.FileHandler(log_file, encoding="utf-8"),
	            logging.StreamHandler(),
	        ],
	    )
	    return logging.getLogger()

	def process_code_with_logging(data_item):
	    """Run ``cal_length`` on one item and log the outcome.

	    This is the wrapper handed to the process pool as its worker function.

	    Args:
	        data_item: Mapping with at least a ``"problem_id"`` key. It is passed
	            to ``cal_length`` together with the module-level ``test_mode``
	            and ``rank_p`` settings.

	    Returns:
	        Whatever ``cal_length`` returns on success. If ``cal_length`` (or
	        reading its result) raises, ``data_item`` itself is returned with
	        ``"error"`` set to ``["EXE"]`` and ``"details"`` set to the
	        exception text.

	    Raises:
	        KeyError: If ``data_item`` has no ``"problem_id"`` key (the lookup
	            happens before the guarded section).
	    """
	    # Fetch the root logger here instead of relying on a module-level
	    # ``logger`` name: that name is only bound under the ``__main__`` guard,
	    # so it does not exist when this module is imported or when workers are
	    # started with the "spawn" method. ``setup_logging`` returns this same
	    # root logger, so configured runs log exactly as before.
	    log = logging.getLogger()
	    problem_id = data_item["problem_id"]

	    try:
	        result = cal_length(data_item, test_mode, rank_p)
	        status = result.get("error", "Unknown")
	        log.info(f"执行完成 - 问题ID: {problem_id}, 状态: {status}")
	        return result
	    except Exception as e:
	        log.error(f"执行异常 - 问题ID: {problem_id}, 错误: {str(e)}")
	        # Mark the item as failed so the aggregation step skips it.
	        data_item["error"] = ["EXE"]
	        data_item["details"] = str(e)
	        return data_item


	def save_results(results, correct_code_output_file, output_file):
	    """Print the mean ``max`` / ``min`` / ``avg`` over the successful items.

	    Despite its name, this function writes nothing to disk; it only prints a
	    one-line summary to stdout.

	    Args:
	        results: Iterable of dicts. Items whose ``"error"`` value is not the
	            string ``"success"`` (or that have no ``"error"`` key) are
	            skipped; the remaining items must provide numeric ``"max"``,
	            ``"min"`` and ``"avg"`` entries.
	        correct_code_output_file: Unused; kept for interface compatibility.
	        output_file: Unused; kept for interface compatibility.

	    Returns:
	        None.
	    """
	    successful = [item for item in results if item.get("error") == "success"]

	    if not successful:
	        # Nothing to average: report it instead of dividing by zero.
	        print("没有成功的结果，无法计算平均值")
	        return

	    count = len(successful)
	    max_mean = sum(item["max"] for item in successful) / count
	    min_mean = sum(item["min"] for item in successful) / count
	    avg_mean = sum(item["avg"] for item in successful) / count

	    # The original literal used the invalid escape ``\|``; doubling the
	    # backslash prints the same text without the invalid-escape warning.
	    print(f"对比crux \\|\\| algo max: {max_mean} min: {min_mean} avg: {avg_mean}")

	# Settings read by process_code_with_logging inside the pool workers.
	test_mode = False
	rank_p = 5

	if __name__ == "__main__":
	    # Run identifiers; testcase_alg and model_name are only referenced by the
	    # disabled persistence code further down.
	    datasets_name = "tcb"
	    testcase_alg = "Sample_wrong-code"
	    model_name = "qwen-coder-plus"

	    logger = setup_logging()
	    logger.info("开始执行代码评估...")

	    # Locations of the two test sets handed to get_tests.
	    crux_tests_path = "/home/luoxianzhen/yang/save_tests_claude4-fliter/crux"
	    algo_tests_path = "/home/luoxianzhen/yang/save_tests_claude-sonnet-4-20250514-new/algo-fliter"
	    data = get_tests(
	        name=datasets_name,
	        test_path_al1=crux_tests_path,
	        algo_path=algo_tests_path,
	    )
	    logger.info(f"加载了 {len(data)} 个代码项目")

	    worker_count = 50
	    # worker_count = 1
	    logger.info(f"使用 {worker_count} 个CPU核心进行并行处理")

	    with Pool(worker_count) as pool:
	        # Results are consumed in completion order, not input order.
	        completed = pool.imap_unordered(process_code_with_logging, data)
	        progress = tqdm(completed, total=len(data), desc="执行进度")
	        results = list(progress)

	    # Disabled by the author: deterministic ordering and JSON persistence.
	    # results = sorted(results, key=lambda x: (x["problem_id"], x["code_id"]))
	    logger.info("所有代码执行完成，开始保存结果...")
	    # save_dir = "ALLmode_results" if not test_mode else "rank_result"
	    # json.dump(results, open(f"{save_dir}/{datasets_name}-{testcase_alg}-rank{rank_p}.json", "w", encoding="utf-8"), indent=4, ensure_ascii=False)
	    # logger.info(f"结果已保存到 {save_dir}/{datasets_name}-{testcase_alg}-{rank_p}.json")
	    save_results(results, correct_code_output_file="", output_file="")

Xet Storage Details

Size: 3.04 kB
Xet hash: 4886a73c5f447609bdca66c4caa32c52600133ec83946043d6c6cb4a4c2e4e0b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.