Tsukihjy/testcase / testcase-data /eval /testcase_length.py
download
raw
3.04 kB
import os
import json
from datetime import datetime
from load_data import get_data, save_back_results, get_tests
from excute_tool_linux import run_cpp_code_linux, cal_length
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import logging
def setup_logging():
    """Configure root logging to a timestamped file and a stream handler.

    Ensures a ``logs`` directory exists in the current working directory,
    opens ``logs/execution_<YYYYmmdd_HHMMSS>.log`` and passes a file handler
    plus a stream handler to ``logging.basicConfig`` at INFO level.

    Returns:
        The root logger (``logging.getLogger()``).
    """
    os.makedirs("logs", exist_ok=True)

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    log_path = f"logs/execution_{timestamp}.log"

    # Build the handlers up front, file handler first, then the stream one.
    file_handler = logging.FileHandler(log_path)
    stream_handler = logging.StreamHandler()

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[file_handler, stream_handler],
    )
    return logging.getLogger()
def process_code_with_logging(data_item):
    """Run ``cal_length`` on one data item and log the outcome.

    Args:
        data_item: Mapping that must contain a ``"problem_id"`` key. It is
            passed to ``cal_length`` together with the module-level
            ``test_mode`` and ``rank_p`` settings.

    Returns:
        Whatever ``cal_length`` returns when it succeeds. If any exception is
        raised while computing or logging the result, ``data_item`` itself is
        returned after being mutated in place: ``"error"`` is set to
        ``["EXE"]`` and ``"details"`` to the exception text.

    Raises:
        KeyError: If ``data_item`` has no ``"problem_id"`` key (the lookup
            happens before the ``try`` block).
    """
    # Resolve the root logger here instead of relying on a module-global
    # ``logger`` that is only bound under the ``__main__`` guard: pool workers
    # started with a non-fork start method re-import the module without
    # running that guard, and the missing name would crash the worker.
    log = logging.getLogger()
    problem_id = data_item["problem_id"]
    try:
        result = cal_length(data_item, test_mode, rank_p)
        status = result.get("error", "Unknown")
        log.info(f"执行完成 - 问题ID: {problem_id}, 状态: {status}")
        return result
    except Exception as e:
        log.error(f"执行异常 - 问题ID: {problem_id}, 错误: {str(e)}")
        # Mark the item as failed so downstream aggregation can skip it.
        data_item["error"] = ["EXE"]
        data_item["details"] = str(e)
        return data_item
def save_results(results, correct_code_output_file, output_file):
    """Print the mean ``max``/``min``/``avg`` over all successful results.

    Only items whose ``"error"`` value equals the string ``"success"`` are
    aggregated; every other item (including ones without an ``"error"`` key)
    is skipped. Nothing is written to disk by this function.

    Args:
        results: Iterable of dict-like result items. Successful items must
            provide numeric ``"max"``, ``"min"`` and ``"avg"`` entries.
        correct_code_output_file: Unused; kept for interface compatibility.
        output_file: Unused; kept for interface compatibility.
    """
    successful = [item for item in results if item.get("error") == "success"]

    # Without this guard an all-failed run would divide by zero below.
    if not successful:
        print("对比crux || algo: no successful results, nothing to average")
        return

    count = len(successful)
    max_all = sum(item['max'] for item in successful)
    min_all = sum(item['min'] for item in successful)
    avg_all = sum(item['avg'] for item in successful)
    print(f"对比crux || algo max: {max_all / count} min: {min_all / count} avg: {avg_all / count}")
# Module-level run settings, forwarded unchanged to cal_length() by
# process_code_with_logging().
# NOTE(review): their exact meaning is defined by cal_length in
# excute_tool_linux — confirm there before changing them.
test_mode = False
rank_p = 5
if __name__ == "__main__":
    # Run identifiers.
    datasets_name = "tcb"
    testcase_alg = "Sample_wrong-code"
    model_name = "qwen-coder-plus"

    logger = setup_logging()
    logger.info("开始执行代码评估...")

    # Load the items to evaluate from the two test-case directories.
    crux_tests_dir = "/home/luoxianzhen/yang/save_tests_claude4-fliter/crux"
    algo_tests_dir = "/home/luoxianzhen/yang/save_tests_claude-sonnet-4-20250514-new/algo-fliter"
    data = get_tests(
        name=datasets_name,
        test_path_al1=crux_tests_dir,
        algo_path=algo_tests_dir,
    )
    logger.info(f"加载了 {len(data)} 个代码项目")

    # Fixed worker count (set to 1 to run serially when debugging).
    cpu = 50
    logger.info(f"使用 {cpu} 个CPU核心进行并行处理")

    with Pool(cpu) as pool:
        # Results arrive in completion order, not input order.
        unordered_results = pool.imap_unordered(process_code_with_logging, data)
        results = list(tqdm(unordered_results, total=len(data), desc="执行进度"))

    logger.info("所有代码执行完成,开始保存结果...")

    # Sorting by (problem_id, code_id) and dumping `results` as JSON to
    # "<save_dir>/<datasets_name>-<testcase_alg>-rank<rank_p>.json" (save_dir
    # "rank_result" when test_mode is set, otherwise "ALLmode_results") is
    # currently disabled; only the aggregate statistics are printed.
    save_results(results, correct_code_output_file="", output_file="")

Xet Storage Details

Size:
3.04 kB
·
Xet hash:
4886a73c5f447609bdca66c4caa32c52600133ec83946043d6c6cb4a4c2e4e0b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.