| import os | |
| import json | |
| from datetime import datetime | |
| from load_data import get_data, save_back_results, get_tests | |
| from excute_tool_linux import run_cpp_code_linux, cal_length | |
| from multiprocessing import Pool, cpu_count | |
| from tqdm import tqdm | |
| import logging | |
def setup_logging():
    """Configure root logging to a timestamped file and to the console.

    Creates a ``logs`` directory under the current working directory if it
    does not exist, then asks ``logging.basicConfig`` to install two
    handlers on the root logger: a file handler writing to
    ``logs/execution_<YYYYmmdd_HHMMSS>.log`` and a stream handler.

    Note that ``logging.basicConfig`` leaves the configuration untouched
    when the root logger already has handlers, so only the first call in a
    process has an effect on the installed handlers.

    Returns:
        logging.Logger: The root logger.
    """
    os.makedirs("logs", exist_ok=True)
    log_file = f"logs/execution_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            # The messages logged by this script contain non-ASCII text, so
            # pin the file encoding instead of relying on the locale default.
            logging.FileHandler(log_file, encoding="utf-8"),
            logging.StreamHandler(),
        ],
    )
    return logging.getLogger()
def process_code_with_logging(data_item):
    """Run ``cal_length`` on one item and log the outcome.

    Intended as the per-item worker function of a process pool.

    Args:
        data_item: Mapping describing one evaluation item. It must contain
            a ``"problem_id"`` key; a missing key raises ``KeyError``
            because the lookup happens before the ``try`` block.

    Returns:
        The value returned by ``cal_length`` on success. If ``cal_length``
        (or reading its result) raises, ``data_item`` itself is returned
        with ``"error"`` set to ``["EXE"]`` and ``"details"`` set to the
        exception text.
    """
    # Resolve the root logger here instead of reading the module-level
    # ``logger``: that name is only bound inside the ``__main__`` guard, so
    # it does not exist when this module is imported (e.g. by pool workers
    # started with the spawn/forkserver methods). ``setup_logging`` returns
    # this same root logger, so the output is unchanged when it was set up.
    log = logging.getLogger()
    problem_id = data_item["problem_id"]
    try:
        result = cal_length(data_item, test_mode, rank_p)
        # A result without an "error" entry is reported as "Unknown".
        status = result.get("error", "Unknown")
        log.info(f"执行完成 - 问题ID: {problem_id}, 状态: {status}")
        return result
    except Exception as e:
        # Best-effort: one failing item must not abort the whole pool run,
        # so the failure is recorded on the item and handed back.
        log.error(f"执行异常 - 问题ID: {problem_id}, 错误: {str(e)}")
        data_item["error"] = ["EXE"]
        data_item["details"] = str(e)
        return data_item
def save_results(results, correct_code_output_file, output_file):
    """Print the mean ``max`` / ``min`` / ``avg`` over successful results.

    Despite its name, this function does not write any file; it only
    prints one summary line to standard output.

    Args:
        results: Iterable of dict-like items. Items whose ``"error"`` value
            equals ``"success"`` must provide numeric ``"max"``, ``"min"``
            and ``"avg"`` entries; all other items (including those without
            an ``"error"`` key) are skipped.
        correct_code_output_file: Unused; kept so existing callers work.
        output_file: Unused; kept so existing callers work.

    Returns:
        None.
    """
    # ``.get`` so that an item lacking "error" is skipped rather than
    # raising KeyError.
    successful = [item for item in results if item.get('error') == "success"]
    count = len(successful)
    if count == 0:
        # Nothing to average: report it instead of dividing by zero.
        print("对比crux || algo 没有成功的结果,无法计算平均值")
        return

    max_all = sum(item['max'] for item in successful)
    min_all = sum(item['min'] for item in successful)
    avg_all = sum(item['avg'] for item in successful)
    print(f"对比crux || algo max: {max_all / count} min: {min_all / count} avg: {avg_all / count}")
# Run configuration read by process_code_with_logging and forwarded to
# cal_length. Defined at module level, outside the __main__ guard, so the
# names are bound whenever this module is imported.
test_mode = False
rank_p = 5

if __name__ == "__main__":
    # Run labels. ``testcase_alg`` and ``model_name`` are not used by the
    # active code below.
    datasets_name = "tcb"
    testcase_alg = "Sample_wrong-code"
    logger = setup_logging()
    logger.info("开始执行代码评估...")
    model_name = "qwen-coder-plus"

    data = get_tests(
        name=datasets_name,
        test_path_al1="/home/luoxianzhen/yang/save_tests_claude4-fliter/crux",
        algo_path="/home/luoxianzhen/yang/save_tests_claude-sonnet-4-20250514-new/algo-fliter",
    )
    logger.info(f"加载了 {len(data)} 个代码项目")

    # Use at most 50 workers, but never more than the machine has CPUs, so
    # the logged core count is accurate and the host is not oversubscribed.
    cpu = min(50, cpu_count())
    logger.info(f"使用 {cpu} 个CPU核心进行并行处理")

    with Pool(cpu) as pool:
        # imap_unordered yields results in completion order, so ``results``
        # is not in the same order as ``data``.
        results = list(tqdm(
            pool.imap_unordered(process_code_with_logging, data),
            total=len(data),
            desc="执行进度",
        ))

    logger.info("所有代码执行完成,开始保存结果...")
    # NOTE: dumping ``results`` to a JSON file is currently disabled; only
    # the aggregate statistics printed by save_results are produced.
    save_results(results, correct_code_output_file="", output_file="")
Xet Storage Details
- Size:
- 3.04 kB
- Xet hash:
- 4886a73c5f447609bdca66c4caa32c52600133ec83946043d6c6cb4a4c2e4e0b
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.