| import json | |
| import os | |
def get_testcases(testcase_path):
    """Load every non-blank line of a JSONL file as a parsed JSON value.

    Args:
        testcase_path: Path of the JSON-lines file to read.

    Returns:
        A list holding one parsed value per non-blank line, in file order.
        An empty list is returned when ``testcase_path`` does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if os.path.exists(testcase_path):
        with open(testcase_path, 'r', encoding='utf-8') as handle:
            # Whitespace-only lines are skipped rather than parsed.
            return [json.loads(raw) for raw in handle if raw.strip()]
    return []
| import random | |
def get_data(name="tcb", prefix_dir=None, testcase_alg="",
             dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"):
    """Build one evaluation record per wrong-code entry in the benchmark.

    Args:
        name: Unused by this function; kept for a uniform loader signature.
        prefix_dir: Directory that holds the ``tests-<tcb_id>.jsonl`` files.
            Must be a path; the default ``None`` makes ``os.path.join`` raise
            ``TypeError`` as soon as the dataset has at least one item.
        testcase_alg: Unused by this function.
        dataset_path: JSON file containing the list of benchmark problems.

    Returns:
        A list of dicts. ``test_cases`` is the *path* of the problem's test
        file (the file is neither checked for existence nor read here),
        ``code_id`` is the index of the entry inside ``wrong_code`` and
        ``rank`` is the number of wrong-code entries of that problem.
    """
    # Context manager so the dataset handle is closed deterministically.
    with open(dataset_path, "r", encoding="utf-8") as f:
        ds = json.load(f)

    res = []
    for item in ds:
        testcases = os.path.join(prefix_dir, f"tests-{item['tcb_id']}.jsonl")
        wrong_codes = item["wrong_code"]
        for idx, c in enumerate(wrong_codes):
            res.append({
                "code": c['code'],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "test_cases": testcases,
                "problem_id": item['tcb_id'],
                "code_id": idx,
                "rank": len(wrong_codes),
            })
    return res
def get_data_edge_and_random(name="tcb", prefix_dir=None, testcase_alg="",
                             dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"):
    """Build evaluation records for the first 100 problems with loaded tests.

    Unlike the path-based loaders, the test cases are read into memory with
    ``get_testcases``. When a problem has more test cases than wrong-code
    entries, a random sample of ``len(wrong_code)`` test cases is kept.

    Args:
        name: Unused by this function; kept for a uniform loader signature.
        prefix_dir: Directory that holds the ``tests-<tcb_id>.jsonl`` files.
            Must be a path; ``None`` makes ``os.path.join`` raise ``TypeError``.
        testcase_alg: Unused by this function.
        dataset_path: JSON file containing the list of benchmark problems.

    Returns:
        A list of dicts, one per wrong-code entry. All records of one problem
        share the same ``test_cases`` list object. ``rank`` is the number of
        wrong-code entries of the problem.
    """
    # Context manager so the dataset handle is closed deterministically.
    with open(dataset_path, "r", encoding="utf-8") as f:
        ds = json.load(f)

    # Only the first 100 problems of the dataset are used.
    ds = ds[0:100]
    res = []
    for item in ds:
        wrong_codes = item["wrong_code"]
        testcases = get_testcases(
            os.path.join(prefix_dir, f"tests-{item['tcb_id']}.jsonl")
        )
        if len(testcases) > len(wrong_codes):
            # Sampling uses the module-level RNG state, so the selection
            # varies between runs unless the caller seeds ``random``.
            testcases = random.sample(testcases, len(wrong_codes))
        for idx, c in enumerate(wrong_codes):
            res.append({
                "code": c['code'],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "test_cases": testcases,
                "problem_id": item['tcb_id'],
                "code_id": idx,
                "rank": len(wrong_codes),
            })
    return res
def load_all_wrong_code_subset(name="tcb", prefix_dir=None, testcase_alg="",
                               dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
                               wrong_code_path="/home/luoxianzhen/yang/data/Ours/all_wrong_code/data/all_wrong_code_subset.json"):
    """Build evaluation records from the "all wrong code" subset file.

    The benchmark dataset is only used to translate a problem name (taken
    from the ``problem`` field of each problem's first wrong-code entry) into
    its ``tcb_id``.

    Args:
        name: Unused by this function; kept for a uniform loader signature.
        prefix_dir: Directory that holds the ``tests-<tcb_id>.jsonl`` files.
            Must be a path; ``None`` makes ``os.path.join`` raise ``TypeError``.
        testcase_alg: Unused by this function.
        dataset_path: JSON file containing the list of benchmark problems.
        wrong_code_path: JSON file containing a list of items with ``name``
            and ``all_wrng_code`` keys.

    Returns:
        A list of dicts, one per entry of ``all_wrng_code``. ``time_limit``
        and ``memory_limit`` are fixed to ``1`` and ``512``; ``test_cases`` is
        the path of the test file (not its content).

    Raises:
        IndexError: If a benchmark problem has an empty ``wrong_code`` list.
        KeyError: If a subset item's ``name`` has no matching benchmark problem.
    """
    # Context managers so both input handles are closed deterministically.
    with open(dataset_path, "r", encoding="utf-8") as f:
        ds = json.load(f)
    # Later duplicates of the same problem name overwrite earlier ones.
    tcb_id_transform = {
        item['wrong_code'][0]['problem']: item['tcb_id'] for item in ds
    }

    with open(wrong_code_path, "r", encoding="utf-8") as f:
        all_wrong_code = json.load(f)
    print([{item['name']: len(item['all_wrng_code'])} for item in all_wrong_code])

    res = []
    # NOTE(review): this counter is never incremented, so the final print
    # always emits 0; it is kept so the function's stdout stays unchanged.
    None_count = 0
    for item in all_wrong_code:
        tcb_id = tcb_id_transform[item['name']]
        testcases = os.path.join(prefix_dir, f"tests-{tcb_id}.jsonl")
        wrong_codes = item["all_wrng_code"]
        for idx, c in enumerate(wrong_codes):
            res.append({
                "code": c['code'],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": 1,
                "memory_limit": 512,
                "test_cases": testcases,
                "problem_id": tcb_id,
                "code_id": idx,
                "rank": len(wrong_codes),
            })
    print(None_count)
    return res
def calculate_a_proportion(s):
    """Return the share of ``'A'`` occurrences in ``s``.

    Args:
        s: A sequence supporting ``count`` and ``len`` (typically a string).

    Returns:
        ``s.count('A') / len(s)`` as a float, or ``0.0`` when ``s`` is falsy
        (empty or ``None``).
    """
    if s:
        return s.count('A') / len(s)
    return 0.0
def get_tcg_select_wc(wrong_codes):
    """Filter wrong-code entries down to the selected C++ ones.

    An entry is kept when more than 60% of its ``output_str`` is the
    character ``'A'`` (per ``calculate_a_proportion``) and its ``lang`` is
    ``'cpp'``.

    Args:
        wrong_codes: Iterable of dicts with ``output_str`` and ``lang`` keys.

    Returns:
        A new list with the matching entries, in their original order.
    """
    return [
        entry
        for entry in wrong_codes
        if calculate_a_proportion(entry['output_str']) > 0.6 and entry['lang'] == 'cpp'
    ]
def load_tcg_bench_subset(name="tcb", prefix_dir=None, testcase_alg="", version="v1",
                          dataset_path=None):
    """Build evaluation records from a TCGBench wrong-code file.

    Args:
        name: Unused by this function; kept for a uniform loader signature.
        prefix_dir: Directory that holds the ``tests-<tcb_id>.jsonl`` files.
            Must be a path; ``None`` makes ``os.path.join`` raise ``TypeError``.
        testcase_alg: Unused by this function.
        version: Suffix used to pick the default
            ``TCGBench-wc-<version>.json`` file.
        dataset_path: Optional explicit path of the JSON file. When ``None``
            the default location derived from ``version`` is used.

    Returns:
        A list of dicts, one per wrong-code entry. ``time_limit`` and
        ``memory_limit`` are fixed to ``1`` and ``512``; ``test_cases`` is the
        path of the test file (not its content).
    """
    if dataset_path is None:
        dataset_path = f"/mmu_nlp_hdd/yang/data/rebuttal/TCG/TCGBench-wc-{version}.json"
    # Context manager so the input handle is closed deterministically.
    with open(dataset_path, "r", encoding="utf-8") as f:
        all_wrong_code = json.load(f)

    res = []
    for item in all_wrong_code:
        testcases = os.path.join(prefix_dir, f"tests-{item['tcb_id']}.jsonl")
        wrong_codes = item["wrong_code"]
        for idx, c in enumerate(wrong_codes):
            res.append({
                "code": c['code'],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": 1,
                "memory_limit": 512,
                "test_cases": testcases,
                "problem_id": item['tcb_id'],
                "code_id": idx,
                "rank": len(wrong_codes),
            })
    return res
def save_back_results(problem_results, name="tcb", save_dir="results"):
    """Attach execution results to the source dataset and write it to disk.

    For every problem in ``problem_results`` a ``res`` list is stored on the
    matching dataset entry, holding the ``status`` and ``details`` of each
    item in that problem's ``codes`` list.

    Args:
        problem_results: Mapping from problem id to a dict with a ``codes``
            list; each code entry must provide ``status`` and ``details``.
        name: Selects the dataset. ``"codeforces"`` updates the codeforces
            dump (indexed directly by problem id); any name containing
            ``"tcb"`` updates the TestcaseBench dump (matched by ``tcb_id``).
            Other names do nothing.
        save_dir: Sub-directory of the eval folder used for the tcb output.

    Raises:
        KeyError: If a problem id is not present in the selected dataset
            (``IndexError`` is also possible for the codeforces dump if it is
            a list).
    """
    if name == "codeforces":
        # Context managers so handles are closed and the output is flushed.
        with open("/home/luoxianzhen/yang/data/codeforces/codeforces-cots_38k_extracted.json", "r", encoding="utf-8") as f:
            ds = json.load(f)
        for problem_id, v in problem_results.items():
            ds[problem_id]["res"] = [{"status": code_info["status"], "details": code_info["details"]} for code_info in v["codes"]]
        with open("/home/luoxianzhen/yang/data/codeforces/codeforces-cots_38k_extracted_executed.json", "w", encoding="utf-8") as f:
            json.dump(ds, f, indent=2, ensure_ascii=False)
    if "tcb" in name:
        with open("/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json", "r", encoding="utf-8") as f:
            ds = json.load(f)
        # Index by tcb_id; the dict values alias the items of ``ds``, so the
        # updates below are visible when ``ds`` itself is dumped.
        ds_dict = {item['tcb_id']: item for item in ds}
        for problem_id, v in problem_results.items():
            ds_dict[problem_id]["res"] = [{"status": code_info["status"], "details": code_info["details"]} for code_info in v["codes"]]
        with open(f"/home/luoxianzhen/yang/eval_wrong_code/{save_dir}/{name}-extracted_executed.json", "w", encoding="utf-8") as f:
            json.dump(ds, f, indent=4, ensure_ascii=False)
def get_count(name="tcb", prefix_dir=None, testcase_alg="",
              dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"):
    """Build records for problems whose test file exists and print totals.

    Problems without a ``tests-<tcb_id>.jsonl`` file under ``prefix_dir`` are
    skipped. Before returning, the number of kept problems and the total
    number of their wrong-code entries are printed.

    Args:
        name: Unused by this function; kept for a uniform loader signature.
        prefix_dir: Directory that holds the ``tests-<tcb_id>.jsonl`` files.
            Must be a path; ``None`` makes ``os.path.join`` raise ``TypeError``.
        testcase_alg: Unused by this function.
        dataset_path: JSON file containing the list of benchmark problems.

    Returns:
        A list of dicts, one per wrong-code entry of every kept problem.
        ``test_cases`` is the path of the test file (not its content).
    """
    # Context manager so the dataset handle is closed deterministically.
    with open(dataset_path, "r", encoding="utf-8") as f:
        ds = json.load(f)

    problem_count = 0
    wrong_code_count = 0
    res = []
    for item in ds:
        testcases = os.path.join(prefix_dir, f"tests-{item['tcb_id']}.jsonl")
        if not os.path.exists(testcases):
            continue
        wrong_codes = item["wrong_code"]
        problem_count += 1
        wrong_code_count += len(wrong_codes)
        for idx, c in enumerate(wrong_codes):
            res.append({
                "code": c['code'],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "test_cases": testcases,
                "problem_id": item['tcb_id'],
                "code_id": idx,
                "rank": len(wrong_codes),
            })
    print(problem_count, wrong_code_count)
    return res
def get_cpu_subset(name="tcb", prefix_dir=None, testcase_alg="",
                   dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json",
                   subset_path="/home/luoxianzhen/yang/eval_wrong_code/cpu_subset.json"):
    """Build records only for the wrong-code indices listed in a subset file.

    The subset file maps each ``tcb_id`` to a list of wrong-code indices. An
    index is removed from that list once it has been used, so it is emitted
    at most once even if several dataset items share a ``tcb_id``. The number
    of produced records is printed before returning.

    Args:
        name: Unused by this function; kept for a uniform loader signature.
        prefix_dir: Directory that holds the ``tests-<tcb_id>.jsonl`` files.
            Must be a path; ``None`` makes ``os.path.join`` raise ``TypeError``.
        testcase_alg: Unused by this function.
        dataset_path: JSON file containing the list of benchmark problems.
        subset_path: JSON file mapping ``tcb_id`` to a list of indices.

    Returns:
        A list of dicts, one per selected wrong-code entry. ``rank`` is the
        total number of wrong-code entries of the problem, not the number of
        selected ones.

    Raises:
        KeyError: If a problem with at least one wrong-code entry has no key
            in the subset file.
    """
    # Context managers so both input handles are closed deterministically.
    with open(dataset_path, "r", encoding="utf-8") as f:
        ds = json.load(f)
    with open(subset_path, "r", encoding="utf-8") as f:
        subset = json.load(f)

    res = []
    for item in ds:
        tcb_id = item['tcb_id']
        testcases = os.path.join(prefix_dir, f"tests-{tcb_id}.jsonl")
        wrong_codes = item["wrong_code"]
        for idx, c in enumerate(wrong_codes):
            remaining = subset[tcb_id]
            if idx not in remaining:
                continue
            # Consume the index so it cannot be selected a second time.
            remaining.remove(idx)
            res.append({
                "code": c['code'],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "test_cases": testcases,
                "problem_id": tcb_id,
                "code_id": idx,
                "rank": len(wrong_codes),
            })
    print(len(res))
    return res
def get_subset(name="tcb", prefix_dir=None, testcase_alg="",
               dataset_path="/home/luoxianzhen/yang/data/Ours/TestcaseBench-v28.json"):
    """Build records, with loaded test cases, for a fixed list of problems.

    Only dataset items whose ``tcb_id`` appears in the hard-coded ``subset``
    list are kept. Their test cases are read into memory with
    ``get_testcases``. The number of produced records is printed before
    returning.

    Args:
        name: Unused by this function; kept for a uniform loader signature.
        prefix_dir: Directory that holds the ``tests-<tcb_id>.jsonl`` files.
            Must be a path; ``None`` makes ``os.path.join`` raise ``TypeError``
            for the first selected problem.
        testcase_alg: Unused by this function.
        dataset_path: JSON file containing the list of benchmark problems.

    Returns:
        A list of dicts, one per wrong-code entry of every selected problem.
        All records of one problem share the same ``test_cases`` list object.
    """
    # Context manager so the dataset handle is closed deterministically.
    with open(dataset_path, "r", encoding="utf-8") as f:
        ds = json.load(f)

    # NOTE(review): these entries look like problem titles, yet they are
    # compared against ``tcb_id`` below — confirm that tcb_id holds the title.
    subset = ['秘密袭击', '最大公约数', '种树 Growing Trees', '反质数序列', '小 Y 和恐怖的奴隶主', '混合果汁', '迷宫探险', '儒略日', '拼图', 'DFS 序 2', '转圈游戏', '城池攻占', '潜入行动', '简单算术', '基因串', '最小公倍数', '数三角形', '战争调度', '与众不同', '钓鱼', '对称轴 Axes of Symmetry', '唱、跳、rap 和篮球', '降雨量', '餐巾计划', 'LJJ 的字符串', '无源汇有上下界可行流', '最短不公共子串', '任务安排 1', '道路堵塞', '聪明的燕姿', '你的名字', '塔', '遗失的答案', '字母 Letters', '林克卡特树', '石头花园 Rock Garden', '分配问题', 'Seek the Name, Seek the Fame', '炮兵阵地', 'xor', '最大连续和', '树上询问', '转化', '小奇采药', '数列递推', 'Divide', '崂山白花蛇草水', '猜数游戏', '花匠', '小 Q 的草稿', '架设电话线', '格雷码', 'Minimax', '喷水装置', '画框', '人造情感', 'Identity Theft', 'A + B 问题', 'Sim', '回文子串', '修剪草坪', '生日礼物', '填树', '题', '不同的最小割', '勘破神机', '地铁交通', '书法家', '数列互质', 'Sumdiv', '舞会', '动态图连通性', '伪光滑数', '庆典', '货车运输', '数的划分', '老 C 的任务', '吃', 'Transport', '网络协议', '活动安排', "Bessie's Snow Cow", '旅行者', '三元组', '普通平衡树', '取石子游戏 2', '领导集团问题', '滚榜', '飞镖', '抛硬币', '逛公园', '棘手的操作', '大工程', '镜面通道', '炸弹攻击 2', '找爸爸', '线性代数', '小凸玩密室', 'Circus', 'A 的 B 次方']
    # Hash-based lookup instead of scanning the list for every dataset item.
    selected = frozenset(subset)

    res = []
    for item in ds:
        if item['tcb_id'] not in selected:
            continue
        wrong_codes = item["wrong_code"]
        testcases = get_testcases(
            os.path.join(prefix_dir, f"tests-{item['tcb_id']}.jsonl")
        )
        for idx, c in enumerate(wrong_codes):
            res.append({
                "code": c['code'],
                "compileAndRunOptions": c["compileAndRunOptions"],
                "time_limit": item["runtime_limit"],
                "memory_limit": item["memory_limit"],
                "test_cases": testcases,
                "problem_id": item['tcb_id'],
                "code_id": idx,
                "rank": len(wrong_codes),
            })
    print(len(res))
    return res
if __name__ == "__main__":
    # Disabled alternative entry point that writes results back to disk:
    # ds = json.load(open("/home/luoxianzhen/yang/eval_wrong_code/results/all_results.json"))
    # save_back_results(ds, name="codeforces")
    # print("Data loaded and saved back successfully.")

    # Script entry: load the wrong-code subset against the generated tests.
    tests_dir = "/home/luoxianzhen/yang/save_tests_gpt-4o-type/lcb/"
    data = load_all_wrong_code_subset(
        name="tcb",
        prefix_dir=tests_dir,
        testcase_alg="algo",
    )
Xet Storage Details
- Size:
- 10.8 kB
- Xet hash:
- f73545498b9bfb193e7f77786e69ba5ea4151fbf1f9085e15b8261929ab0e4a7
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.