| import json | |
def get_test_results(file_path):
    """Read the UTF-8 encoded JSON file at *file_path* and return its content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON document (whatever top-level value the file
        holds).
    """
    with open(file_path, encoding='utf-8') as handle:
        return json.load(handle)
| import re | |
def get_testcase_idx(results_string):
    """Extract the integer that follows ``Testcase:`` in *results_string*.

    The first ``Testcase:`` marker that is followed (after optional
    whitespace) by an integer is used.

    Args:
        results_string: Text to search for a ``Testcase: <int>`` marker.

    Returns:
        The index as an ``int``, or ``None`` when the text contains no
        ``Testcase: <int>`` marker.
    """
    # Capture only the (optionally signed) digits: a greedy ``.*`` with
    # DOTALL would swallow everything up to the end of the string and make
    # int() fail as soon as any text follows the index.
    match = re.search(r"Testcase:\s*([+-]?\d+)", results_string)
    if match is None:
        # Explicit "not found" result instead of touching an unbound local.
        return None
    return int(match.group(1))
| import numpy as np | |
def clean_array(arr):
    """Drop rows containing 'C' or 'E', then drop columns that are all 'A'.

    Args:
        arr: Two-dimensional sequence (a sequence of rows).

    Returns:
        A tuple ``(cleaned, row_indices, col_indices)`` where

        * ``cleaned`` is the remaining data as a list of lists; it is empty
          when no row or no column survives,
        * ``row_indices`` are the positions, in the original ``arr``, of the
          rows that were kept,
        * ``col_indices`` are the positions, in the original ``arr``, of the
          columns that were kept.
    """
    # Filter rows and remember their ORIGINAL positions in a single pass;
    # enumerating the already filtered list would only yield 0..k-1.
    kept_rows = [
        (idx, row)
        for idx, row in enumerate(arr)
        if 'C' not in row and 'E' not in row
    ]
    row_indices = [idx for idx, _ in kept_rows]
    rows = [row for _, row in kept_rows]

    # Transpose the surviving rows and discard columns made only of 'A',
    # again remembering the original column positions.
    kept_cols = [
        (idx, col)
        for idx, col in enumerate(zip(*rows))
        if not all(cell == 'A' for cell in col)
    ]
    col_indices = [idx for idx, _ in kept_cols]

    # Transpose back to row-major form.
    cleaned = [list(row) for row in zip(*(col for _, col in kept_cols))]
    return cleaned, row_indices, col_indices
| # test_case_gen_method = "algo" | |
| # file_path = f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/rank_result/tcb-ht-rank5-all.json" | |
| # results_all = get_test_results(file_path) | |
| # matrix_dict = {} | |
| # for key, question in results_all.items(): | |
| # matrix = [] | |
| # for item in question['codes']: | |
| # status_list = [status[0] for status in item["status"]] | |
| # matrix.append(status_list) | |
| # # matrix_dict[key] = matrix | |
| # matrix_dict[key] = {} | |
| # if len(matrix) > 0: | |
| # matrix_dict[key]["matrix"], matrix_dict[key]["code_index"], matrix_dict[key]["test_index"] = clean_array(matrix) | |
| # with open(f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/matrix/ht-rank5-matrix.json", "w", encoding="utf-8") as f: | |
| # json.dump(matrix_dict, f, ensure_ascii=False, indent=4) | |
# ------------- TestMode Matrix -------------------
# For every question in the rank-result file, collect the first status and
# first detail of each submission whose first status is not "AC", compute the
# share of such submissions ("hack_rate", in percent), and save the questions
# that have at least three RE/TLE/MLE submissions.
# NOTE(review): the nesting below was reconstructed from the data flow of the
# statements -- confirm against the original, indented script.
test_case_gen_method = "lcb"
rank = 5
file_path = f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/rank_result/tcb-{test_case_gen_method}-rank{rank}-all.json"
results_all = get_test_results(file_path)

matrix_dict = {}
count = 0
all_wrong_list = []
avg_hack_rate = 0
saves_re = {}

for key, question in results_all.items():
    wrong_status = ""
    details = []
    RE_TLE_MLE_count = 0
    for item in question['codes']:
        first_status = item['status'][0]
        if first_status == "AC":
            continue
        wrong_status += f"{first_status} "
        details.append(item["details"][0])
        if first_status in ("RE", "TLE", "MLE"):
            RE_TLE_MLE_count += 1

    if RE_TLE_MLE_count >= 3:
        saves_re[key] = question

    # De-duplicate while keeping first-seen order so the result is
    # deterministic across runs (set iteration order is not).
    unique_details = list(dict.fromkeys(details))
    total_codes = len(question['codes'])
    # A question without any code has nothing to hack; avoid dividing by zero.
    hack_rate = round((len(details) / total_codes) * 100, 2) if total_codes else 0.0
    matrix_dict[key] = {
        "status": wrong_status,
        "details": details,
        "tests": unique_details,
        "hack_rate": hack_rate,
    }
    avg_hack_rate += hack_rate

    # NOTE(review): a question with a single distinct detail AND a hack rate
    # of 100 increments `count` twice -- confirm that this is intended.
    if len(unique_details) == 1:
        count += 1
    if hack_rate == 100.00:
        count += 1
        all_wrong_list.append(key)

# From here on `matrix_dict` is a list of (key, entry) pairs ordered by
# ascending hack rate, ties broken by descending number of distinct tests.
matrix_dict = sorted(matrix_dict.items(), key=lambda x: (x[1]["hack_rate"], - len(x[1]["tests"])))

# with open(f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/matrix/{test_case_gen_method}-rank{rank}-matrix.json", "w", encoding="utf-8") as f:
#     json.dump(matrix_dict, f, ensure_ascii=False, indent=4)
with open("/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/RE_TLE_MLE_items.json", "w", encoding="utf-8") as f:
    json.dump(saves_re, f, ensure_ascii=False, indent=4)

# Explicit encoding, consistent with the other files written by this script.
with open("rank_result.md", "w", encoding="utf-8") as file:
    file.write(str(list(saves_re.keys())))

# with open("all_wrong_list.txt", "w", encoding="utf-8") as file:
#     for item in all_wrong_list:
#         file.write(item + "\n")

# An empty result file yields an average of 0.0 instead of a ZeroDivisionError.
average_hack_rate = round(avg_hack_rate / len(matrix_dict), 2) if matrix_dict else 0.0
print(f"avg hack Rate {average_hack_rate}")
Xet Storage Details
- Size:
- 3.92 kB
- Xet hash:
- 312fa00bc800e2ce20f10f4ca27556272010ef261f4f6169ed45a4f8cd194371
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.