download
raw
3.92 kB
import json
def get_test_results(file_path):
    """Load and return the JSON document stored at *file_path*.

    The file is read as UTF-8 text; whatever object the JSON decodes to
    (dict, list, ...) is returned unchanged.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
import re
def get_testcase_idx(results_string):
    """Extract the integer that follows ``Testcase:`` in *results_string*.

    Only the (optionally signed) run of digits right after the marker is
    captured, so any text following the number -- further lines of a
    report, for instance -- no longer makes the ``int()`` conversion fail.

    Args:
        results_string: Text to search for a ``Testcase: <int>`` marker.

    Returns:
        The parsed integer, or ``None`` when no ``Testcase: <int>`` marker
        is present.
    """
    match = re.search(r"Testcase:\s*([+-]?\d+)", results_string)
    if match is None:
        return None
    return int(match.group(1))
import numpy as np
def clean_array(arr):
    """Drop unwanted rows and columns from a 2-D status matrix.

    A row is removed when it contains a ``'C'`` or an ``'E'`` entry. After
    that, a column is removed when every remaining entry in it is ``'A'``.

    Args:
        arr: Sequence of rows, each a sequence of status markers.

    Returns:
        A ``(matrix, row_indices, col_indices)`` tuple where ``matrix`` is
        the cleaned matrix as a list of lists, and ``row_indices`` /
        ``col_indices`` are the positions, in the *original* ``arr``, of
        the rows / columns that were kept. ``matrix`` is ``[]`` when no
        column survives, even if some rows did; all three are ``[]`` for
        empty input.
    """
    if not arr:
        return [], [], []

    # Pair every row with its original position *before* filtering so the
    # reported indices refer to the input matrix, not the filtered one.
    kept_rows = [
        (idx, row)
        for idx, row in enumerate(arr)
        if 'C' not in row and 'E' not in row
    ]
    row_indices = [idx for idx, _ in kept_rows]

    # Transpose the surviving rows and drop columns that are all 'A',
    # again remembering each column's original position.
    kept_cols = [
        (idx, col)
        for idx, col in enumerate(zip(*(row for _, row in kept_rows)))
        if not all(x == 'A' for x in col)
    ]
    col_indices = [idx for idx, _ in kept_cols]

    # Transpose back into a row-major list of lists.
    cleaned = [list(row) for row in zip(*(col for _, col in kept_cols))]
    return cleaned, row_indices, col_indices
# test_case_gen_method = "algo"
# file_path = f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/rank_result/tcb-ht-rank5-all.json"
# results_all = get_test_results(file_path)
# matrix_dict = {}
# for key, question in results_all.items():
# matrix = []
# for item in question['codes']:
# status_list = [status[0] for status in item["status"]]
# matrix.append(status_list)
# # matrix_dict[key] = matrix
# matrix_dict[key] = {}
# if len(matrix) > 0:
# matrix_dict[key]["matrix"], matrix_dict[key]["code_index"], matrix_dict[key]["test_index"] = clean_array(matrix)
# with open(f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/matrix/ht-rank5-matrix.json", "w", encoding="utf-8") as f:
# json.dump(matrix_dict, f, ensure_ascii=False, indent=4)
# ------------- TestMode Matrix -------------------
# For every question in the rank-result file, collect the codes whose first
# status is not "AC", compute the share of such codes ("hack_rate"), and
# save the questions where at least three codes ended in RE/TLE/MLE.
test_case_gen_method = "lcb"
rank = 5
file_path = f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/rank_result/tcb-{test_case_gen_method}-rank{rank}-all.json"
results_all = get_test_results(file_path)
matrix_dict = {}
count = 0
all_wrong_list = []
avg_hack_rate = 0  # running sum of hack rates; divided by the question count when printed
saves_re = {}
for key, question in results_all.items():
    codes = question['codes']
    wrong_statuses = []
    details = []
    RE_TLE_MLE_count = 0
    for item in codes:
        first_status = item['status'][0]
        if first_status == "AC":
            continue
        wrong_statuses.append(first_status)
        details.append(item["details"][0])
        if first_status in ("RE", "TLE", "MLE"):
            RE_TLE_MLE_count += 1
    # Each status is followed by a single space, trailing space included.
    wrong_status = "".join(f"{status} " for status in wrong_statuses)
    if RE_TLE_MLE_count >= 3:
        saves_re[key] = question
    unique_tests = list(set(details))
    # A question without any codes has nothing to hack: report 0.0 instead
    # of dividing by zero.
    hack_rate = round((len(details) / len(codes)) * 100, 2) if codes else 0.0
    matrix_dict[key] = {
        "status": wrong_status,
        "details": details,
        "tests": unique_tests,
        "hack_rate": hack_rate,
    }
    avg_hack_rate += hack_rate
    # NOTE(review): `count` is bumped by two different conditions and is not
    # read again in this section -- confirm the intended meaning.
    if len(unique_tests) == 1:
        count += 1
    if hack_rate == 100.00:
        count += 1
        all_wrong_list.append(key)
# From here on `matrix_dict` is a list of (key, info) pairs ordered by
# ascending hack rate, ties broken by descending number of distinct tests.
matrix_dict = sorted(matrix_dict.items(), key=lambda x: (x[1]["hack_rate"], - len(x[1]["tests"])))
# Disabled output: per-question matrix dump.
# with open(f"/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/matrix/{test_case_gen_method}-rank{rank}-matrix.json", "w", encoding="utf-8") as f:
#     json.dump(matrix_dict, f, ensure_ascii=False, indent=4)
with open("/home/i-luoxianzhen/data/TestCase-Gen/eval_wrong_code/RE_TLE_MLE_items.json", "w", encoding="utf-8") as f:
    json.dump(saves_re, f, ensure_ascii=False, indent=4)
# Explicit UTF-8 keeps this file consistent with the other outputs
# regardless of the platform's default encoding.
with open("rank_result.md", "w", encoding="utf-8") as file:
    file.write(str(list(saves_re.keys())))
# Disabled output: keys of questions whose hack rate is 100%.
# with open("all_wrong_list.txt", "w", encoding="utf-8") as file:
#     for item in all_wrong_list:
#         file.write(item + "\n")
# Guard against an empty results file, which would otherwise divide by zero.
mean_hack_rate = round(avg_hack_rate / len(matrix_dict), 2) if matrix_dict else 0.0
print(f"avg hack Rate {mean_hack_rate}")

Xet Storage Details

Size:
3.92 kB
·
Xet hash:
312fa00bc800e2ce20f10f4ca27556272010ef261f4f6169ed45a4f8cd194371

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.