# Source: Tsukihjy/testcase / testcase-data/lcb_edge_random.py (raw download, 7.13 kB)
import json
import random
def get_random_indices(array_length, num_indices):
    """Draw distinct random indices from ``range(array_length)``.

    The number of indices drawn is capped at ``array_length``: asking for
    more than the array holds yields every index once, in random order.
    """
    # Never ask random.sample for more items than the population contains.
    sample_size = min(num_indices, array_length)
    return random.sample(range(array_length), sample_size)
def find_first_non_ac(array):
    """Return the first element of *array* that is not ``"AC"``.

    Falls back to ``"AC"`` when every element equals ``"AC"`` or when
    *array* is empty.
    """
    return next((verdict for verdict in array if verdict != "AC"), "AC")
import random
def get_e_r_indices(code, type_of_test="random"):
    """Return the positions in ``code["types"]`` equal to *type_of_test*.

    A *code* mapping without a ``"types"`` entry yields an empty list.
    """
    matching_positions = []
    for position, test_type in enumerate(code.get("types", [])):
        if test_type == type_of_test:
            matching_positions.append(position)
    return matching_positions
# Test-set identifiers to process; each one is interpolated into the result-file
# path and into the Markdown row label below.
# NOTE(review): "lcb" presumably stands for LiveCodeBench — TODO confirm.
test_als = ["lcb"]
# Model names to process; each one is interpolated into the result-file path,
# the output Markdown file name and the printed summary line.
model_name_list = [
"claude4",
"gpt-4o",
]
import os
# Report script. For every (model, test set) pair it:
#   1. loads the judged results JSON,
#   2. classifies each candidate code by its first non-"AC" status on the
#      "random" tests and, separately, on the "edge" tests (a code with any
#      non-"AC" status on the selected tests counts as "hacked"),
#   3. writes both breakdowns into one Markdown table, and
#   4. prints how many codes were hacked by random tests only, by edge tests
#      only, and by both.


def _new_status_counts():
    """Return a zeroed status counter, keyed in the Markdown column order."""
    return {"AC": 0, "CE": 0, "WA": 0, "RE": 0, "TLE": 0, "MLE": 0, "EXE": 0}


def _tally_codes(codes, test_indices, status_counts, success_counts):
    """Classify every code on the selected tests and return the hacked code ids.

    ``status_counts`` and ``success_counts`` are updated in place. A status
    that is not a key of ``status_counts`` raises ``KeyError``.
    """
    hacked_ids = []
    for code in codes:
        if not test_indices:
            # No test of this type exists: the code trivially counts as AC.
            status_counts["AC"] += 1
            continue
        statuses = code["status"]
        if max(test_indices) < len(statuses):
            selected = [statuses[idx] for idx in test_indices]
        else:
            # The status list is shorter than the type list; fall back to
            # judging the code on every status it has.
            selected = statuses
        verdict = find_first_non_ac(selected)
        status_counts[verdict] += 1
        if verdict != "AC":
            success_counts["hacked"] += 1
            hacked_ids.append(code["code_id"])
    return hacked_ids


def _markdown_rows(label, rank_result, success_k):
    """Render one Markdown table row per rank from the given counters."""
    rows = []
    for rank_name, status_counts in rank_result.items():
        total = success_k[rank_name]["total"]
        hacked = success_k[rank_name]["hacked"]
        # Keep two decimals; an empty total reports a 0 hack rate.
        hack_rate = round(hacked / total * 100, 2) if total > 0 else 0
        # Each cell shows the percentage followed by the raw count.
        cells = []
        for count in status_counts.values():
            percentage = (count / total * 100) if total > 0 else 0
            cells.append(f"{percentage:.2f}% ({count})")
        rows.append(
            f"| {label} | {rank_name} | " + " | ".join(cells) + f" | {hack_rate}% |\n"
        )
    return "".join(rows)


for model_name in model_name_list:
    for test_al in test_als:
        edge_hacking_list = {}
        random_hacking_list = {}
        result_file = f"/home/luoxianzhen/yang/eval_wrong_code/ALLmode_results/tcb-{model_name}-{test_al}-type-{test_al}-rank5-all.json"
        if not os.path.exists(result_file):
            print(f"{model_name}-{test_al} NOT EXSIT!")
            continue
        # Use a context manager so the result file is closed after parsing.
        with open(result_file, "r", encoding="utf-8") as result_fp:
            results = json.load(result_fp)

        rank_result_random = {"rank5": _new_status_counts()}
        success_k_random = {"rank5": {"total": 0, "hacked": 0}}
        rank_result_edge = {"rank5": _new_status_counts()}
        success_k_edge = {"rank5": {"total": 0, "hacked": 0}}

        for k, v in results.items():
            codes = v["codes"]
            rank = len(codes)
            random_hacking_list[k] = []
            edge_hacking_list[k] = []
            # Only the "rank5" bucket is evaluated (i == 4).
            for i in range(4, 5):
                rank_key = f"rank{i + 1}"
                # default=0 keeps an entry with no codes from crashing max().
                array_length = max(
                    (len(code["status"]) for code in codes), default=0
                )
                success_k_random[rank_key]["total"] += rank
                success_k_edge[rank_key]["total"] += rank
                if array_length == 0:
                    # No code has any status: count them all as AC. The AC
                    # counter lives in rank_result_*, not in success_k_*
                    # (which only has "total" and "hacked").
                    rank_result_random[rank_key]["AC"] += rank
                    rank_result_edge[rank_key]["AC"] += rank
                    continue
                # Test types are read from the first code of the entry.
                # NOTE: the random tests are not down-sampled to the number
                # of edge tests.
                tests_index_random = get_e_r_indices(codes[0], "random")
                tests_index_edge = get_e_r_indices(codes[0], "edge")
                random_hacking_list[k].extend(
                    _tally_codes(
                        codes,
                        tests_index_random,
                        rank_result_random[rank_key],
                        success_k_random[rank_key],
                    )
                )
                edge_hacking_list[k].extend(
                    _tally_codes(
                        codes,
                        tests_index_edge,
                        rank_result_edge[rank_key],
                        success_k_edge[rank_key],
                    )
                )

        # Build the Markdown table: random-test rows first, then edge-test rows.
        algorithm_model = f"{test_al}({model_name})"
        markdown_table = "| Algorithm & Model | Rank | AC | CE | WA | RE | TLE | MLE | EXE | Hack Rate |\n"
        markdown_table += "|-------------------|------|----|----|----|----|-----|-----|-----|-----------|\n"
        markdown_table += _markdown_rows(
            algorithm_model, rank_result_random, success_k_random
        )
        markdown_table += _markdown_rows(
            algorithm_model, rank_result_edge, success_k_edge
        )

        # Save the table to a .md file.
        with open(f"/home/luoxianzhen/yang/eval_wrong_code/edge_random/rank_result-{model_name}-{test_al}-type.md", "w", encoding="utf-8") as file:
            file.write(markdown_table)
        print("Markdown 文件已生成: rank_result.md")

        # Overlap between the two hacked sets, per entry; every code id is
        # counted at most once per entry.
        both_count = 0
        random_count = 0
        edge_count = 0
        for k, random_hacked in random_hacking_list.items():
            random_ids = set(random_hacked)
            edge_ids = set(edge_hacking_list[k])
            both_count += len(random_ids & edge_ids)
            random_count += len(random_ids - edge_ids)
            edge_count += len(edge_ids - random_ids)
        print(f"{model_name}: Random-Only Hacked: {random_count} & Edge-Only Hacked {edge_count} & Both Hacked {both_count}")

# Xet Storage Details
#
# Size: 7.13 kB · Xet hash: e7939cb323fa612e41d9dd37b832a52af13610248e2e5d1d85e32cd4029fa242
#
# Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.