Tsukihjy/testcase/testcase-data/get_ce_code.py
download
raw
1.49 kB
import json
import sys
sys.path.append("/home/luoxianzhen/yang/methods/utils")
from dataset_all import get_datasets_by_name
import os
def get_testcases(testcase_path):
    """Return the first JSON object from a JSONL testcase file.

    Args:
        testcase_path: Path to a JSON-Lines file; each non-blank line is one
            JSON object.

    Returns:
        The parsed first non-blank line, or {} when the file is missing or
        contains no non-blank lines (the original indexed data[0] and would
        raise IndexError on an empty file).
    """
    if not os.path.exists(testcase_path):
        return {}
    with open(testcase_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Only the first record is ever used, so stop reading early
            # instead of materializing the whole file.
            if line.strip():
                return json.loads(line)
    return {}
tcb_datasets = get_datasets_by_name("ours")
# Index the dataset records by tcb_id so the join against the test results
# below is an O(1) lookup per key.
tcb_datasets_dict = {record['tcb_id']: record for record in tcb_datasets}
name = "sample"
test_result_all_file = "data/Ours/tcb-wrong-code-new-lcb-rank5-all.json"
wrong_code_total = 0
# Load the full per-tcb_id evaluation results. Use a context manager so the
# handle is closed deterministically — json.load(open(...)) leaked it.
with open(test_result_all_file, "r", encoding="utf-8") as f:
    test_result_all = json.load(f)
ce_list = []
# Collect every non-AC submission, except WA results whose details differ
# from the specific empty-output signature below (those WA cases are skipped;
# this preserves the original filter exactly).
for tcb_id, result in test_result_all.items():
    for wrong in result['codes']:
        if wrong['status'] == ["AC"]:
            continue
        if wrong['status'] == ["WA"] and wrong['details'] != ["WA: Testcase:0 output: "]:
            continue
        # Enrich the record with its id, query, and sample tests so the
        # downstream fixer has full context.
        wrong['tcb_id'] = tcb_id
        wrong["query"] = tcb_datasets_dict[tcb_id]['query']
        wrong['test'] = tcb_datasets_dict[tcb_id]['sample']
        ce_list.append(wrong)

# Write the first 20 records and the remainder to two section files. Context
# managers replace json.dump(..., open(...)), which leaked write handles and
# risked unflushed/truncated output at interpreter exit.
with open("/home/luoxianzhen/yang/eval_wrong_code/wrong-code-to-fix-v23-sections1.json", "w", encoding="utf-8") as f:
    json.dump(ce_list[0:20], f, indent=4, ensure_ascii=False)
with open("/home/luoxianzhen/yang/eval_wrong_code/wrong-code-to-fix-v23-sections2.json", "w", encoding="utf-8") as f:
    json.dump(ce_list[20:], f, indent=4, ensure_ascii=False)

Xet Storage Details

Size:
1.49 kB
·
Xet hash:
38ddd4c1ee9d5784dabe595d1344412560c29f31db5c87caa85814a5da6582e7

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.