Tsukihjy/testcase / methods /utils /content_filter.py
download
raw
933 Bytes
from dataset_all import get_ours
import json
def write_json_to_file(data, filepath) -> None:
    """Serialize *data* as pretty-printed JSON and write it to *filepath*.

    The file is opened in text mode with UTF-8 encoding and truncated if it
    already exists. Non-ASCII characters are written verbatim rather than as
    ``\\uXXXX`` escapes (``ensure_ascii=False``), and the output is indented
    by four spaces.

    Args:
        data: Any object ``json.dump`` can serialize.
        filepath: Destination path passed straight to ``open``.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If *data* contains a value ``json.dump`` cannot serialize.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
# Load the dataset through get_ours() (imported from dataset_all) at import
# time. The code below iterates this value and indexes each element by
# 'query' — presumably an iterable of dicts; confirm against dataset_all.
res = get_ours()
import re
import os
# Markdown link whose target is an http(s) URL ending in a known image
# extension, e.g. "[alt](https://host/path/fig.png)". Notes on what the
# regex does and does not accept:
#   * the leading "!" of an image link is not required, so plain links to
#     image files match as well;
#   * the link text cannot span lines ("." does not match a newline);
#   * the URL may not contain whitespace or ")" and must end exactly at the
#     extension (a trailing query string such as ".png?x=1" does not match);
#   * matching is case-sensitive, so ".PNG" is not recognised.
pattern = r'\[.*?\]\(https?://[^\s)]+?\.(?:jpg|jpeg|png|gif|bmp|webp)\)'

# Compile once up front instead of looking the pattern up for every record.
_image_link_re = re.compile(pattern)

# Keep, in their original order, the records whose 'query' text contains at
# least one image link.
pic_ques = [item for item in res if _image_link_re.search(item['query'])]

write_json_to_file(pic_ques, '/home/i-luoxianzhen/data/TestCase-Gen/data/Ours/question_with_pic.json')

Xet Storage Details

Size:
933 Bytes
·
Xet hash:
52743cd27f4fc1865c514d46f206fa756a254d3337eb4dab57785f7d84fa4582

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.