File size: 4,756 Bytes
46011b7 1c6af55 46011b7 1c6af55 46011b7 1c6af55 46011b7 26d82a2 46011b7 26d82a2 46011b7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | import json, pathlib, time, os
level1_path = pathlib.Path('src/deceit_env/data/level1.jsonl')
level2_path = pathlib.Path('src/deceit_env/data/level2.jsonl')


def _read_jsonl(path):
    """Return the JSON values stored one per line in *path*.

    Blank (whitespace-only) lines are skipped. The file is decoded as UTF-8
    explicitly: without an ``encoding`` argument ``open()`` falls back to the
    platform locale encoding, which can mis-decode or reject non-ASCII text.
    """
    with open(path, encoding='utf-8') as f:
        stripped_lines = (raw_line.strip() for raw_line in f)
        return [json.loads(text) for text in stripped_lines if text]


# Load level1 (the source questions)
rows = _read_jsonl(level1_path)

# Load rows already generated by an earlier run, so this run can skip them
level2_rows = _read_jsonl(level2_path) if level2_path.exists() else []
existing_ids = {r['id'] for r in level2_rows}
print(f"Loaded {len(rows)} level1 questions. Already have {len(existing_ids)} in level2.")
# Static distractor pairs keyed by question category (instant, no API call).
# 'general' doubles as the catch-all entry for categories not listed here.
fallback_map = dict(
    geography=[
        'The largest city in the country serves as its administrative capital.',
        'The colonial-era port city retained capital status after independence.',
    ],
    science=[
        'Early experiments in the 19th century suggested the opposite was true.',
        'Recent studies have challenged this long-held scientific consensus.',
    ],
    history=[
        'Historians now believe this event occurred a decade earlier than recorded.',
        'Primary sources from the era suggest a different outcome was reached.',
    ],
    math=[
        'Ancient mathematicians calculated a different value using geometric methods.',
        'Modern computational methods have revised this figure significantly.',
    ],
    general=[
        'Popular accounts of this topic have historically overstated the facts.',
        'The commonly cited figure was revised following more recent research.',
    ],
)
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown ``` fence, if it has one."""
    if not text.startswith("```"):
        return text
    newline_at = text.find("\n")
    if newline_at == -1:
        # Single-line fenced reply: leave it alone and let JSON parsing reject it.
        return text
    # Drop the opening fence line (which may carry a language tag such as "json").
    inner = text[newline_at + 1:].rstrip()
    if inner.endswith("```"):
        inner = inner[:-3]
    return inner.strip()


def get_distractors_api(client, question, ground_truth):
    """Ask the chat model for two false-but-plausible statements about a fact.

    Args:
        client: Object exposing ``chat.completions.create(...)``; the script
            passes an ``openai.OpenAI`` instance.
        question: Question text interpolated into the prompt.
        ground_truth: Correct answer interpolated into the prompt.

    Returns:
        A list of exactly two strings.

    Raises:
        ValueError: If the reply is empty, is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass), or is
            not a JSON array of exactly two items.
        Exception: Anything raised by the client call propagates unchanged.
    """
    prompt = (
        f"Generate 2 plausible-sounding but FALSE statements about this fact. "
        f"Sound authoritative but be wrong. "
        f"Fact: {question} Answer: {ground_truth}. "
        f"Return ONLY a JSON array of 2 strings."
    )
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
        temperature=0.9,
    )
    content = response.choices[0].message.content
    if content is None:
        # Report a missing body as a format problem rather than an
        # AttributeError from calling .strip() on None.
        raise ValueError("Bad format: empty response")
    raw = content.strip()
    # The reply may arrive wrapped in a Markdown code fence even though the
    # prompt asks for bare JSON; unwrap it before parsing.
    result = json.loads(_strip_code_fence(raw))
    if isinstance(result, list) and len(result) == 2:
        return [str(r) for r in result]
    raise ValueError(f"Bad format: {raw}")
# Optional OpenAI client. It stays disabled (client None, api_available False)
# when the package cannot be imported, when OPENAI_API_KEY is empty or still
# the placeholder value, or when constructing the client raises.
client = None
api_available = False
try:
    from openai import OpenAI

    api_key = os.environ.get("OPENAI_API_KEY", "")
    if api_key not in ("", "your-openai-key-here"):
        client = OpenAI(api_key=api_key)
        api_available = True
        print("OpenAI client ready — will try API first, fallback to static on rate limit")
except Exception as exc:
    print(f"OpenAI not available: {exc} — using static fallback for all")
def _save_level2(path, records):
    """Write *records* to *path* as JSON Lines without truncating it in place.

    The rows are written to a sibling ``.tmp`` file which is then swapped in
    with ``os.replace``, so an interruption during the write leaves the
    previous checkpoint (which resuming depends on) intact.
    """
    tmp_path = str(path) + '.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(r) + '\n' for r in records)
    os.replace(tmp_path, path)


new_count = 0
fallback_count = 0
for row in rows:
    # Resume support: rows produced by an earlier run are not regenerated.
    if row['id'] in existing_ids:
        continue
    category = row.get('category', 'general')
    distractors = None
    api_called = False

    # Try API: a single attempt; any failure goes straight to the static
    # fallback rather than retrying.
    if api_available and client:
        api_called = True
        try:
            distractors = get_distractors_api(client, row['question'], row['ground_truth'])
        except Exception as e:
            if "429" in str(e) or "rate" in str(e).lower():
                print(f" Rate limit on {row['id']}, using fallback...")
            else:
                print(f" API error on {row['id']}: {e} — using fallback")

    # Fallback to static
    if distractors is None:
        # Copy so output rows do not all alias the same list in fallback_map.
        distractors = list(fallback_map.get(category, fallback_map['general']))
        fallback_count += 1

    level2_rows.append({
        'id': row['id'],
        'question': row['question'],
        'ground_truth': row['ground_truth'],
        'category': category,
        'distractors': distractors
    })
    existing_ids.add(row['id'])
    new_count += 1

    # Save every 10
    if new_count % 10 == 0:
        _save_level2(level2_path, level2_rows)
        print(f" Saved {new_count} new entries ({fallback_count} used fallback)")

    # Pace API requests; there is nothing to pace when no request was made.
    if api_called:
        time.sleep(0.5)

# Final save
_save_level2(level2_path, level2_rows)
print(f"\nDone!")
print(f" Total in level2.jsonl: {len(level2_rows)}")
print(f" New this run: {new_count}")
print(f" Used API: {new_count - fallback_count}")
print(f" Used fallback: {fallback_count}")