Spaces:

Ajsaxena
/

DECEIT

Configuration error

File size: 4,756 Bytes

import json, pathlib, time, os

level1_path = pathlib.Path('src/deceit_env/data/level1.jsonl')
level2_path = pathlib.Path('src/deceit_env/data/level2.jsonl')


def _load_jsonl(path):
    """Return the JSON objects stored one-per-line in *path*.

    Blank / whitespace-only lines are skipped. The file is decoded as UTF-8
    explicitly so the result does not depend on the platform's default
    encoding.
    """
    with open(path, encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


# Load level1 (the source questions; this file must exist).
rows = _load_jsonl(level1_path)

# Load whatever a previous run already generated so this run can resume.
level2_rows = _load_jsonl(level2_path) if level2_path.exists() else []

# Ids already present in level2; used to skip rows that are already done.
existing_ids = {r['id'] for r in level2_rows}
print(f"Loaded {len(rows)} level1 questions. Already have {len(existing_ids)} in level2.")

# Static distractors used when the API is unavailable or a request fails.
# Keyed by question category; each category maps to exactly two canned
# statements, and 'general' is the bucket for categories not listed here.
fallback_map = dict(
    geography=[
        'The largest city in the country serves as its administrative capital.',
        'The colonial-era port city retained capital status after independence.',
    ],
    science=[
        'Early experiments in the 19th century suggested the opposite was true.',
        'Recent studies have challenged this long-held scientific consensus.',
    ],
    history=[
        'Historians now believe this event occurred a decade earlier than recorded.',
        'Primary sources from the era suggest a different outcome was reached.',
    ],
    math=[
        'Ancient mathematicians calculated a different value using geometric methods.',
        'Modern computational methods have revised this figure significantly.',
    ],
    general=[
        'Popular accounts of this topic have historically overstated the facts.',
        'The commonly cited figure was revised following more recent research.',
    ],
)

def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    text = text.strip()
    if text.startswith("```"):
        # Drop the whole opening fence line, including a language tag such
        # as "json"; with no newline only the three backticks are removed.
        newline_at = text.find("\n")
        text = text[newline_at + 1:] if newline_at != -1 else text[3:]
        text = text.rstrip()
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


def get_distractors_api(client, question, ground_truth):
    """Ask the chat model for two false-but-plausible statements about a fact.

    Args:
        client: Object exposing ``chat.completions.create(...)`` in the shape
            used below (the script passes an ``openai.OpenAI`` instance).
        question: Question text, interpolated into the prompt.
        ground_truth: Correct answer, interpolated into the prompt.

    Returns:
        A list of exactly two strings (each element is passed through
        ``str``).

    Raises:
        ValueError: If the reply is empty, is not valid JSON, or is not a
            JSON array of exactly two items.
        Exception: Whatever ``client.chat.completions.create`` raises is
            propagated unchanged.
    """
    prompt = (
        "Generate 2 plausible-sounding but FALSE statements about this fact. "
        "Sound authoritative but be wrong. "
        f"Fact: {question} Answer: {ground_truth}. "
        "Return ONLY a JSON array of 2 strings."
    )
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=200,
        temperature=0.9,
    )
    # The message content can be None; treat that as an empty reply so it is
    # reported as a format error instead of an AttributeError.
    raw = (response.choices[0].message.content or "").strip()
    try:
        # The reply may be wrapped in a ```json fence despite the prompt.
        result = json.loads(_strip_code_fence(raw))
    except ValueError as err:  # json.JSONDecodeError is a ValueError
        raise ValueError(f"Bad format: {raw}") from err
    if isinstance(result, list) and len(result) == 2:
        return [str(r) for r in result]
    raise ValueError(f"Bad format: {raw}")

# Decide once, up front, whether the OpenAI API can be used in this run.
# Both names keep their "no API" defaults unless a client is constructed.
client = None
api_available = False
try:
    from openai import OpenAI

    api_key = os.environ.get("OPENAI_API_KEY", "")
    # An unset/empty key and the template placeholder both mean "no real key".
    if api_key not in ("", "your-openai-key-here"):
        client = OpenAI(api_key=api_key)
        api_available = True
        print("OpenAI client ready — will try API first, fallback to static on rate limit")
except Exception as exc:
    # Covers a missing `openai` package as well as client construction errors.
    print(f"OpenAI not available: {exc} — using static fallback for all")

# Counters for the end-of-run summary.
new_count = 0       # rows appended to level2 during this run
fallback_count = 0  # of those, how many used the static fallback

for row in rows:
    # Resume support: rows already present in level2 are left untouched.
    if row['id'] in existing_ids:
        continue

    category = row.get('category', 'general')
    distractors = None
    # Remember whether a request is actually sent, so the throttle at the
    # bottom of the loop is only paid for rows that hit the API.
    used_api = bool(api_available and client)

    # Try API
    if used_api:
        # Single attempt by design: any failure (rate limit or otherwise)
        # drops straight to the static fallback instead of retrying.
        try:
            distractors = get_distractors_api(client, row['question'], row['ground_truth'])
        except Exception as e:
            message = str(e)
            if "429" in message or "rate" in message.lower():
                print(f"  Rate limit on {row['id']}, using fallback...")
            else:
                print(f"  API error on {row['id']}: {e} — using fallback")
            distractors = None

    # Fallback to static
    if distractors is None:
        # Copy so generated rows never alias the shared lists in fallback_map.
        distractors = list(fallback_map.get(category, fallback_map['general']))
        fallback_count += 1

    level2_rows.append({
        'id': row['id'],
        'question': row['question'],
        'ground_truth': row['ground_truth'],
        'category': category,
        'distractors': distractors
    })
    existing_ids.add(row['id'])
    new_count += 1

    # Checkpoint every 10 new rows by rewriting the whole file, so an
    # interrupted run can resume from what has been written so far.
    if new_count % 10 == 0:
        with open(level2_path, 'w') as f:
            for r in level2_rows:
                f.write(json.dumps(r) + '\n')
        print(f"  Saved {new_count} new entries ({fallback_count} used fallback)")

    # Pause between API requests only; static-only rows need no delay.
    if used_api:
        time.sleep(0.5)

# Write the complete level2 set (previously loaded rows plus this run's
# additions) back to disk, one JSON object per line.
with open(level2_path, 'w') as out:
    out.writelines(json.dumps(entry) + '\n' for entry in level2_rows)

# Run summary: "Used API" is every new row that did not take the fallback.
api_count = new_count - fallback_count
print("\nDone!")
print(f"  Total in level2.jsonl: {len(level2_rows)}")
print(f"  New this run: {new_count}")
print(f"  Used API: {api_count}")
print(f"  Used fallback: {fallback_count}")