from __future__ import annotations import json import sys from pathlib import Path ROOT = Path(__file__).resolve().parents[1] sys.path.insert(0, str(ROOT)) from app.composer import compose # noqa: E402 from dataset.generate_dataset import ( # noqa: E402 SEED, expand_customers, expand_merchants, expand_triggers, load_seeds, random, write_outputs, write_test_pairs, ) def ensure_expanded() -> Path: out_dir = ROOT / "expanded" if (out_dir / "test_pairs.json").exists(): return out_dir rnd = random.Random(SEED) categories, merchant_seeds, customer_seeds, trigger_seeds = load_seeds(ROOT / "dataset") merchants = expand_merchants(merchant_seeds, rnd) customers = expand_customers(customer_seeds, merchants, rnd) triggers = expand_triggers(trigger_seeds, merchants, customers, rnd) write_outputs(out_dir, categories, merchants, customers, triggers) write_test_pairs(out_dir, triggers, rnd) return out_dir def load_json(path: Path) -> dict: return json.loads(path.read_text(encoding="utf-8")) def main() -> None: out_dir = ensure_expanded() pairs = load_json(out_dir / "test_pairs.json")["pairs"] submission_path = ROOT / "submission.jsonl" with submission_path.open("w", encoding="utf-8", newline="\n") as fp: for pair in pairs: merchant = load_json(out_dir / "merchants" / f"{pair['merchant_id']}.json") category = load_json(out_dir / "categories" / f"{merchant['category_slug']}.json") trigger = load_json(out_dir / "triggers" / f"{pair['trigger_id']}.json") customer = None if pair.get("customer_id"): customer = load_json(out_dir / "customers" / f"{pair['customer_id']}.json") result = compose(category, merchant, trigger, customer) result["test_id"] = pair["test_id"] fp.write(json.dumps(result, ensure_ascii=False) + "\n") print(f"Wrote {submission_path}") if __name__ == "__main__": main()