| from __future__ import annotations |
|
|
| import json |
| import sys |
| from pathlib import Path |
|
|
# ROOT is the directory one level above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT first on the import search path. Presumably this is what lets the
# `app` and `dataset` imports below resolve -- confirm against the repo layout.
sys.path.insert(0, str(ROOT))
|
|
| from app.composer import compose |
| from dataset.generate_dataset import ( |
| SEED, |
| expand_customers, |
| expand_merchants, |
| expand_triggers, |
| load_seeds, |
| random, |
| write_outputs, |
| write_test_pairs, |
| ) |
|
|
|
|
def ensure_expanded() -> Path:
    """Return the ``expanded`` directory under ROOT, generating it if needed.

    The presence of ``test_pairs.json`` inside that directory is treated as
    the marker that the expansion already exists; in that case nothing is
    regenerated. Otherwise the seeds are loaded from ``ROOT / "dataset"``,
    expanded, and written out together with the test pairs.

    Returns:
        Path to the ``expanded`` directory.
    """
    expanded_dir = ROOT / "expanded"
    marker = expanded_dir / "test_pairs.json"

    if not marker.exists():
        # One generator seeded with SEED is shared by every step below, in
        # this order: merchants, customers, triggers, then the test pairs.
        rng = random.Random(SEED)

        seeds = load_seeds(ROOT / "dataset")
        categories, merchant_seeds, customer_seeds, trigger_seeds = seeds

        merchants = expand_merchants(merchant_seeds, rng)
        customers = expand_customers(customer_seeds, merchants, rng)
        triggers = expand_triggers(trigger_seeds, merchants, customers, rng)

        write_outputs(expanded_dir, categories, merchants, customers, triggers)
        write_test_pairs(expanded_dir, triggers, rng)

    return expanded_dir
|
|
|
|
def load_json(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at *path* and return the result.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
|
|
def main() -> None:
    """Compose a result for every test pair and write ``submission.jsonl``.

    Reads the ``pairs`` list from ``test_pairs.json`` in the expanded
    directory. For each pair it loads the merchant, the category named by the
    merchant's ``category_slug``, the trigger, and (only when the pair has a
    truthy ``customer_id``) the customer, then passes them to ``compose``.
    Each result is tagged with the pair's ``test_id`` and written as one JSON
    line to ``ROOT / "submission.jsonl"``.
    """
    expanded_dir = ensure_expanded()
    test_pairs = load_json(expanded_dir / "test_pairs.json")["pairs"]
    output_path = ROOT / "submission.jsonl"

    # newline="\n" keeps line endings as "\n" regardless of platform.
    with output_path.open("w", encoding="utf-8", newline="\n") as out:
        for entry in test_pairs:
            merchant_file = expanded_dir / "merchants" / f"{entry['merchant_id']}.json"
            merchant = load_json(merchant_file)

            # The category is looked up through the merchant, not the pair.
            category_file = expanded_dir / "categories" / f"{merchant['category_slug']}.json"
            category = load_json(category_file)

            trigger_file = expanded_dir / "triggers" / f"{entry['trigger_id']}.json"
            trigger = load_json(trigger_file)

            # Pairs without a customer id pass None to compose().
            customer_id = entry.get("customer_id")
            customer = (
                load_json(expanded_dir / "customers" / f"{customer_id}.json")
                if customer_id
                else None
            )

            composed = compose(category, merchant, trigger, customer)
            composed["test_id"] = entry["test_id"]
            out.write(f"{json.dumps(composed, ensure_ascii=False)}\n")

    print(f"Wrote {output_path}")
|
|
|
|
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|