# vera-rubric-decision-engine/scripts/generate_submission.py
# mokshak's picture
# Generalize merchant asset bridge
# 6ef36d5 verified
from __future__ import annotations
import json
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT))
from app.composer import compose # noqa: E402
from dataset.generate_dataset import ( # noqa: E402
SEED,
expand_customers,
expand_merchants,
expand_triggers,
load_seeds,
random,
write_outputs,
write_test_pairs,
)
def ensure_expanded() -> Path:
    """Return the expanded-dataset directory, generating it when missing.

    The directory is ``ROOT / "expanded"``. Presence of ``test_pairs.json``
    inside it is the only signal used to decide that the dataset already
    exists; in that case nothing is regenerated.

    Otherwise the seeds are loaded from ``ROOT / "dataset"``, expanded with
    an RNG seeded from ``SEED``, and written out together with the test
    pairs.
    """
    target = ROOT / "expanded"
    pairs_marker = target / "test_pairs.json"

    if not pairs_marker.exists():
        # ``random`` is the name imported from dataset.generate_dataset.
        # The same RNG instance is handed to every step in sequence, so the
        # calls below are kept in their original order.
        rng = random.Random(SEED)
        seeds = load_seeds(ROOT / "dataset")
        categories, merchant_seeds, customer_seeds, trigger_seeds = seeds

        merchants = expand_merchants(merchant_seeds, rng)
        customers = expand_customers(customer_seeds, merchants, rng)
        triggers = expand_triggers(trigger_seeds, merchants, customers, rng)

        write_outputs(target, categories, merchants, customers, triggers)
        write_test_pairs(target, triggers, rng)

    return target
def load_json(path: Path) -> dict:
    """Parse the UTF-8 encoded JSON file at *path* and return the result."""
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)
def _compose_pair(data_dir: Path, pair: dict) -> dict:
    """Load the inputs referenced by *pair* and return the composed record.

    Files are read in the order merchant, category, trigger, customer. The
    category file is selected by the merchant's ``category_slug``. The
    customer is ``None`` when the pair has no truthy ``customer_id``.
    """
    merchant = load_json(data_dir / "merchants" / f"{pair['merchant_id']}.json")
    category = load_json(data_dir / "categories" / f"{merchant['category_slug']}.json")
    trigger = load_json(data_dir / "triggers" / f"{pair['trigger_id']}.json")

    customer_id = pair.get("customer_id")
    customer = (
        load_json(data_dir / "customers" / f"{customer_id}.json")
        if customer_id
        else None
    )

    record = compose(category, merchant, trigger, customer)
    # Tag the composed result with the id of the pair that produced it.
    record["test_id"] = pair["test_id"]
    return record


def main() -> None:
    """Write ``submission.jsonl`` under ``ROOT``, one JSON line per test pair.

    The expanded dataset is generated first if it is missing. Each entry of
    ``test_pairs.json``'s ``"pairs"`` list is composed and serialized on its
    own line, without ASCII-escaping and with ``\\n`` line endings. The
    output path is printed once every pair has been written.
    """
    data_dir = ensure_expanded()
    test_pairs = load_json(data_dir / "test_pairs.json")["pairs"]
    submission_path = ROOT / "submission.jsonl"

    with submission_path.open("w", encoding="utf-8", newline="\n") as sink:
        for pair in test_pairs:
            record = _compose_pair(data_dir, pair)
            sink.write(f"{json.dumps(record, ensure_ascii=False)}\n")

    print(f"Wrote {submission_path}")
# Run the generator only when this file is executed as a script, so that
# importing the module does not write submission.jsonl.
if __name__ == "__main__":
    main()