"""Test fixtures for the deck-builder Space. 80 synthetic cards with 1024-dim L2-normalized embeddings. The volume matters here: a 50-card deck with up to 4 copies per card needs at minimum 13 unique candidates per cost bucket to fill cleanly. 80 cards across 4 types and 8 color groupings gives enough variety for deck-builder tests to exercise both the cost-curve targeting and the backfill paths. """ from __future__ import annotations from pathlib import Path from typing import Any import numpy as np import pandas as pd import pytest from optcg_cards.provenance import ( EmbedProvenance, FetchProvenance, write_provenance, ) EMBEDDING_DIM = 1024 N_CARDS = 200 # 6 base colors + 6 adjacent bi-color combos. Bi-colors widen the # candidate pool for any chosen leader (a Red leader can also draft # Red/Green and Red/Black cards), so the deck builder has room to fill # 50 slots at <=4 copies each. _COLORS_POOL = [ ["Red"], ["Green"], ["Blue"], ["Purple"], ["Black"], ["Yellow"], ["Red", "Green"], ["Green", "Blue"], ["Blue", "Purple"], ["Purple", "Black"], ["Black", "Yellow"], ["Yellow", "Red"], ] _CARD_TYPES = ["Character", "Event", "Stage", "Leader"] _RARITIES = ["C", "UC", "R", "SR", "L"] _FAMILIES = [ ["Straw Hat Crew"], ["Animal Kingdom Pirates"], ["Marines"], ["Worst Generation"], ["Big Mom Pirates"], ] def _color_for(i: int) -> list[str]: # 5 is coprime with 12, so type (i%4) and color cycles never align. return _COLORS_POOL[(i * 5 + 1) % len(_COLORS_POOL)] def _unit_vector(rng: np.random.Generator, dim: int) -> list[float]: v = rng.standard_normal(dim).astype(np.float32) v /= np.linalg.norm(v) return v.tolist() @pytest.fixture def synthetic_cards() -> list[dict[str, Any]]: rng = np.random.default_rng(seed=42) cards: list[dict[str, Any]] = [] for i in range(N_CARDS): ctype = _CARD_TYPES[i % len(_CARD_TYPES)] cards.append( { "id": f"OP01-{i:03d}", "code": f"OP01-{i:03d}", "name": f"Card {i}", "card_type": ctype, "colors": _color_for(i), # Spread costs 1-9 with a few stages at None "cost": None if (ctype == "Stage" and i % 8 == 3) else (1 + i % 9), "power": 1000 * (1 + i % 9), "counter": (i % 3) * 1000 if (i % 3) else None, "life": 5 if ctype == "Leader" else None, "attribute": "Slash" if i % 2 else "Strike", "family": _FAMILIES[i % len(_FAMILIES)], "effect_text": f"Effect for card {i}.", "trigger_text": "", "rarity": _RARITIES[i % len(_RARITIES)], "pack_id": "OP01", "set_code": "OP01", "set_name": "Romance Dawn", "language": "en", "umap_x": float(rng.uniform(-10, 10)), "umap_y": float(rng.uniform(-10, 10)), "embedding": _unit_vector(rng, EMBEDDING_DIM), } ) return cards @pytest.fixture def synthetic_embed_provenance() -> EmbedProvenance: return EmbedProvenance( model_id="Qwen/Qwen3-Embedding-0.6B", embedding_dim=EMBEDDING_DIM, matryoshka_dim=None, task_instruction=( "Instruct: Represent this One Piece Card Game card so that " "mechanically similar cards are close in embedding space.\n" "Text: {card_document}" ), embedded_at="2026-05-14T00:00:00+00:00", sentence_transformers_version="5.4.1", ) @pytest.fixture def synthetic_fetch_provenance() -> FetchProvenance: return FetchProvenance( source="vegapull", source_url="https://en.onepiece-cardgame.com/cardlist/", source_attribution="vegapull scraping en.onepiece-cardgame.com", source_fetched_at="2026-05-14T00:00:00+00:00", language="en", n_cards=N_CARDS, pack_ids_included=["OP01"], latest_pack_id="OP01", vegapull_version="1.2.2", ) @pytest.fixture def synthetic_repo( tmp_path: Path, synthetic_cards: list[dict[str, Any]], synthetic_fetch_provenance: FetchProvenance, synthetic_embed_provenance: EmbedProvenance, ) -> dict[str, Path]: parquet_path = tmp_path / "cards_with_embeddings.parquet" pd.DataFrame(synthetic_cards).to_parquet(parquet_path, index=False) prov_path = tmp_path / "provenance.json" write_provenance( prov_path, fetch=synthetic_fetch_provenance, embed=synthetic_embed_provenance, ) return {"parquet": parquet_path, "provenance": prov_path, "root": tmp_path} @pytest.fixture def patched_hf_download( monkeypatch: pytest.MonkeyPatch, synthetic_repo: dict[str, Path], ): """Patch huggingface_hub.hf_hub_download so spaceutil.data.load_corpus pulls from the local synthetic_repo instead of the network.""" def fake_download( repo_id: str, filename: str, repo_type: str | None = None, token: str | None = None, **kwargs: Any, ) -> str: if filename == "cards_with_embeddings.parquet": return str(synthetic_repo["parquet"]) if filename == "provenance.json": return str(synthetic_repo["provenance"]) raise FileNotFoundError(f"Unexpected filename in synthetic repo: {filename}") import huggingface_hub monkeypatch.setattr(huggingface_hub, "hf_hub_download", fake_download) try: import spaceutil.data as data_mod monkeypatch.setattr(data_mod, "hf_hub_download", fake_download, raising=False) except ImportError: pass return fake_download