# Hugging Face Space: t22000t/optcg-deck-builder (status: Sleeping)
# File size: 5,755 Bytes
# Commit: 16eaadc

"""Test fixtures for the deck-builder Space.

200 synthetic cards (``N_CARDS``) with 1024-dim L2-normalized embeddings.
The volume matters here: a 50-card deck with up to 4 copies per card needs
at minimum 13 unique candidates per cost bucket to fill cleanly. 200 cards
across 4 types and 12 color groupings give enough variety for
deck-builder tests to exercise both the cost-curve targeting and the
backfill paths.
"""

from __future__ import annotations

from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd
import pytest
from optcg_cards.provenance import (
    EmbedProvenance,
    FetchProvenance,
    write_provenance,
)

# Length of every synthetic embedding vector.
EMBEDDING_DIM = 1024
# Number of synthetic cards generated by the ``synthetic_cards`` fixture.
N_CARDS = 200

# Twelve color groups: the six mono-color groups first, then six bi-color
# groups that pair each color with the next one (Yellow wraps back to Red).
# The bi-color groups are meant to widen the candidate pool for a chosen
# leader (e.g. a Red leader also overlaps the Red/Green and Yellow/Red
# groups), so the deck builder has room to fill 50 slots at <=4 copies each.
_COLORS_POOL = [
    ["Red"],
    ["Green"],
    ["Blue"],
    ["Purple"],
    ["Black"],
    ["Yellow"],
    ["Red", "Green"],
    ["Green", "Blue"],
    ["Blue", "Purple"],
    ["Purple", "Black"],
    ["Black", "Yellow"],
    ["Yellow", "Red"],
]

# Cycled by card index; the order determines which indices get which type.
_CARD_TYPES = [
    "Character",
    "Event",
    "Stage",
    "Leader",
]

# Cycled by card index.
_RARITIES = [
    "C",
    "UC",
    "R",
    "SR",
    "L",
]

# Each card gets a single-entry family list, cycled by card index.
_FAMILIES = [
    [family_name]
    for family_name in (
        "Straw Hat Crew",
        "Animal Kingdom Pirates",
        "Marines",
        "Worst Generation",
        "Big Mom Pirates",
    )
]


def _color_for(i: int) -> list[str]:
    """Return the color group assigned to the card with index ``i``.

    The pool is walked with a stride of 5 and an offset of 1, wrapping at the
    pool length. The returned list is the pool entry itself, not a copy, so
    cards that map to the same group share one list object.
    """
    # 5 is coprime with the 12-entry pool, so any 12 consecutive indices hit
    # every color group exactly once.
    # NOTE(review): synthetic_cards derives the card type from i % 4, and 4
    # divides 12, so each color group always lands on the same card type
    # (e.g. every ["Red"] card is a Leader). Confirm this coupling is
    # acceptable for the deck-builder tests.
    slot = (5 * i + 1) % len(_COLORS_POOL)
    return _COLORS_POOL[slot]


def _unit_vector(rng: np.random.Generator, dim: int) -> list[float]:
    """Draw a random ``dim``-dimensional vector and scale it to unit L2 norm.

    Args:
        rng: Generator to draw from; it is advanced by one
            ``standard_normal(dim)`` call.
        dim: Number of components in the vector.

    Returns:
        The normalized float32 vector converted to a plain list of floats.
    """
    sample = rng.standard_normal(dim).astype(np.float32)
    length = np.linalg.norm(sample)
    return (sample / length).tolist()


@pytest.fixture
def synthetic_cards() -> list[dict[str, Any]]:
    """Build ``N_CARDS`` deterministic synthetic card records.

    Card ``i`` cycles through ``_CARD_TYPES``, ``_RARITIES`` and ``_FAMILIES``
    by index and takes its color group from ``_color_for(i)``. Costs cycle
    through 1-9, except that every other Stage card has ``cost=None``. Power
    cycles through 1000-9000, counter is ``None``/1000/2000 by ``i % 3``, and
    only Leaders carry ``life`` (5). The RNG is seeded with 42, so UMAP
    coordinates and embeddings are identical on every call.

    Returns:
        A list of card dicts, one per index in ``range(N_CARDS)``.
    """
    rng = np.random.default_rng(seed=42)
    n_types = len(_CARD_TYPES)
    cards: list[dict[str, Any]] = []
    for i in range(N_CARDS):
        ctype = _CARD_TYPES[i % n_types]
        code = f"OP01-{i:03d}"
        # Keyed on the per-type occurrence index (i // n_types) rather than a
        # raw residue of i: with Stage at position 2 of a 4-type cycle, Stage
        # indices have i % 8 in {2, 6}, so a guard such as ``i % 8 == 3`` can
        # never fire and no card would ever get a None cost.
        costless_stage = ctype == "Stage" and (i // n_types) % 2 == 0
        counter_step = i % 3
        cards.append(
            {
                "id": code,
                "code": code,
                "name": f"Card {i}",
                "card_type": ctype,
                "colors": _color_for(i),
                # Spread costs 1-9, with every other Stage at None.
                "cost": None if costless_stage else (1 + i % 9),
                "power": 1000 * (1 + i % 9),
                "counter": counter_step * 1000 if counter_step else None,
                "life": 5 if ctype == "Leader" else None,
                "attribute": "Slash" if i % 2 else "Strike",
                "family": _FAMILIES[i % len(_FAMILIES)],
                "effect_text": f"Effect for card {i}.",
                "trigger_text": "",
                "rarity": _RARITIES[i % len(_RARITIES)],
                "pack_id": "OP01",
                "set_code": "OP01",
                "set_name": "Romance Dawn",
                "language": "en",
                # RNG draw order (umap_x, umap_y, embedding) determines the
                # generated values; keep it stable.
                "umap_x": float(rng.uniform(-10, 10)),
                "umap_y": float(rng.uniform(-10, 10)),
                "embedding": _unit_vector(rng, EMBEDDING_DIM),
            }
        )
    return cards


@pytest.fixture
def synthetic_embed_provenance() -> EmbedProvenance:
    """Provide a fixed ``EmbedProvenance`` record for the synthetic corpus.

    All fields are constants; ``embedding_dim`` follows ``EMBEDDING_DIM`` so
    the record agrees with the vectors built by ``synthetic_cards``.
    """
    instruction = (
        "Instruct: Represent this One Piece Card Game card so that "
        "mechanically similar cards are close in embedding space.\n"
        "Text: {card_document}"
    )
    fields: dict[str, Any] = {
        "model_id": "Qwen/Qwen3-Embedding-0.6B",
        "embedding_dim": EMBEDDING_DIM,
        "matryoshka_dim": None,
        "task_instruction": instruction,
        "embedded_at": "2026-05-14T00:00:00+00:00",
        "sentence_transformers_version": "5.4.1",
    }
    return EmbedProvenance(**fields)


@pytest.fixture
def synthetic_fetch_provenance() -> FetchProvenance:
    """Provide a fixed ``FetchProvenance`` record for the synthetic corpus.

    ``n_cards`` follows ``N_CARDS`` and the only pack listed is ``OP01``,
    matching the records built by ``synthetic_cards``.
    """
    pack = "OP01"
    fields: dict[str, Any] = {
        "source": "vegapull",
        "source_url": "https://en.onepiece-cardgame.com/cardlist/",
        "source_attribution": "vegapull scraping en.onepiece-cardgame.com",
        "source_fetched_at": "2026-05-14T00:00:00+00:00",
        "language": "en",
        "n_cards": N_CARDS,
        "pack_ids_included": [pack],
        "latest_pack_id": pack,
        "vegapull_version": "1.2.2",
    }
    return FetchProvenance(**fields)


@pytest.fixture
def synthetic_repo(
    tmp_path: Path,
    synthetic_cards: list[dict[str, Any]],
    synthetic_fetch_provenance: FetchProvenance,
    synthetic_embed_provenance: EmbedProvenance,
) -> dict[str, Path]:
    """Materialize the synthetic corpus as files under ``tmp_path``.

    Writes the cards to ``cards_with_embeddings.parquet`` and the two
    provenance records to ``provenance.json``.

    Returns:
        A dict with the paths under ``"parquet"`` and ``"provenance"``, plus
        the containing directory under ``"root"``.
    """
    parquet_path = tmp_path / "cards_with_embeddings.parquet"
    prov_path = tmp_path / "provenance.json"

    # Card table first, then the provenance sidecar.
    frame = pd.DataFrame(synthetic_cards)
    frame.to_parquet(parquet_path, index=False)
    write_provenance(
        prov_path,
        fetch=synthetic_fetch_provenance,
        embed=synthetic_embed_provenance,
    )

    return {
        "parquet": parquet_path,
        "provenance": prov_path,
        "root": tmp_path,
    }


@pytest.fixture
def patched_hf_download(
    monkeypatch: pytest.MonkeyPatch,
    synthetic_repo: dict[str, Path],
):
    """Redirect ``hf_hub_download`` to the files in ``synthetic_repo``.

    Replaces ``huggingface_hub.hf_hub_download`` and, when ``spaceutil.data``
    is importable, the ``hf_hub_download`` name on that module too, so that
    ``spaceutil.data.load_corpus`` reads the local synthetic files instead of
    hitting the network.

    Returns:
        The stand-in download function, so tests can also call it directly.
    """
    # Maps each filename the fake serves to its key in ``synthetic_repo``.
    served = (
        ("cards_with_embeddings.parquet", "parquet"),
        ("provenance.json", "provenance"),
    )

    def fake_download(
        repo_id: str,
        filename: str,
        repo_type: str | None = None,
        token: str | None = None,
        **kwargs: Any,
    ) -> str:
        # Only ``filename`` is consulted; the other arguments are accepted
        # and ignored.
        for known_name, repo_key in served:
            if filename == known_name:
                return str(synthetic_repo[repo_key])
        raise FileNotFoundError(f"Unexpected filename in synthetic repo: {filename}")

    import huggingface_hub

    monkeypatch.setattr(huggingface_hub, "hf_hub_download", fake_download)

    try:
        import spaceutil.data as data_mod
    except ImportError:
        # Best effort: without spaceutil.data there is nothing else to patch.
        return fake_download

    # raising=False: set the name even if the module does not define it.
    # Presumably covers a by-name import of hf_hub_download in spaceutil.data.
    monkeypatch.setattr(data_mod, "hf_hub_download", fake_download, raising=False)
    return fake_download