Spaces:

t22000t
/

optcg-deck-builder

Sleeping

t22000t committed 4 days ago

Commit

16eaadc

0 Parent(s):

Initial commit: optcg-deck-builder Gradio Space

Auto-generates legal 50-card OPTCG decks anchored on a chosen Leader.
Same color-legality + family-bonus synergy ranker as the explorer
Space, layered with three cost-curve presets (aggro/midrange/control)
and a 4-copies-per-card cap. Always produces exactly 50 cards: cost
buckets fill first, then a backfill pass picks up any deficit from the
remaining top-synergy candidates. No encoder loaded - the leader's
vector comes from the precomputed corpus matrix, so cold start is
seconds rather than tens of seconds. 30 hermetic tests cover the size
invariant, color legality, copy cap, leader exclusion, style
sensitivity, plain-text export, and the breakdown plots.

Files changed (15) hide show

.gitignore +36 -0
README.md +65 -0
app.py +391 -0
pyproject.toml +14 -0
requirements.txt +4 -0
spaceutil/__init__.py +0 -0
spaceutil/data.py +86 -0
spaceutil/deck.py +263 -0
spaceutil/plot.py +112 -0
spaceutil/synergy.py +126 -0
tests/__init__.py +0 -0
tests/conftest.py +174 -0
tests/test_data.py +112 -0
tests/test_deck.py +222 -0
tests/test_plot.py +78 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,36 @@

+# Python
+__pycache__/
+*.py[cod]
+*.so
+# Virtual environments
+.venv/
+venv/
+# Testing
+.pytest_cache/
+.coverage
+# Linters
+.ruff_cache/
+.mypy_cache/
+# IDEs
+.vscode/
+.idea/
+*.swp
+# OS
+.DS_Store
+# Environment
+.env
+.env.local
+# HuggingFace
+.cache/huggingface/
+# Claude Code
+CLAUDE.md
+.claude/
+.reliability-mode

README.md ADDED Viewed

	@@ -0,0 +1,65 @@

+---
+title: OPTCG Deck Builder
+colorFrom: red
+colorTo: blue
+sdk: gradio
+sdk_version: 5.49.1
+python_version: "3.12"
+app_file: app.py
+pinned: false
+license: mit
+short_description: Auto-build legal 50-card One Piece TCG decks
+---
+# OPTCG Deck Builder
+Pick any **Leader** from the [optcg-en-card-embeddings](https://huggingface.co/datasets/t22000t/optcg-en-card-embeddings) dataset and the Space generates a legal 50-card OPTCG deck around it. The ranker layers OPTCG deckbuilding rules on top of the Qwen3-Embedding-derived synergy score:
+- **Color legality** - every card shares at least one color with the Leader.
+- **Family bonus** - cards in the Leader's family (Straw Hat Crew, Marines, etc.) get a +0.10 synergy boost so they outrank merely-similar off-archetype options.
+- **Copy cap** - each card appears at most 4 times (the OPTCG standard).
+- **Cost curve targeting** - one of three style presets shapes how slots are distributed across costs.
+Sister Space: [OPTCG Card Explorer](https://huggingface.co/spaces/t22000t/optcg-explorer) - semantic search, UMAP scatter, similar-cards browser.
+## Style presets
+Each preset is a target distribution that sums to 50 cards. The builder fills cost buckets in order, and any deficit at the end spills into a backfill pass over remaining top-synergy cards (so the total is *always* 50).
+| Cost   | aggro | midrange | control |
+|--------|------:|---------:|--------:|
+| 1      |     4 |        0 |       0 |
+| 2      |    12 |        6 |       4 |
+| 3      |    12 |       10 |       8 |
+| 4      |     8 |       10 |       8 |
+| 5      |     6 |        8 |       8 |
+| 6      |     4 |        8 |       8 |
+| 7      |     2 |        4 |       6 |
+| 8+     |     2 |        4 |       8 |
+## What the Space does *not* do
+- No archetype/strategy detection (it does not know that, say, "Monkey.D.Luffy / OP01-001" is the aggro leader).
+- No banlist or meta awareness.
+- No card art (this is a structured-data / text-only project by design - see [parent project](https://github.com/timothy22000/optcg-cards) for the IP rationale).
+- The result is a sketch you tweak, not a tournament-ready list.
+## Configuration
+- `HF_TOKEN` (Space secret) - required only while the source dataset stays private.
+## Development
+```bash
+pip install -r requirements.txt
+export HF_TOKEN=hf_...   # only if the source dataset is private
+python app.py
+```
+```bash
+pytest -v        # 30 hermetic tests
+```
+## License
+MIT. Card data via the [vegapull](https://github.com/Coko7/vegapull) scraper. Not affiliated with Bandai or the One Piece Card Game.

app.py ADDED Viewed

	@@ -0,0 +1,391 @@

+"""OPTCG Deck Builder - Gradio Space.
+Auto-generates a 50-card OPTCG deck anchored on a chosen Leader. The
+ranker is the same color-legality + family-bonus synergy used in the
+explorer Space; the deck builder layers a target cost curve on top
+(aggro/midrange/control presets) and a 4-copies-per-card cap.
+No embedding model is loaded - the leader's vector is read directly
+from the precomputed corpus matrix. That keeps cold start to seconds
+and the Space lightweight on a free CPU runner.
+Data source: https://huggingface.co/datasets/t22000t/optcg-en-card-embeddings
+"""
+from __future__ import annotations
+import logging
+import os
+from typing import Any
+import gradio as gr
+from spaceutil.data import load_corpus
+from spaceutil.deck import COST_CURVES, Deck, build_deck, deck_to_text
+from spaceutil.plot import (
+    build_color_breakdown_figure,
+    build_cost_curve_figure,
+    build_type_breakdown_figure,
+)
+logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
+logger = logging.getLogger("optcg-deck-builder")
+# ----------------------------------------------------------------------------
+# Startup
+# ----------------------------------------------------------------------------
+logger.info("Loading corpus from HF Hub...")
+CARDS, MATRIX, EMBED_PROV, ID_TO_IDX = load_corpus(token=os.environ.get("HF_TOKEN"))
+logger.info("Corpus loaded: %d cards, matrix shape %s", len(CARDS), MATRIX.shape)
+LEADER_CHOICES = sorted(
+    f"{c['name']} ({c['id']})"
+    for c in CARDS
+    if c.get("card_type") == "Leader"
+)
+N_CARDS = len(CARDS)
+N_SETS = len({c.get("set_code") for c in CARDS if c.get("set_code")})
+N_LEADERS = sum(1 for c in CARDS if c.get("card_type") == "Leader")
+LATEST_SET = max((c.get("set_code") or "" for c in CARDS), default="?")
+EMBEDDING_DIM = MATRIX.shape[1]
+STYLE_OPTIONS = sorted(COST_CURVES.keys())
+# ----------------------------------------------------------------------------
+# Display helpers
+# ----------------------------------------------------------------------------
+DECK_HEADERS = [
+    "Qty", "ID", "Name", "Cost", "Type", "Synergy", "Family", "Colors", "Set",
+]
+def deck_to_rows(deck: Deck | None) -> list[list[Any]]:
+    if deck is None:
+        return []
+    return [
+        [
+            dc.quantity,
+            dc.card_id,
+            dc.name,
+            dc.cost if dc.cost is not None else "-",
+            dc.card_type,
+            round(dc.synergy_score, 4),
+            "yes" if dc.family_match else "no",
+            ", ".join(dc.colors),
+            dc.set_code,
+        ]
+        for dc in deck.cards
+    ]
+LEADER_DETAIL_FIELDS = (
+    ("card_type", "Type"),
+    ("colors", "Colors"),
+    ("life", "Life"),
+    ("attribute", "Attribute"),
+    ("family", "Family"),
+    ("rarity", "Rarity"),
+    ("set_name", "Set"),
+    ("effect_text", "Effect"),
+)
+def _fmt_value(value: Any) -> str:
+    if value is None or value == "":
+        return "-"
+    if isinstance(value, list):
+        return ", ".join(str(v) for v in value) if value else "-"
+    return str(value)
+def format_leader_detail(card: dict[str, Any]) -> str:
+    lines = [f"### {card.get('name', '?')}\n`{card.get('id', '?')}`\n"]
+    for key, label in LEADER_DETAIL_FIELDS:
+        lines.append(f"**{label}:** {_fmt_value(card.get(key))}")
+    return "\n\n".join(lines)
+def format_summary(deck: Deck | None) -> str:
+    if deck is None:
+        return "*Pick a leader and click Build deck.*"
+    lines = [
+        f"**Total cards:** {deck.total_quantity} / 50",
+        f"**Average cost:** {deck.avg_cost:.2f}",
+        f"**Style:** {deck.style}",
+        f"**Family-match cards:** {deck.family_match_count} / {deck.total_quantity}",
+        f"**Unique cards:** {len(deck.cards)}",
+    ]
+    return "\n\n".join(lines)
+# ----------------------------------------------------------------------------
+# Event handlers
+# ----------------------------------------------------------------------------
+def _selection_to_idx(selection: str) -> int | None:
+    if not selection:
+        return None
+    if "(" in selection and selection.endswith(")"):
+        card_id = selection.rsplit("(", 1)[1][:-1]
+    else:
+        card_id = selection
+    return ID_TO_IDX.get(card_id)
+def on_leader_change(selection: str):
+    idx = _selection_to_idx(selection)
+    if idx is None:
+        return "*Pick a Leader to see its details.*"
+    return format_leader_detail(CARDS[idx])
+def on_build(selection: str, style: str, max_copies: int):
+    idx = _selection_to_idx(selection)
+    if idx is None:
+        empty_fig = build_cost_curve_figure(None)
+        return (
+            "*Pick a Leader first.*",
+            "*Pick a Leader first.*",
+            empty_fig,
+            build_type_breakdown_figure(None),
+            build_color_breakdown_figure(None),
+            [],
+            "",
+        )
+    deck = build_deck(idx, CARDS, MATRIX, style=style, max_copies=int(max_copies))
+    return (
+        format_leader_detail(CARDS[idx]),
+        format_summary(deck),
+        build_cost_curve_figure(deck),
+        build_type_breakdown_figure(deck),
+        build_color_breakdown_figure(deck),
+        deck_to_rows(deck),
+        deck_to_text(deck),
+    )
+# ----------------------------------------------------------------------------
+# UI
+# ----------------------------------------------------------------------------
+CUSTOM_CSS = """
+.gradio-container { max-width: 1280px !important; margin: 0 auto !important; }
+#header-row h1 { margin-bottom: 0.25em; }
+#header-row .subtitle { color: var(--body-text-color-subdued); margin-top: 0; }
+.stats-pill {
+  display: inline-block;
+  padding: 4px 10px;
+  margin: 2px 4px 2px 0;
+  border-radius: 12px;
+  background: var(--background-fill-secondary);
+  border: 1px solid var(--border-color-primary);
+  font-size: 0.85em;
+}
+.muted { color: var(--body-text-color-subdued); font-size: 0.9em; }
+"""
+INSTRUCTIONS_MD = f"""
+**How to build a deck**
+1. **Pick a Leader** from the dropdown (~{N_LEADERS} leaders in the corpus). The Leader anchors color identity, archetype, and the synergy scoring.
+2. **Choose a style**:
+   - `aggro` - cost curve weighted to 1-3, flood the early board.
+   - `midrange` - 3-6 cost dominant, the safe default.
+   - `control` - 4-8 cost weighted, bigger threats and fewer turns to defend.
+3. **Set max copies** (1-4). Standard OPTCG rules cap at 4. Lowering the cap forces more variety.
+4. Click **Build deck**. You get a 50-card list, a cost-curve check against the target preset, and type/color breakdowns.
+5. **Export** the plain-text deck list at the bottom and paste into your sim of choice.
+**What the builder does**
+It scores every color-legal candidate by `cosine_similarity(leader, card) + family_bonus`, then walks the chosen cost curve bucket by bucket, taking top-synergy cards (up to `max_copies` each) until each bucket is filled. If a bucket is short, the deficit spills into a backfill pass that takes the remaining best-synergy cards regardless of cost - the deck total is *always* exactly 50.
+**What it does not do (yet)**
+- No archetype/strategy detection (it doesn't know whether your leader is "the rush leader" or "the control leader").
+- No banlist or competitive-meta awareness.
+- No DON!! deck (always 10, hardcoded across the game).
+- The result is a *starting point* for tweaking, not a tournament-ready list.
+"""
+ABOUT_MD = f"""
+### How synergy is scored
+Each card gets a score of `cosine_similarity(leader_vector, card_vector) + 0.10 if same_family else 0`. Vectors come from `Qwen/Qwen3-Embedding-0.6B` ({EMBEDDING_DIM}-dim, L2-normalized) on the published [optcg-en-card-embeddings](https://huggingface.co/datasets/t22000t/optcg-en-card-embeddings) dataset.
+Color legality is a hard filter (you must share at least one color with the leader). Other Leader cards are dropped.
+### Why styles matter
+Two decks built around the same leader can play very differently depending on cost distribution. The presets are deliberately blunt - they're starting shapes, not optimized curves:
+- aggro: 4-12-12-8-6-4-2-2 (sum 50)
+- midrange: 0-6-10-10-8-8-4-4
+- control: 0-4-8-8-8-8-6-8
+### Source
+Card data from [vegapull](https://github.com/Coko7/vegapull) scraping the official One Piece Card Game site. Pipeline: [github.com/timothy22000/optcg-cards](https://github.com/timothy22000/optcg-cards). Sister demo: [OPTCG Card Explorer](https://huggingface.co/spaces/t22000t/optcg-explorer) (semantic search + UMAP + similar-cards browser).
+Not affiliated with Bandai or the One Piece Card Game.
+"""
+with gr.Blocks(
+    title="OPTCG Deck Builder",
+    theme=gr.themes.Soft(primary_hue="red", secondary_hue="blue"),
+    css=CUSTOM_CSS,
+) as demo:
+    # ----- Header -----
+    with gr.Row(elem_id="header-row"):
+        gr.Markdown(
+            f"""# OPTCG Deck Builder
+<p class="subtitle">Auto-generate a legal 50-card One Piece Card Game deck anchored on any Leader.</p>
+<div>
+  <span class="stats-pill"><b>{N_CARDS}</b> cards</span>
+  <span class="stats-pill"><b>{N_LEADERS}</b> leaders</span>
+  <span class="stats-pill"><b>{N_SETS}</b> sets</span>
+  <span class="stats-pill">latest <b>{LATEST_SET}</b></span>
+  <span class="stats-pill">3 styles</span>
+</div>
+<p class="muted">Dataset: <a href="https://huggingface.co/datasets/t22000t/optcg-en-card-embeddings" target="_blank">t22000t/optcg-en-card-embeddings</a> &nbsp;&middot;&nbsp; Code: <a href="https://github.com/timothy22000/optcg-cards" target="_blank">github.com/timothy22000/optcg-cards</a> &nbsp;&middot;&nbsp; Sister: <a href="https://huggingface.co/spaces/t22000t/optcg-explorer" target="_blank">OPTCG Card Explorer</a></p>
+"""
+        )
+    # ----- Instructions -----
+    with gr.Accordion("How to use this Space", open=True):
+        gr.Markdown(INSTRUCTIONS_MD)
+    # ----- Controls -----
+    with gr.Row():
+        leader_picker = gr.Dropdown(
+            choices=LEADER_CHOICES,
+            label="Leader",
+            value=None,
+            allow_custom_value=False,
+            filterable=True,
+            info="Type to filter by name or card ID.",
+            scale=4,
+        )
+        style_picker = gr.Dropdown(
+            choices=STYLE_OPTIONS,
+            value="midrange",
+            label="Style",
+            scale=1,
+        )
+        max_copies_slider = gr.Slider(
+            minimum=1, maximum=4, value=4, step=1,
+            label="Max copies",
+            scale=1,
+        )
+        build_btn = gr.Button("Build deck", variant="primary", scale=1)
+    # ----- Leader detail + summary -----
+    with gr.Row():
+        leader_detail_md = gr.Markdown(
+            "*Pick a Leader to see its details.*",
+            label="Leader",
+        )
+        summary_md = gr.Markdown(
+            "*Pick a Leader and click Build deck.*",
+            label="Deck summary",
+        )
+    # ----- Charts -----
+    with gr.Row():
+        cost_curve_plot = gr.Plot(
+            value=build_cost_curve_figure(None),
+            label="Cost curve",
+        )
+    with gr.Row():
+        with gr.Column():
+            type_plot = gr.Plot(
+                value=build_type_breakdown_figure(None),
+                label="Type mix",
+            )
+        with gr.Column():
+            color_plot = gr.Plot(
+                value=build_color_breakdown_figure(None),
+                label="Color mix",
+            )
+    # ----- Deck list -----
+    gr.Markdown("### Deck list (sorted by cost, then synergy)")
+    deck_df = gr.Dataframe(
+        headers=DECK_HEADERS,
+        value=[],
+        label="Cards",
+        interactive=False,
+        wrap=True,
+        column_widths=["6%", "11%", "26%", "6%", "11%", "10%", "8%", "14%", "8%"],
+    )
+    # ----- Export -----
+    with gr.Accordion("Plain-text export", open=False):
+        gr.Markdown(
+            "Copy this and paste into your sim of choice. Format: one card per line, "
+            "`<qty>x <ID> <Name>`."
+        )
+        export_text = gr.Textbox(
+            value="",
+            label="Deck text",
+            lines=12,
+            max_lines=20,
+            show_copy_button=True,
+            interactive=False,
+        )
+    # ----- About -----
+    with gr.Accordion("About this Space", open=False):
+        gr.Markdown(ABOUT_MD)
+    gr.Markdown(
+        '<div class="muted" style="text-align:center; padding:12px 0;">'
+        'Built with <a href="https://gradio.app" target="_blank">Gradio</a>. '
+        'Embeddings: Qwen/Qwen3-Embedding-0.6B. '
+        'Card data via <a href="https://github.com/Coko7/vegapull" target="_blank">vegapull</a>. '
+        'Not affiliated with Bandai or the One Piece Card Game.'
+        '</div>'
+    )
+    # ----- Wiring -----
+    leader_picker.change(
+        on_leader_change,
+        inputs=[leader_picker],
+        outputs=[leader_detail_md],
+    )
+    build_outputs = [
+        leader_detail_md,
+        summary_md,
+        cost_curve_plot,
+        type_plot,
+        color_plot,
+        deck_df,
+        export_text,
+    ]
+    build_btn.click(
+        on_build,
+        inputs=[leader_picker, style_picker, max_copies_slider],
+        outputs=build_outputs,
+    )
+    style_picker.change(
+        on_build,
+        inputs=[leader_picker, style_picker, max_copies_slider],
+        outputs=build_outputs,
+    )
+    max_copies_slider.change(
+        on_build,
+        inputs=[leader_picker, style_picker, max_copies_slider],
+        outputs=build_outputs,
+    )
+if __name__ == "__main__":
+    demo.launch()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,14 @@

+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-ra -q --strict-markers -m 'not network'"
+markers = [
+    "network: tests that hit the network (opt-in via `-m network`)",
+]
+[tool.ruff]
+line-length = 100
+target-version = "py310"
+[tool.ruff.lint]
+select = ["E", "F", "W", "I", "B", "UP", "SIM"]
+ignore = ["E501"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+gradio>=5.0,<6
+huggingface_hub>=0.30
+plotly>=5.20
+optcg-cards @ git+https://github.com/timothy22000/optcg-cards@v0.1.0

spaceutil/__init__.py ADDED Viewed

File without changes

spaceutil/data.py ADDED Viewed

	@@ -0,0 +1,86 @@

+"""Load the published OPTCG embeddings corpus from HF Hub.
+Pulls `cards_with_embeddings.parquet` and `provenance.json` from the
+configured dataset repo, applies the same numpy-array-to-list coercion
+that the upstream CLI uses, and stacks the embedding column into a
+single float32 matrix that downstream code reuses without restacking.
+"""
+from __future__ import annotations
+import logging
+from pathlib import Path
+from typing import Any
+import numpy as np
+import pandas as pd
+from huggingface_hub import hf_hub_download
+from optcg_cards.provenance import EmbedProvenance, read_provenance
+logger = logging.getLogger(__name__)
+REPO_ID = "t22000t/optcg-en-card-embeddings"
+PARQUET_FILE = "cards_with_embeddings.parquet"
+PROVENANCE_FILE = "provenance.json"
+def load_corpus(
+    token: str | None,
+) -> tuple[list[dict[str, Any]], np.ndarray, EmbedProvenance, dict[str, int]]:
+    """Return `(cards, matrix, embed_provenance, id_to_idx)` for the
+    published embeddings corpus.
+    The `embedding` column is stripped from `cards` after stacking into
+    `matrix`. All list-typed columns are coerced to plain Python lists.
+    The token is passed to `hf_hub_download` but never written to logs.
+    """
+    logger.info(
+        "Loading corpus from %s (authenticated=%s)",
+        REPO_ID,
+        "yes" if token else "no",
+    )
+    parquet_path = hf_hub_download(
+        repo_id=REPO_ID,
+        filename=PARQUET_FILE,
+        repo_type="dataset",
+        token=token,
+    )
+    prov_path = hf_hub_download(
+        repo_id=REPO_ID,
+        filename=PROVENANCE_FILE,
+        repo_type="dataset",
+        token=token,
+    )
+    cards = _read_parquet_records(Path(parquet_path))
+    if not cards:
+        raise RuntimeError("Embeddings parquet returned 0 rows")
+    matrix = np.stack(
+        [np.asarray(c["embedding"], dtype=np.float32) for c in cards],
+        axis=0,
+    )
+    for card in cards:
+        card.pop("embedding", None)
+    id_to_idx = {card["id"]: i for i, card in enumerate(cards)}
+    _, embed_prov = read_provenance(Path(prov_path))
+    if embed_prov is None:
+        raise RuntimeError("Embeddings provenance is missing the `embed` block")
+    return cards, matrix, embed_prov, id_to_idx
+def _read_parquet_records(path: Path) -> list[dict[str, Any]]:
+    # Mirrors the coercion loop in optcg_cards.cli._read_parquet
+    # (cli.py:429-443). Pandas materializes list-typed parquet columns
+    # as ndarrays; downstream code expects plain Python lists.
+    df = pd.read_parquet(str(path))
+    records = df.to_dict(orient="records")
+    for record in records:
+        for key, value in record.items():
+            if isinstance(value, np.ndarray):
+                record[key] = value.tolist()
+    return records

spaceutil/deck.py ADDED Viewed

	@@ -0,0 +1,263 @@

+"""Auto-build a 50-card OPTCG deck around a chosen Leader.
+Algorithm
+---------
+1. Score every color-legal candidate via `recommend_synergy`
+   (cosine_similarity to leader + family bonus). Other Leader cards
+   and the chosen leader itself are excluded by the synergy step.
+2. Walk a *target cost curve* (chosen by `style`) bucket by bucket.
+   For each bucket, take top-synergy cards in that cost slot, assigning
+   up to `max_copies` per card, until the bucket is filled.
+3. If a cost bucket has insufficient candidates (rare in real corpora,
+   common in narrow synthetic fixtures), the deficit spills into a
+   final backfill pass that consumes the highest-synergy remaining
+   cards regardless of cost. Backfill respects the per-card copy cap.
+The result is always exactly 50 cards. Color legality and the copy cap
+are hard invariants enforced at every step.
+Cost curve presets
+------------------
+- aggro:    weighted to 1-3 cost - flood the early board.
+- midrange: 3-6 cost dominant - the safe default.
+- control:  4-8 cost weighted - bigger threats, less early presence.
+Each preset is a dict[int, int] summing to exactly 50.
+"""
+from __future__ import annotations
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Any
+import numpy as np
+from spaceutil.synergy import recommend_synergy
+DEFAULT_DECK_SIZE = 50
+DEFAULT_MAX_COPIES = 4
+# Each preset must sum to exactly DEFAULT_DECK_SIZE. The cost-8 bucket
+# captures everything 8+ (8, 9, 10, ...).
+COST_CURVES: dict[str, dict[int, int]] = {
+    "aggro":    {1: 4, 2: 12, 3: 12, 4: 8, 5: 6, 6: 4, 7: 2, 8: 2},
+    "midrange": {1: 0, 2: 6,  3: 10, 4: 10, 5: 8, 6: 8, 7: 4, 8: 4},
+    "control":  {1: 0, 2: 4,  3: 8,  4: 8,  5: 8, 6: 8, 7: 6, 8: 8},
+}
+@dataclass(frozen=True)
+class DeckCard:
+    """One line of a deck: a card plus the number of copies included."""
+    card_id: str
+    name: str
+    quantity: int  # copies of this card in the deck
+    cost: int | None  # None when the card has no cost
+    card_type: str
+    colors: list[str]
+    family: list[str]  # taken from the corpus card record ([] if absent)
+    rarity: str  # taken from the corpus card record ("" if absent)
+    set_code: str
+    synergy_score: float  # the synergy hit's total_score
+    family_match: bool  # the synergy hit's family_match flag
+@dataclass(frozen=True)
+class Deck:
+    leader: dict[str, Any]
+    cards: list[DeckCard]
+    style: str
+    target_curve: dict[int, int] = field(default_factory=dict)
+    @property
+    def total_quantity(self) -> int:
+        return sum(c.quantity for c in self.cards)
+    @property
+    def total_cost(self) -> int:
+        return sum((c.cost or 0) * c.quantity for c in self.cards)
+    @property
+    def avg_cost(self) -> float:
+        # Average is over the cards that actually have a cost (Stages
+        # without cost are excluded from the denominator). For typical
+        # OPTCG decks ~all cards have cost, so this matches intuition.
+        priced = [c for c in self.cards if c.cost is not None]
+        total_qty = sum(c.quantity for c in priced)
+        if total_qty == 0:
+            return 0.0
+        return sum((c.cost or 0) * c.quantity for c in priced) / total_qty
+    @property
+    def cost_distribution(self) -> dict[int, int]:
+        dist: dict[int, int] = {}
+        for c in self.cards:
+            if c.cost is None:
+                continue
+            bucket = min(int(c.cost), 8)
+            dist[bucket] = dist.get(bucket, 0) + c.quantity
+        return dist
+    @property
+    def type_distribution(self) -> dict[str, int]:
+        dist: dict[str, int] = {}
+        for c in self.cards:
+            dist[c.card_type] = dist.get(c.card_type, 0) + c.quantity
+        return dist
+    @property
+    def color_distribution(self) -> dict[str, int]:
+        dist: dict[str, int] = {}
+        for c in self.cards:
+            for color in c.colors or ["?"]:
+                dist[color] = dist.get(color, 0) + c.quantity
+        return dist
+    @property
+    def family_match_count(self) -> int:
+        return sum(c.quantity for c in self.cards if c.family_match)
+def build_deck(
+    leader_idx: int,
+    cards: list[dict[str, Any]],
+    matrix: np.ndarray,
+    style: str = "midrange",
+    max_copies: int = DEFAULT_MAX_COPIES,
+    deck_size: int = DEFAULT_DECK_SIZE,
+) -> Deck:
+    if style not in COST_CURVES:
+        raise ValueError(
+            f"Unknown style {style!r}. Available: {sorted(COST_CURVES)}"
+        )
+    leader = cards[leader_idx]
+    if leader.get("card_type") != "Leader":
+        raise ValueError(
+            f"Card at index {leader_idx} ({leader.get('id')!r}) is not a Leader"
+        )
+    # Pull every color-legal candidate (synergy-ranked, leaders/self excluded).
+    all_hits = recommend_synergy(leader_idx, cards, matrix, k=len(cards))
+    # Group by cost bucket; cost None goes into a separate "no-cost" pile
+    # and is only used during backfill (most OPTCG cards have a cost).
+    by_bucket: dict[int, list] = defaultdict(list)
+    no_cost: list = []
+    for hit in all_hits:
+        if hit.cost is None:
+            no_cost.append(hit)
+        else:
+            by_bucket[min(int(hit.cost), 8)].append(hit)
+    target = COST_CURVES[style]
+    deck: list[DeckCard] = []
+    copies_used: dict[str, int] = defaultdict(int)
+    total = 0
+    # Pass 1: fill each bucket from its top-synergy candidates.
+    for cost in sorted(target.keys()):
+        want = min(target[cost], deck_size - total)
+        taken = 0
+        for hit in by_bucket.get(cost, []):
+            if taken >= want:
+                break
+            available = max_copies - copies_used[hit.card_id]
+            if available <= 0:
+                continue
+            qty = min(available, want - taken)
+            deck.append(_to_deck_card(hit, qty, cards))
+            copies_used[hit.card_id] += qty
+            taken += qty
+            total += qty
+    # Pass 2: backfill any remainder from the highest-synergy candidates
+    # not yet at their copy cap, regardless of cost. This is what makes
+    # the size invariant hold even when the target curve is unfillable
+    # at exact cost slots.
+    if total < deck_size:
+        for hit in all_hits:
+            if total >= deck_size:
+                break
+            available = max_copies - copies_used[hit.card_id]
+            if available <= 0:
+                continue
+            qty = min(available, deck_size - total)
+            # If we've already added this card, bump its quantity rather
+            # than appending a duplicate row.
+            existing = next((dc for dc in deck if dc.card_id == hit.card_id), None)
+            if existing is None:
+                deck.append(_to_deck_card(hit, qty, cards))
+            else:
+                deck[deck.index(existing)] = _bump_quantity(existing, qty)
+            copies_used[hit.card_id] += qty
+            total += qty
+    # Sort the final deck by cost (asc), then synergy (desc) for nice display.
+    deck.sort(key=lambda dc: (dc.cost if dc.cost is not None else 99, -dc.synergy_score))
+    return Deck(
+        leader=leader,
+        cards=deck,
+        style=style,
+        target_curve=dict(target),
+    )
+def _to_deck_card(hit, qty: int, cards: list[dict[str, Any]]) -> DeckCard:
+    # `hit` carries most fields; we look up family/rarity from the source
+    # card by id since SynergyHit doesn't carry them.
+    full = next((c for c in cards if c.get("id") == hit.card_id), None) or {}
+    return DeckCard(
+        card_id=hit.card_id,
+        name=hit.name,
+        quantity=qty,
+        cost=hit.cost,
+        card_type=hit.card_type,
+        colors=hit.colors,
+        family=list(full.get("family") or []),
+        rarity=str(full.get("rarity") or ""),
+        set_code=hit.set_code,
+        synergy_score=hit.total_score,
+        family_match=hit.family_match,
+    )
+def _bump_quantity(dc: DeckCard, extra: int) -> DeckCard:
+    return DeckCard(
+        card_id=dc.card_id,
+        name=dc.name,
+        quantity=dc.quantity + extra,
+        cost=dc.cost,
+        card_type=dc.card_type,
+        colors=dc.colors,
+        family=dc.family,
+        rarity=dc.rarity,
+        set_code=dc.set_code,
+        synergy_score=dc.synergy_score,
+        family_match=dc.family_match,
+    )
+def deck_to_text(deck: Deck) -> str:
+    """Plain-text deck list. Format mirrors common OPTCG-Sim conventions:
+    one card per line, `<qty>x <ID> <Name>`, plus a summary header.
+    """
+    lines: list[str] = []
+    leader = deck.leader
+    lines.append("# OPTCG deck")
+    lines.append(f"# Style: {deck.style}")
+    lines.append(f"# Total: {deck.total_quantity} cards")
+    lines.append(f"# Avg cost: {deck.avg_cost:.2f}")
+    lines.append("")
+    lines.append("## Leader")
+    lines.append(f"1x {leader.get('id', '?')} {leader.get('name', '?')}")
+    lines.append("")
+    lines.append("## Main deck")
+    for dc in deck.cards:
+        lines.append(f"{dc.quantity}x {dc.card_id} {dc.name}")
+    return "\n".join(lines)

spaceutil/plot.py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""Plotly figures for the deck-builder Space.
+Three breakdown charts displayed alongside a generated deck:
+  - cost curve (bar): quantities by cost bucket vs. the target curve
+  - type breakdown (bar): Character/Event/Stage counts
+  - color breakdown (bar): cards per color
+Color hex map mirrors the upstream `optcg_cards.visualize._first_color_hex`
+so palettes stay consistent across all OPTCG-related Spaces.
+"""
+from __future__ import annotations
+from typing import TYPE_CHECKING
+from optcg_cards.visualize import _first_color_hex
+if TYPE_CHECKING:
+    from spaceutil.deck import Deck
+def _empty_fig(message: str, height: int = 240):
+    import plotly.graph_objects as go
+    fig = go.Figure()
+    fig.add_annotation(
+        text=message,
+        xref="paper", yref="paper", x=0.5, y=0.5,
+        showarrow=False, font=dict(color="gray"),
+    )
+    fig.update_layout(
+        xaxis=dict(visible=False),
+        yaxis=dict(visible=False),
+        plot_bgcolor="white",
+        margin=dict(l=40, r=40, t=40, b=40),
+        height=height,
+    )
+    return fig
+def build_cost_curve_figure(deck: Deck | None, height: int = 280):
+    """Grouped bar chart: the deck's actual cost curve next to its target curve.
+    Returns the placeholder figure when ``deck`` is None or its total
+    quantity is 0. The x axis is the sorted union of buckets found in
+    either curve; a bucket missing on one side is drawn as 0, and
+    bucket 8 is labelled "8+".
+    """
+    import plotly.graph_objects as go
+    if deck is None or deck.total_quantity == 0:
+        return _empty_fig("Build a deck to see its cost curve.", height=height)
+    actual_curve = deck.cost_distribution
+    target_curve = deck.target_curve or {}
+    bucket_keys = sorted({*actual_curve, *target_curve})
+    tick_labels = [str(key) if key != 8 else "8+" for key in bucket_keys]
+    actual_bar = go.Bar(
+        name="Actual",
+        x=tick_labels,
+        y=[actual_curve.get(key, 0) for key in bucket_keys],
+        marker_color="#dc3545",
+    )
+    target_bar = go.Bar(
+        name="Target",
+        x=tick_labels,
+        y=[target_curve.get(key, 0) for key in bucket_keys],
+        marker_color="#1f77b4",
+        opacity=0.5,
+    )
+    chart = go.Figure(data=[actual_bar, target_bar])
+    chart.update_layout(
+        title="Cost curve: actual vs. target",
+        xaxis={"title": "Cost"},
+        yaxis={"title": "Cards"},
+        barmode="group",
+        plot_bgcolor="white",
+        margin={"l": 40, "r": 40, "t": 60, "b": 40},
+        height=height,
+        legend={"orientation": "h", "y": 1.0, "yanchor": "bottom"},
+    )
+    return chart
+def build_type_breakdown_figure(deck: Deck | None, height: int = 240):
+    """Single-trace bar chart of ``deck.type_distribution``, types sorted by name.
+    Returns the placeholder figure when ``deck`` is None or its total
+    quantity is 0.
+    """
+    import plotly.graph_objects as go
+    if deck is None or deck.total_quantity == 0:
+        return _empty_fig("Build a deck to see its type mix.", height=height)
+    per_type = deck.type_distribution
+    type_names = sorted(per_type)
+    bars = go.Bar(
+        x=type_names,
+        y=[per_type[name] for name in type_names],
+        marker_color="#6c757d",
+    )
+    chart = go.Figure(data=[bars])
+    chart.update_layout(
+        title="Card type mix",
+        xaxis={"title": ""},
+        yaxis={"title": "Cards"},
+        plot_bgcolor="white",
+        margin={"l": 40, "r": 40, "t": 60, "b": 40},
+        height=height,
+    )
+    return chart
+def build_color_breakdown_figure(deck: Deck | None, height: int = 240):
+    """Single-trace bar chart of ``deck.color_distribution``, colors sorted by name.
+    Each bar is tinted with the hex returned by ``_first_color_hex`` for
+    that color. Returns the placeholder figure when ``deck`` is None or
+    its total quantity is 0.
+    """
+    import plotly.graph_objects as go
+    if deck is None or deck.total_quantity == 0:
+        return _empty_fig("Build a deck to see its color mix.", height=height)
+    per_color = deck.color_distribution
+    color_names = sorted(per_color)
+    bars = go.Bar(
+        x=color_names,
+        y=[per_color[name] for name in color_names],
+        marker_color=[_first_color_hex([name]) for name in color_names],
+    )
+    chart = go.Figure(data=[bars])
+    chart.update_layout(
+        title="Color mix (per copy)",
+        xaxis={"title": ""},
+        yaxis={"title": "Cards (counted per color of multicolor cards)"},
+        plot_bgcolor="white",
+        margin={"l": 40, "r": 40, "t": 60, "b": 40},
+        height=height,
+    )
+    return chart

spaceutil/synergy.py ADDED Viewed

	@@ -0,0 +1,126 @@

+"""Synergy recommendations anchored on a Leader card.
+Synergy is not the same as raw cosine similarity. The "Browse / find
+similar" tab gives you mechanically-similar cards (good for swap
+candidates), but two near-identical "low cost red blockers" both
+compete for the same deck slot. Synergy here means: cards that are
+*color-legal* under the chosen leader and that *amplify the leader's
+strategy*, with a small thumb on the scale for cards in the leader's
+family or archetype.
+Score = cosine_similarity(leader, card) + family_bonus(card)
+  - color_overlap(leader, card) is required (filtered out otherwise).
+    OPTCG decks must share at least one color with the leader.
+  - family_overlap(leader, card) adds a fixed bonus (default 0.10) -
+    enough to outrank a marginally-more-similar but off-archetype card,
+    not so much that it drowns the embedding signal.
+  - Other Leader cards are excluded - decks have exactly one Leader.
+  - The leader itself is excluded.
+"""
+from __future__ import annotations
+from collections.abc import Iterable
+from dataclasses import dataclass
+from typing import Any
+import numpy as np
+# Default additive score bonus for a card whose family overlaps the leader's.
+FAMILY_BONUS = 0.10
+@dataclass(frozen=True)
+class SynergyHit:
+    """One ranked synergy recommendation produced by `recommend_synergy`."""
+    rank: int  # 1-based position in the ranking (best first)
+    card_id: str  # card "id" field, stringified
+    name: str  # card "name" field, stringified
+    base_score: float  # dot product of the card's vector with the leader's vector
+    family_bonus: float  # bonus applied on a family match, otherwise 0.0
+    total_score: float  # base_score + family_bonus; the sort key
+    family_match: bool  # True when the card shares a family with the leader
+    card_type: str  # card "card_type" field, stringified
+    colors: list[str]  # copy of the card's colors ([] when missing)
+    cost: int | None  # float costs are truncated to int; NaN becomes None
+    set_code: str  # card "set_code" field, stringified
+def color_overlap(a: Iterable[str] | None, b: Iterable[str] | None) -> bool:
+    """Return True when ``a`` and ``b`` share at least one color.
+    ``None`` or an empty iterable on either side yields False.
+    Emptiness is decided by the set intersection rather than by
+    truth-testing the arguments, so numpy arrays (which raise on
+    ``not arr`` when they hold more than one element) are accepted
+    alongside plain lists and tuples.
+    """
+    if a is None or b is None:
+        return False
+    # An empty side produces an empty set, which is disjoint from anything.
+    return not set(a).isdisjoint(b)
+def family_overlap(a: Iterable[str] | None, b: Iterable[str] | None) -> bool:
+    """Return True when ``a`` and ``b`` share at least one family name.
+    ``None`` or an empty iterable on either side yields False.
+    Emptiness is decided by the set intersection rather than by
+    truth-testing the arguments, so numpy arrays (which raise on
+    ``not arr`` when they hold more than one element) are accepted
+    alongside plain lists and tuples.
+    """
+    if a is None or b is None:
+        return False
+    # An empty side produces an empty set, which is disjoint from anything.
+    return not set(a).isdisjoint(b)
+def recommend_synergy(
+    leader_idx: int,
+    cards: list[dict[str, Any]],
+    matrix: np.ndarray,
+    k: int = 30,
+    family_bonus: float = FAMILY_BONUS,
+) -> list[SynergyHit]:
+    """Rank the top ``k`` color-legal, non-Leader cards for the Leader at ``leader_idx``.
+    A card's score is the dot product of its row in ``matrix`` with the
+    leader's row, plus ``family_bonus`` when the card shares a family
+    with the leader. Cards with no color in common with the leader,
+    other Leader cards, and the leader itself are never returned.
+    Raises:
+        ValueError: if the card at ``leader_idx`` is not a Leader.
+    """
+    leader_card = cards[leader_idx]
+    if leader_card.get("card_type") != "Leader":
+        raise ValueError(
+            f"Card at index {leader_idx} ({leader_card.get('id')!r}) is not a Leader"
+        )
+    own_colors = leader_card.get("colors") or []
+    own_family = leader_card.get("family") or []
+    similarity = matrix @ matrix[leader_idx]  # one score per corpus row
+    scored: list[tuple[int, float, float, bool]] = []
+    for pos, candidate in enumerate(cards):
+        is_eligible = (
+            pos != leader_idx
+            and candidate.get("card_type") != "Leader"
+            and color_overlap(own_colors, candidate.get("colors"))
+        )
+        if not is_eligible:
+            continue
+        in_family = family_overlap(own_family, candidate.get("family"))
+        extra = family_bonus if in_family else 0.0
+        scored.append((pos, float(similarity[pos]), extra, in_family))
+    # Stable sort, highest total first; ties keep corpus order.
+    scored.sort(key=lambda entry: -(entry[1] + entry[2]))
+    results: list[SynergyHit] = []
+    for position, (pos, sim, extra, in_family) in enumerate(scored[:k], start=1):
+        picked = cards[pos]
+        raw_cost = picked.get("cost")
+        # Float costs are truncated to int, with NaN mapped to None.
+        if not isinstance(raw_cost, float):
+            cost = raw_cost
+        elif np.isnan(raw_cost):
+            cost = None
+        else:
+            cost = int(raw_cost)
+        hit = SynergyHit(
+            rank=position,
+            card_id=str(picked.get("id", "")),
+            name=str(picked.get("name", "")),
+            base_score=sim,
+            family_bonus=extra,
+            total_score=sim + extra,
+            family_match=in_family,
+            card_type=str(picked.get("card_type", "")),
+            colors=list(picked.get("colors") or []),
+            cost=cost,
+            set_code=str(picked.get("set_code", "")),
+        )
+        results.append(hit)
+    return results
+def cost_curve(hits: list[SynergyHit], max_cost: int = 10) -> dict[int, int]:
+    """Count hits per cost, folding every cost above ``max_cost`` into ``max_cost``.
+    Hits whose cost is None are left out of the tally.
+    """
+    tally: dict[int, int] = {}
+    known_costs = (hit.cost for hit in hits if hit.cost is not None)
+    for cost in known_costs:
+        bucket = min(int(cost), max_cost)
+        tally.setdefault(bucket, 0)
+        tally[bucket] += 1
+    return tally

tests/__init__.py ADDED Viewed

File without changes

tests/conftest.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""Test fixtures for the deck-builder Space.
+200 synthetic cards with 1024-dim L2-normalized embeddings. The volume
+matters here: a 50-card deck with up to 4 copies per card needs at
+minimum 13 unique candidates per cost bucket to fill cleanly. 200 cards
+across 4 types and 12 color groupings gives enough variety for
+deck-builder tests to exercise both the cost-curve targeting and the
+backfill paths.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+import numpy as np
+import pandas as pd
+import pytest
+from optcg_cards.provenance import (
+    EmbedProvenance,
+    FetchProvenance,
+    write_provenance,
+)
+# Length of every synthetic embedding vector.
+EMBEDDING_DIM = 1024
+# Number of cards the `synthetic_cards` fixture generates.
+N_CARDS = 200
+# 6 base colors + 6 adjacent bi-color combos. Bi-colors widen the
+# candidate pool for any chosen leader (a Red leader can also draft
+# Red/Green and Red/Black cards), so the deck builder has room to fill
+# 50 slots at <=4 copies each.
+_COLORS_POOL = [
+    ["Red"], ["Green"], ["Blue"], ["Purple"], ["Black"], ["Yellow"],
+    ["Red", "Green"], ["Green", "Blue"], ["Blue", "Purple"],
+    ["Purple", "Black"], ["Black", "Yellow"], ["Yellow", "Red"],
+]
+# Assigned round-robin by card index, so every 4th card is a Leader.
+_CARD_TYPES = ["Character", "Event", "Stage", "Leader"]
+# Assigned round-robin by card index.
+_RARITIES = ["C", "UC", "R", "SR", "L"]
+# Single-family lists, assigned round-robin by card index.
+_FAMILIES = [
+    ["Straw Hat Crew"],
+    ["Animal Kingdom Pirates"],
+    ["Marines"],
+    ["Worst Generation"],
+    ["Big Mom Pirates"],
+]
+def _color_for(i: int) -> list[str]:
+    """Pick the color group for card ``i`` by walking the pool with stride 5, offset 1."""
+    # Stride 5 is coprime with the 12-entry pool, so 12 consecutive
+    # indices visit every color group exactly once.
+    slot = (i * 5 + 1) % len(_COLORS_POOL)
+    return _COLORS_POOL[slot]
+def _unit_vector(rng: np.random.Generator, dim: int) -> list[float]:
+    """Draw a standard-normal float32 vector of length ``dim`` and scale it to unit L2 norm."""
+    sample = rng.standard_normal(dim).astype(np.float32)
+    length = np.linalg.norm(sample)
+    return (sample / length).tolist()
+@pytest.fixture
+def synthetic_cards() -> list[dict[str, Any]]:
+    """Deterministic corpus of N_CARDS fake cards (RNG seed 42) with unit-norm embeddings."""
+    rng = np.random.default_rng(seed=42)
+    def _make_card(i: int) -> dict[str, Any]:
+        # Build card ``i``; the RNG draws happen in dict order: umap_x, umap_y, embedding.
+        ctype = _CARD_TYPES[i % len(_CARD_TYPES)]
+        card_code = f"OP01-{i:03d}"
+        step = 1 + i % 9
+        # NOTE(review): "Stage" sits at i % 4 == 2, so i % 8 == 3 can never
+        # hold for a Stage and no card currently gets cost None.
+        costless = ctype == "Stage" and i % 8 == 3
+        return {
+            "id": card_code,
+            "code": card_code,
+            "name": f"Card {i}",
+            "card_type": ctype,
+            "colors": _color_for(i),
+            "cost": None if costless else step,
+            "power": 1000 * step,
+            "counter": (i % 3) * 1000 if (i % 3) else None,
+            "life": 5 if ctype == "Leader" else None,
+            "attribute": "Slash" if i % 2 else "Strike",
+            "family": _FAMILIES[i % len(_FAMILIES)],
+            "effect_text": f"Effect for card {i}.",
+            "trigger_text": "",
+            "rarity": _RARITIES[i % len(_RARITIES)],
+            "pack_id": "OP01",
+            "set_code": "OP01",
+            "set_name": "Romance Dawn",
+            "language": "en",
+            "umap_x": float(rng.uniform(-10, 10)),
+            "umap_y": float(rng.uniform(-10, 10)),
+            "embedding": _unit_vector(rng, EMBEDDING_DIM),
+        }
+    return [_make_card(i) for i in range(N_CARDS)]
+@pytest.fixture
+def synthetic_embed_provenance() -> EmbedProvenance:
+    """Fixed embedding-provenance record matching the synthetic corpus dimensions."""
+    instruction_template = (
+        "Instruct: Represent this One Piece Card Game card so that "
+        "mechanically similar cards are close in embedding space.\n"
+        "Text: {card_document}"
+    )
+    fields = {
+        "model_id": "Qwen/Qwen3-Embedding-0.6B",
+        "embedding_dim": EMBEDDING_DIM,
+        "matryoshka_dim": None,
+        "task_instruction": instruction_template,
+        "embedded_at": "2026-05-14T00:00:00+00:00",
+        "sentence_transformers_version": "5.4.1",
+    }
+    return EmbedProvenance(**fields)
+@pytest.fixture
+def synthetic_fetch_provenance() -> FetchProvenance:
+    """Fixed fetch-provenance record describing a single-pack (OP01) synthetic pull."""
+    pack = "OP01"
+    fields = {
+        "source": "vegapull",
+        "source_url": "https://en.onepiece-cardgame.com/cardlist/",
+        "source_attribution": "vegapull scraping en.onepiece-cardgame.com",
+        "source_fetched_at": "2026-05-14T00:00:00+00:00",
+        "language": "en",
+        "n_cards": N_CARDS,
+        "pack_ids_included": [pack],
+        "latest_pack_id": pack,
+        "vegapull_version": "1.2.2",
+    }
+    return FetchProvenance(**fields)
+@pytest.fixture
+def synthetic_repo(
+    tmp_path: Path,
+    synthetic_cards: list[dict[str, Any]],
+    synthetic_fetch_provenance: FetchProvenance,
+    synthetic_embed_provenance: EmbedProvenance,
+) -> dict[str, Path]:
+    """Write the synthetic parquet and provenance files under ``tmp_path``.
+    Returns the two file paths plus the directory under the keys
+    "parquet", "provenance" and "root".
+    """
+    layout = {
+        "parquet": tmp_path / "cards_with_embeddings.parquet",
+        "provenance": tmp_path / "provenance.json",
+        "root": tmp_path,
+    }
+    frame = pd.DataFrame(synthetic_cards)
+    frame.to_parquet(layout["parquet"], index=False)
+    write_provenance(
+        layout["provenance"],
+        fetch=synthetic_fetch_provenance,
+        embed=synthetic_embed_provenance,
+    )
+    return layout
+@pytest.fixture
+def patched_hf_download(
+    monkeypatch: pytest.MonkeyPatch,
+    synthetic_repo: dict[str, Path],
+):
+    """Redirect ``hf_hub_download`` to the files written by ``synthetic_repo``.
+    The replacement is installed on ``huggingface_hub`` and, when
+    ``spaceutil.data`` imports cleanly, on that module as well, so
+    ``load_corpus`` reads local files instead of hitting the network.
+    """
+    served = {
+        "cards_with_embeddings.parquet": "parquet",
+        "provenance.json": "provenance",
+    }
+    def fake_download(
+        repo_id: str,
+        filename: str,
+        repo_type: str | None = None,
+        token: str | None = None,
+        **kwargs: Any,
+    ) -> str:
+        repo_key = served.get(filename)
+        if repo_key is None:
+            raise FileNotFoundError(f"Unexpected filename in synthetic repo: {filename}")
+        return str(synthetic_repo[repo_key])
+    import huggingface_hub
+    monkeypatch.setattr(huggingface_hub, "hf_hub_download", fake_download)
+    try:
+        import spaceutil.data as data_mod
+        # raising=False: tolerate a data module that has no such attribute.
+        monkeypatch.setattr(data_mod, "hf_hub_download", fake_download, raising=False)
+    except ImportError:
+        pass
+    return fake_download

tests/test_data.py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""TDD for spaceutil.data.load_corpus."""
+from __future__ import annotations
+import logging
+import numpy as np
+from optcg_cards.provenance import EmbedProvenance
+def test_load_corpus_returns_expected_shape(patched_hf_download):
+    """load_corpus returns (cards, matrix, provenance, id index) sized for 200 cards."""
+    from spaceutil.data import load_corpus
+    corpus = load_corpus(token="fake-token")
+    cards, matrix, embed_prov, id_to_idx = corpus
+    n_rows, n_dims = 200, 1024
+    assert isinstance(cards, list)
+    assert len(cards) == n_rows
+    assert isinstance(matrix, np.ndarray)
+    assert matrix.shape == (n_rows, n_dims)
+    assert matrix.dtype == np.float32
+    assert isinstance(embed_prov, EmbedProvenance)
+    assert isinstance(id_to_idx, dict)
+    assert len(id_to_idx) == n_rows
+def test_embedding_key_dropped_from_cards(patched_hf_download):
+    """No card dict may still carry its raw "embedding" entry."""
+    from spaceutil.data import load_corpus
+    cards = load_corpus(token="fake-token")[0]
+    still_embedded = [card for card in cards if "embedding" in card]
+    assert not still_embedded, "embedding column must be stripped after stacking"
+def test_list_columns_coerced_to_python_lists(patched_hf_download):
+    """colors (always) and family (when present) must be plain lists, not ndarrays."""
+    from spaceutil.data import load_corpus
+    cards = load_corpus(token="fake-token")[0]
+    for card in cards:
+        colors, family = card["colors"], card["family"]
+        assert isinstance(colors, list), "colors must be list, not ndarray"
+        assert not isinstance(colors, np.ndarray)
+        if family is None:
+            continue
+        assert isinstance(family, list)
+        assert not isinstance(family, np.ndarray)
+def test_id_to_idx_consistency(patched_hf_download):
+    """Every card id maps to a row holding that same card and a 1024-dim vector."""
+    from spaceutil.data import load_corpus
+    cards, matrix, _, id_to_idx = load_corpus(token="fake-token")
+    for card in cards:
+        card_id = card["id"]
+        row = id_to_idx[card_id]
+        assert cards[row]["id"] == card_id
+        assert matrix[row].shape == (1024,)
+def test_provenance_recovered(patched_hf_download):
+    """The embed provenance written by the fixture survives the load round-trip."""
+    from spaceutil.data import load_corpus
+    embed_prov = load_corpus(token="fake-token")[2]
+    assert embed_prov.model_id == "Qwen/Qwen3-Embedding-0.6B"
+    assert embed_prov.embedding_dim == 1024
+    instruction = embed_prov.task_instruction
+    assert "Instruct" in instruction
+    assert "{card_document}" in instruction
+def test_no_image_url_columns_exposed(patched_hf_download):
+    """CLAUDE.md hard rule: no image/url/art columns."""
+    from itertools import product
+    from spaceutil.data import load_corpus
+    cards = load_corpus(token="fake-token")[0]
+    forbidden_substrings = ("image", "art_url", "thumbnail", "img_")
+    for card in cards:
+        # Check every (column name, banned fragment) pair, case-insensitively.
+        for key, fragment in product(card, forbidden_substrings):
+            assert fragment not in key.lower(), f"forbidden column {key!r}"
+def test_token_never_logged(patched_hf_download, caplog):
+    """HF_TOKEN must not appear in captured logs."""
+    from spaceutil.data import load_corpus
+    secret = "hf_super_secret_token_12345"
+    with caplog.at_level(logging.DEBUG):
+        load_corpus(token=secret)
+    for entry in caplog.records:
+        rendered = entry.getMessage()
+        raw_args = str(entry.args or "")
+        assert secret not in rendered
+        assert secret not in raw_args
+def test_matrix_is_l2_normalized(patched_hf_download):
+    """Synthetic vectors are pre-normalized; load_corpus must preserve that."""
+    from spaceutil.data import load_corpus
+    matrix = load_corpus(token="fake-token")[1]
+    row_norms = np.linalg.norm(matrix, axis=1)
+    np.testing.assert_allclose(row_norms, 1.0, atol=1e-5)
+def test_load_corpus_accepts_none_token(patched_hf_download):
+    """After the HF repo is flipped public, token becomes optional."""
+    from spaceutil.data import load_corpus
+    corpus = load_corpus(token=None)
+    cards, matrix = corpus[0], corpus[1]
+    assert len(corpus) == 4
+    assert len(cards) == 200
+    assert matrix.shape == (200, 1024)

tests/test_deck.py ADDED Viewed

	@@ -0,0 +1,222 @@

+"""TDD for spaceutil.deck.build_deck.
+Hard invariants the builder must always satisfy:
+  - Total quantity is exactly 50.
+  - Every card shares at least one color with the leader.
+  - No card has more than `max_copies` (default 4).
+  - The leader itself is never in the main deck.
+  - No other Leader cards are in the main deck.
+Soft invariants (style behaviour):
+  - Aggro decks have lower average cost than midrange.
+  - Control decks have higher average cost than midrange.
+"""
+from __future__ import annotations
+import numpy as np
+import pytest
+def _matrix(cards):
+    """Stack every card's "embedding" into one (n_cards, dim) float32 array."""
+    rows = []
+    for card in cards:
+        rows.append(np.asarray(card["embedding"], dtype=np.float32))
+    return np.stack(rows, axis=0)
+def _strip_emb(cards):
+    """Return shallow copies of the card dicts without their "embedding" key."""
+    stripped = []
+    for card in cards:
+        slim = dict(card)
+        slim.pop("embedding", None)
+        stripped.append(slim)
+    return stripped
+def _first_leader_idx(cards):
+    """Return the index of the first Leader card; StopIteration when there is none."""
+    leader_positions = (
+        pos for pos, card in enumerate(cards) if card["card_type"] == "Leader"
+    )
+    return next(leader_positions)
+class TestDeckSize:
+    """The main deck always totals exactly 50 cards, whatever the style."""
+    @staticmethod
+    def _build(synthetic_cards, style):
+        """Build a deck of ``style`` around the first Leader in the fixture."""
+        from spaceutil.deck import build_deck
+        return build_deck(
+            _first_leader_idx(synthetic_cards),
+            _strip_emb(synthetic_cards),
+            _matrix(synthetic_cards),
+            style=style,
+        )
+    def test_total_quantity_is_50(self, synthetic_cards):
+        built = self._build(synthetic_cards, "midrange")
+        assert built.total_quantity == 50
+    def test_total_quantity_is_50_for_each_style(self, synthetic_cards):
+        for style in ("aggro", "midrange", "control"):
+            built = self._build(synthetic_cards, style)
+            assert built.total_quantity == 50, f"{style} produced {built.total_quantity}"
+class TestColorLegality:
+    """Every main-deck card must share at least one color with the leader."""
+    def test_all_cards_share_a_color_with_leader(self, synthetic_cards):
+        from spaceutil.deck import build_deck
+        leader_pos = _first_leader_idx(synthetic_cards)
+        leader_colors = set(synthetic_cards[leader_pos]["colors"])
+        deck = build_deck(
+            leader_pos,
+            _strip_emb(synthetic_cards),
+            _matrix(synthetic_cards),
+            style="midrange",
+        )
+        for dc in deck.cards:
+            shared = leader_colors.intersection(dc.colors)
+            assert shared, (
+                f"{dc.card_id} has {dc.colors}, leader has {leader_colors}"
+            )
+class TestCopyLimit:
+    """Per-card quantities stay within 1..max_copies (4 by default)."""
+    @staticmethod
+    def _build(synthetic_cards, **options):
+        """Build a midrange deck around the first Leader, forwarding extra options."""
+        from spaceutil.deck import build_deck
+        return build_deck(
+            _first_leader_idx(synthetic_cards),
+            _strip_emb(synthetic_cards),
+            _matrix(synthetic_cards),
+            style="midrange",
+            **options,
+        )
+    def test_no_card_exceeds_max_copies_default(self, synthetic_cards):
+        deck = self._build(synthetic_cards)
+        for entry in deck.cards:
+            assert 1 <= entry.quantity <= 4
+    def test_no_card_exceeds_max_copies_explicit(self, synthetic_cards):
+        deck = self._build(synthetic_cards, max_copies=2)
+        assert deck.total_quantity == 50
+        for entry in deck.cards:
+            assert 1 <= entry.quantity <= 2
+class TestLeaderExclusion:
+    """No Leader card may appear in the main deck, and only a Leader may anchor one."""
+    @staticmethod
+    def _midrange_deck(synthetic_cards):
+        """Build a midrange deck around the first Leader in the fixture."""
+        from spaceutil.deck import build_deck
+        return build_deck(
+            _first_leader_idx(synthetic_cards),
+            _strip_emb(synthetic_cards),
+            _matrix(synthetic_cards),
+            style="midrange",
+        )
+    def test_other_leaders_not_in_deck(self, synthetic_cards):
+        deck = self._midrange_deck(synthetic_cards)
+        for entry in deck.cards:
+            assert entry.card_type != "Leader"
+    def test_chosen_leader_not_in_main_deck(self, synthetic_cards):
+        leader_id = synthetic_cards[_first_leader_idx(synthetic_cards)]["id"]
+        deck = self._midrange_deck(synthetic_cards)
+        for entry in deck.cards:
+            assert entry.card_id != leader_id
+    def test_raises_when_index_is_not_leader(self, synthetic_cards):
+        from spaceutil.deck import build_deck
+        non_leader_positions = (
+            pos
+            for pos, card in enumerate(synthetic_cards)
+            if card["card_type"] != "Leader"
+        )
+        non_leader_idx = next(non_leader_positions)
+        matrix = _matrix(synthetic_cards)
+        cards = _strip_emb(synthetic_cards)
+        with pytest.raises(ValueError, match="not a Leader"):
+            build_deck(non_leader_idx, cards, matrix)
+class TestDeckMetadata:
+    """The built deck records its leader, its style and a sane average cost."""
+    @staticmethod
+    def _midrange_deck(synthetic_cards):
+        """Build a midrange deck around the first Leader in the fixture."""
+        from spaceutil.deck import build_deck
+        return build_deck(
+            _first_leader_idx(synthetic_cards),
+            _strip_emb(synthetic_cards),
+            _matrix(synthetic_cards),
+            style="midrange",
+        )
+    def test_deck_carries_leader_reference(self, synthetic_cards):
+        expected_id = synthetic_cards[_first_leader_idx(synthetic_cards)]["id"]
+        deck = self._midrange_deck(synthetic_cards)
+        assert deck.leader["id"] == expected_id
+        assert deck.style == "midrange"
+    def test_avg_cost_computed(self, synthetic_cards):
+        average = self._midrange_deck(synthetic_cards).avg_cost
+        assert average > 0
+        # 1-9 cost spread, midrange should land somewhere reasonable
+        assert 1.5 < average < 8.0
+class TestStyleSensitivity:
+    """Average cost must rise from aggro to midrange to control; unknown styles fail."""
+    @staticmethod
+    def _build(synthetic_cards, style):
+        """Build a deck of ``style`` around the first Leader in the fixture."""
+        from spaceutil.deck import build_deck
+        return build_deck(
+            _first_leader_idx(synthetic_cards),
+            _strip_emb(synthetic_cards),
+            _matrix(synthetic_cards),
+            style=style,
+        )
+    def test_aggro_cheaper_than_midrange(self, synthetic_cards):
+        aggro = self._build(synthetic_cards, "aggro")
+        midrange = self._build(synthetic_cards, "midrange")
+        assert aggro.avg_cost < midrange.avg_cost, (
+            f"aggro avg {aggro.avg_cost:.2f} not < midrange {midrange.avg_cost:.2f}"
+        )
+    def test_control_pricier_than_midrange(self, synthetic_cards):
+        midrange = self._build(synthetic_cards, "midrange")
+        control = self._build(synthetic_cards, "control")
+        assert control.avg_cost > midrange.avg_cost, (
+            f"control avg {control.avg_cost:.2f} not > midrange {midrange.avg_cost:.2f}"
+        )
+    def test_unknown_style_raises(self, synthetic_cards):
+        from spaceutil.deck import build_deck
+        leader_pos = _first_leader_idx(synthetic_cards)
+        matrix = _matrix(synthetic_cards)
+        cards = _strip_emb(synthetic_cards)
+        with pytest.raises(ValueError, match="style"):
+            build_deck(leader_pos, cards, matrix, style="bogus")
+class TestExport:
+    """The plain-text export lists the leader, every deck line and the deck size."""
+    @staticmethod
+    def _deck_and_text(synthetic_cards):
+        """Build a midrange deck around the first Leader and render it as text."""
+        from spaceutil.deck import build_deck, deck_to_text
+        deck = build_deck(
+            _first_leader_idx(synthetic_cards),
+            _strip_emb(synthetic_cards),
+            _matrix(synthetic_cards),
+            style="midrange",
+        )
+        return deck, deck_to_text(deck)
+    def test_to_text_format(self, synthetic_cards):
+        deck, text = self._deck_and_text(synthetic_cards)
+        for leader_field in ("id", "name"):
+            assert deck.leader[leader_field] in text
+        for entry in deck.cards:
+            assert f"{entry.quantity}x {entry.card_id}" in text
+        # Sanity: at least one section header so it's human-readable
+        assert "Leader" in text and "Main deck" in text
+    def test_to_text_total_quantity_in_summary(self, synthetic_cards):
+        _, text = self._deck_and_text(synthetic_cards)
+        assert "50" in text

tests/test_plot.py ADDED Viewed

	@@ -0,0 +1,78 @@

+"""TDD for spaceutil.plot - the deck-builder figures."""
+from __future__ import annotations
+import numpy as np
+def _matrix(cards):
+    """Stack every card's "embedding" into one (n_cards, dim) float32 array."""
+    rows = []
+    for card in cards:
+        rows.append(np.asarray(card["embedding"], dtype=np.float32))
+    return np.stack(rows, axis=0)
+def _strip_emb(cards):
+    """Return shallow copies of the card dicts without their "embedding" key."""
+    stripped = []
+    for card in cards:
+        slim = dict(card)
+        slim.pop("embedding", None)
+        stripped.append(slim)
+    return stripped
+def _build_a_deck(synthetic_cards):
+    """Build a default-style deck around the first Leader card in the fixture."""
+    from spaceutil.deck import build_deck
+    leader_positions = (
+        pos for pos, card in enumerate(synthetic_cards) if card["card_type"] == "Leader"
+    )
+    leader_pos = next(leader_positions)
+    slim_cards = _strip_emb(synthetic_cards)
+    vectors = _matrix(synthetic_cards)
+    return build_deck(leader_pos, slim_cards, vectors)
+class TestEmptyState:
+    """With no deck, every builder returns a trace-less placeholder figure."""
+    def test_cost_curve_with_no_deck(self):
+        import plotly.graph_objects as go
+        from spaceutil.plot import build_cost_curve_figure
+        placeholder = build_cost_curve_figure(None)
+        assert isinstance(placeholder, go.Figure)
+        assert len(placeholder.data) == 0
+        prompts = [note.text.lower() for note in placeholder.layout.annotations]
+        assert any("build a deck" in prompt for prompt in prompts)
+    def test_type_breakdown_with_no_deck(self):
+        from spaceutil.plot import build_type_breakdown_figure
+        placeholder = build_type_breakdown_figure(None)
+        assert len(placeholder.data) == 0
+    def test_color_breakdown_with_no_deck(self):
+        from spaceutil.plot import build_color_breakdown_figure
+        placeholder = build_color_breakdown_figure(None)
+        assert len(placeholder.data) == 0
+class TestBuiltDeck:
+    """With a real deck, each builder returns the expected traces."""
+    def test_cost_curve_has_two_bars(self, synthetic_cards):
+        from spaceutil.plot import build_cost_curve_figure
+        chart = build_cost_curve_figure(_build_a_deck(synthetic_cards))
+        # actual + target traces
+        assert len(chart.data) == 2
+        trace_names = {trace.name for trace in chart.data}
+        assert trace_names == {"Actual", "Target"}
+    def test_type_breakdown_returns_one_trace(self, synthetic_cards):
+        from spaceutil.plot import build_type_breakdown_figure
+        chart = build_type_breakdown_figure(_build_a_deck(synthetic_cards))
+        assert len(chart.data) == 1
+        # Should include at least one type
+        (only_trace,) = chart.data
+        assert sum(only_trace.y) > 0
+    def test_color_breakdown_includes_leader_color(self, synthetic_cards):
+        from spaceutil.plot import build_color_breakdown_figure
+        deck = _build_a_deck(synthetic_cards)
+        chart = build_color_breakdown_figure(deck)
+        assert len(chart.data) == 1
+        # The chart's x axis must list exactly the colors in the deck's distribution
+        charted = set(chart.data[0].x)
+        expected = set(deck.color_distribution.keys())
+        assert charted == expected