"""Test fixtures for the deck-builder Space.
200 synthetic cards (N_CARDS) with 1024-dim L2-normalized embeddings. The
volume matters here: a 50-card deck with up to 4 copies per card needs at
minimum 13 unique candidates per cost bucket to fill cleanly. 200 cards
across 4 types and 12 color groupings give enough variety for
deck-builder tests to exercise both the cost-curve targeting and the
backfill paths.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any
import numpy as np
import pandas as pd
import pytest
from optcg_cards.provenance import (
EmbedProvenance,
FetchProvenance,
write_provenance,
)
EMBEDDING_DIM = 1024
N_CARDS = 200
# 6 base colors + 6 adjacent bi-color combos. Bi-colors widen the
# candidate pool for any chosen leader (a Red leader can also draft
# Red/Green and Red/Black cards), so the deck builder has room to fill
# 50 slots at <=4 copies each.
_COLORS_POOL = [
["Red"], ["Green"], ["Blue"], ["Purple"], ["Black"], ["Yellow"],
["Red", "Green"], ["Green", "Blue"], ["Blue", "Purple"],
["Purple", "Black"], ["Black", "Yellow"], ["Yellow", "Red"],
]
_CARD_TYPES = ["Character", "Event", "Stage", "Leader"]
_RARITIES = ["C", "UC", "R", "SR", "L"]
_FAMILIES = [
["Straw Hat Crew"],
["Animal Kingdom Pirates"],
["Marines"],
["Worst Generation"],
["Big Mom Pirates"],
]
def _color_for(i: int) -> list[str]:
    """Return the color grouping assigned to card index ``i``.

    The grouping is ``_COLORS_POOL[(5 * i + 1) % len(_COLORS_POOL)]``. With
    the 12-entry pool, the multiplier 5 is coprime with 12, so any 12
    consecutive indices visit every grouping exactly once.

    NOTE(review): 12 is a multiple of the 4 card types, so the pool slot
    satisfies ``slot % 4 == (i + 1) % 4``. Because card type is chosen by
    ``i % 4``, each card type only ever receives 3 of the 12 groupings.
    Confirm this coupling is intended.
    """
    pool_size = len(_COLORS_POOL)
    slot = (5 * i + 1) % pool_size
    return _COLORS_POOL[slot]
def _unit_vector(rng: np.random.Generator, dim: int) -> list[float]:
v = rng.standard_normal(dim).astype(np.float32)
v /= np.linalg.norm(v)
return v.tolist()
@pytest.fixture
def synthetic_cards() -> list[dict[str, Any]]:
    """Build ``N_CARDS`` deterministic synthetic card records.

    Card type, rarity, family, cost, power, counter and attribute are all
    derived from the card index, so the records are reproducible. The UMAP
    coordinates and the embedding come from a generator seeded with 42.

    Returns:
        One dict per card. ``cost`` is ``None`` for every other Stage card
        and ``1..9`` otherwise; ``counter`` is ``None`` when ``i % 3 == 0``;
        ``life`` is 5 for Leaders and ``None`` for every other type.
    """
    rng = np.random.default_rng(seed=42)
    n_types = len(_CARD_TYPES)
    cards: list[dict[str, Any]] = []
    for i in range(N_CARDS):
        ctype = _CARD_TYPES[i % n_types]
        # Give every other Stage a missing cost. The toggle is keyed off the
        # card's ordinal within its type (i // n_types) rather than a raw
        # modulus of i: a test such as ``i % 8 == 3`` can never coincide
        # with the Stage slot (i % 4 == 2), which would leave every card
        # with a cost and the None-cost path unexercised.
        costless_stage = ctype == "Stage" and (i // n_types) % 2 == 0
        cards.append(
            {
                "id": f"OP01-{i:03d}",
                "code": f"OP01-{i:03d}",
                "name": f"Card {i}",
                "card_type": ctype,
                "colors": _color_for(i),
                # Spread costs 1-9, with some stages at None.
                "cost": None if costless_stage else (1 + i % 9),
                "power": 1000 * (1 + i % 9),
                "counter": (i % 3) * 1000 if (i % 3) else None,
                "life": 5 if ctype == "Leader" else None,
                "attribute": "Slash" if i % 2 else "Strike",
                "family": _FAMILIES[i % len(_FAMILIES)],
                "effect_text": f"Effect for card {i}.",
                "trigger_text": "",
                "rarity": _RARITIES[i % len(_RARITIES)],
                "pack_id": "OP01",
                "set_code": "OP01",
                "set_name": "Romance Dawn",
                "language": "en",
                # RNG draws happen in this order (x, y, embedding) per card;
                # reordering these keys would change the generated values.
                "umap_x": float(rng.uniform(-10, 10)),
                "umap_y": float(rng.uniform(-10, 10)),
                "embedding": _unit_vector(rng, EMBEDDING_DIM),
            }
        )
    return cards
@pytest.fixture
def synthetic_embed_provenance() -> EmbedProvenance:
    """Return a fixed ``EmbedProvenance`` record for the synthetic corpus.

    ``embedding_dim`` is taken from ``EMBEDDING_DIM`` so it agrees with the
    vectors the card fixture generates; every other field is a constant.
    """
    # Plain string, not an f-string: ``{card_document}`` stays a literal
    # placeholder inside the stored instruction.
    instruction = (
        "Instruct: Represent this One Piece Card Game card so that "
        "mechanically similar cards are close in embedding space.\n"
        "Text: {card_document}"
    )
    provenance = EmbedProvenance(
        model_id="Qwen/Qwen3-Embedding-0.6B",
        embedding_dim=EMBEDDING_DIM,
        matryoshka_dim=None,
        task_instruction=instruction,
        embedded_at="2026-05-14T00:00:00+00:00",
        sentence_transformers_version="5.4.1",
    )
    return provenance
@pytest.fixture
def synthetic_fetch_provenance() -> FetchProvenance:
    """Return a fixed ``FetchProvenance`` record for the synthetic corpus.

    ``n_cards`` is taken from ``N_CARDS`` so it agrees with the number of
    records the card fixture generates; every other field is a constant.
    """
    fields: dict[str, Any] = {
        "source": "vegapull",
        "source_url": "https://en.onepiece-cardgame.com/cardlist/",
        "source_attribution": "vegapull scraping en.onepiece-cardgame.com",
        "source_fetched_at": "2026-05-14T00:00:00+00:00",
        "language": "en",
        "n_cards": N_CARDS,
        "pack_ids_included": ["OP01"],
        "latest_pack_id": "OP01",
        "vegapull_version": "1.2.2",
    }
    return FetchProvenance(**fields)
@pytest.fixture
def synthetic_repo(
    tmp_path: Path,
    synthetic_cards: list[dict[str, Any]],
    synthetic_fetch_provenance: FetchProvenance,
    synthetic_embed_provenance: EmbedProvenance,
) -> dict[str, Path]:
    """Write the synthetic corpus to ``tmp_path`` and return its file paths.

    Two files are created: ``cards_with_embeddings.parquet`` (the card
    records as a DataFrame, without the index) and ``provenance.json``
    (written by ``write_provenance`` from the two provenance fixtures).

    Returns:
        Mapping with keys ``"parquet"``, ``"provenance"`` and ``"root"``
        (the temporary directory itself).
    """
    cards_file = tmp_path / "cards_with_embeddings.parquet"
    frame = pd.DataFrame(synthetic_cards)
    frame.to_parquet(cards_file, index=False)

    provenance_file = tmp_path / "provenance.json"
    write_provenance(
        provenance_file,
        fetch=synthetic_fetch_provenance,
        embed=synthetic_embed_provenance,
    )

    return dict(parquet=cards_file, provenance=provenance_file, root=tmp_path)
@pytest.fixture
def patched_hf_download(
    monkeypatch: pytest.MonkeyPatch,
    synthetic_repo: dict[str, Path],
):
    """Redirect ``hf_hub_download`` to the local synthetic repo.

    Replaces ``huggingface_hub.hf_hub_download`` and, when ``spaceutil.data``
    is importable, that module's ``hf_hub_download`` attribute as well, so
    corpus loading reads the files written by ``synthetic_repo`` instead of
    touching the network.

    Returns:
        The replacement download function, so tests can call it directly.
    """
    import huggingface_hub

    # (filename served, key of its path in ``synthetic_repo``)
    served = (
        ("cards_with_embeddings.parquet", "parquet"),
        ("provenance.json", "provenance"),
    )

    def fake_download(
        repo_id: str,
        filename: str,
        repo_type: str | None = None,
        token: str | None = None,
        **kwargs: Any,
    ) -> str:
        # Only ``filename`` is consulted; the remaining arguments are
        # accepted and ignored.
        for known_name, repo_key in served:
            if filename == known_name:
                return str(synthetic_repo[repo_key])
        raise FileNotFoundError(f"Unexpected filename in synthetic repo: {filename}")

    monkeypatch.setattr(huggingface_hub, "hf_hub_download", fake_download)

    # Best effort: also patch the name inside spaceutil.data (presumably it
    # binds its own reference via a from-import — verify). ``raising=False``
    # tolerates the attribute being absent; a missing module is skipped.
    try:
        import spaceutil.data as data_mod

        monkeypatch.setattr(data_mod, "hf_hub_download", fake_download, raising=False)
    except ImportError:
        pass

    return fake_download
|