Spaces:

t22000t
/

optcg-explorer

Sleeping

App Files Files Community

optcg-explorer / tests /conftest.py

t22000t

Initial commit: optcg-explorer Gradio Space

3ab07bd 4 days ago

raw

history blame contribute delete

5.48 kB

	"""Test fixtures for optcg-explorer-space.

	A 20-row synthetic corpus at 1024-dim mimics the shape of the published
	`cards_with_embeddings.parquet`. Embeddings are random unit vectors with
	a fixed seed so neighbour rankings are deterministic.
	"""

	from __future__ import annotations

	from pathlib import Path
	from typing import Any

	import numpy as np
	import pandas as pd
	import pytest
	from optcg_cards.provenance import (
	EmbedProvenance,
	FetchProvenance,
	write_provenance,
	)

	EMBEDDING_DIM = 1024
	N_CARDS = 20

	_COLORS_POOL = [
	["Red"],
	["Green"],
	["Blue"],
	["Purple"],
	["Black"],
	["Yellow"],
	["Red", "Green"],
	["Blue", "Yellow"],
	]
	_CARD_TYPES = ["Character", "Event", "Stage", "Leader"]
	_RARITIES = ["C", "UC", "R", "SR", "L"]


	def _unit_vector(rng: np.random.Generator, dim: int) -> list[float]:
	v = rng.standard_normal(dim).astype(np.float32)
	v /= np.linalg.norm(v)
	return v.tolist()


	@pytest.fixture
	def synthetic_cards() -> list[dict[str, Any]]:
	    """Build N_CARDS OPTCG-shaped card dicts with L2-normalized embeddings.

	    Field values are derived from the card index by cycling through the
	    module-level pools, and the random parts (UMAP coordinates and the
	    EMBEDDING_DIM-long embedding) come from a generator seeded with 42,
	    so every invocation yields the same corpus.

	    Returns:
	        A fresh list of card dicts. Every list-valued field is a new
	        object per card, so a test may mutate a card without affecting
	        other cards or later fixture invocations.
	    """
	    rng = np.random.default_rng(seed=42)
	    cards: list[dict[str, Any]] = []
	    for i in range(N_CARDS):
	        code = f"OP01-{i:03d}"
	        card_type = _CARD_TYPES[i % len(_CARD_TYPES)]
	        if i % 4 == 0:
	            effect_text = f"Effect for card {i}. Blocker. Draw 1."
	        else:
	            effect_text = f"Effect for card {i}."
	        cards.append(
	            {
	                "id": code,
	                "code": code,
	                "name": f"Card {i}",
	                "card_type": card_type,
	                # Copy the pool entry: handing out the module-level list
	                # itself would let one test's mutation leak into other
	                # cards (the pool repeats every 8 cards) and later tests.
	                "colors": list(_COLORS_POOL[i % len(_COLORS_POOL)]),
	                "cost": (i % 10),
	                "power": 1000 * (1 + i % 9),
	                "counter": (i % 3) * 1000 if (i % 3) else None,
	                "life": 5 if card_type == "Leader" else None,
	                "attribute": "Slash" if i % 2 else "Strike",
	                "family": ["Straw Hat Crew"] if i % 2 else ["Animal Kingdom Pirates"],
	                "effect_text": effect_text,
	                "trigger_text": "Trigger: Draw 1." if i % 5 == 0 else "",
	                "rarity": _RARITIES[i % len(_RARITIES)],
	                "pack_id": "OP01",
	                "set_code": "OP01",
	                "set_name": "Romance Dawn",
	                "language": "en",
	                # Draw order (x, y, embedding) is kept so the seeded
	                # values stay the same.
	                "umap_x": float(rng.uniform(-10, 10)),
	                "umap_y": float(rng.uniform(-10, 10)),
	                "embedding": _unit_vector(rng, EMBEDDING_DIM),
	            }
	        )
	    return cards


	@pytest.fixture
	def synthetic_embed_provenance() -> EmbedProvenance:
	    """Return a fixed EmbedProvenance record for the synthetic corpus."""
	    # Three adjacent literals form a single prompt template string.
	    instruction = (
	        "Instruct: Represent this One Piece Card Game card so that "
	        "mechanically similar cards are close in embedding space.\n"
	        "Text: {card_document}"
	    )
	    provenance = EmbedProvenance(
	        model_id="Qwen/Qwen3-Embedding-0.6B",
	        embedding_dim=EMBEDDING_DIM,
	        matryoshka_dim=None,
	        task_instruction=instruction,
	        embedded_at="2026-05-13T00:00:00+00:00",
	        sentence_transformers_version="5.4.1",
	    )
	    return provenance


	@pytest.fixture
	def synthetic_fetch_provenance() -> FetchProvenance:
	    """Return a fixed FetchProvenance record for the synthetic corpus."""
	    fields: dict[str, Any] = {
	        "source": "vegapull",
	        "source_url": "https://en.onepiece-cardgame.com/cardlist/",
	        "source_attribution": "vegapull scraping en.onepiece-cardgame.com",
	        "source_fetched_at": "2026-05-13T00:00:00+00:00",
	        "language": "en",
	        # Matches the number of rows the synthetic corpus contains.
	        "n_cards": N_CARDS,
	        "pack_ids_included": ["OP01"],
	        "latest_pack_id": "OP01",
	        "vegapull_version": "1.2.2",
	    }
	    return FetchProvenance(**fields)


	@pytest.fixture
	def synthetic_repo(
	    tmp_path: Path,
	    synthetic_cards: list[dict[str, Any]],
	    synthetic_fetch_provenance: FetchProvenance,
	    synthetic_embed_provenance: EmbedProvenance,
	) -> dict[str, Path]:
	    """Write the synthetic corpus to disk, laid out like the published HF repo.

	    Resulting layout::

	        tmp/
	            cards_with_embeddings.parquet
	            provenance.json

	    Returns:
	        A dict with keys ``"parquet"``, ``"provenance"`` and ``"root"``
	        mapping to the two written files and to ``tmp_path`` itself.
	    """
	    layout = {
	        "parquet": tmp_path / "cards_with_embeddings.parquet",
	        "provenance": tmp_path / "provenance.json",
	        "root": tmp_path,
	    }

	    # Card table first, then the provenance sidecar.
	    frame = pd.DataFrame(synthetic_cards)
	    frame.to_parquet(layout["parquet"], index=False)

	    write_provenance(
	        layout["provenance"],
	        fetch=synthetic_fetch_provenance,
	        embed=synthetic_embed_provenance,
	    )

	    return layout


	@pytest.fixture
	def patched_hf_download(
	    monkeypatch: pytest.MonkeyPatch,
	    synthetic_repo: dict[str, Path],
	):
	    """Redirect ``hf_hub_download`` to the local synthetic repo.

	    Patches ``huggingface_hub.hf_hub_download`` (and, when the module is
	    importable, the ``hf_hub_download`` attribute on ``spaceutil.data``)
	    so that spaceutil.data.load_corpus pulls from ``synthetic_repo``
	    instead of the network.

	    Returns:
	        The replacement download function, so tests can call it directly.
	    """

	    def fake_download(
	        repo_id: str,
	        filename: str,
	        repo_type: str | None = None,
	        token: str | None = None,
	        **kwargs: Any,
	    ) -> str:
	        """Return the local path for *filename*.

	        Raises:
	            FileNotFoundError: if *filename* is not one of the two files
	                present in the synthetic repo.
	        """
	        # Token must never be logged; this stub is assertion-free, and the
	        # log-capture test in test_data.py verifies the no-log invariant.
	        if filename == "cards_with_embeddings.parquet":
	            return str(synthetic_repo["parquet"])
	        if filename == "provenance.json":
	            return str(synthetic_repo["provenance"])
	        raise FileNotFoundError(f"Unexpected filename in synthetic repo: {filename}")

	    import huggingface_hub

	    monkeypatch.setattr(huggingface_hub, "hf_hub_download", fake_download)

	    # Also patch the symbol re-exported into spaceutil.data once it exists.
	    # Only the import is guarded: a missing module is an expected,
	    # deliberately ignored condition, while errors from the patch itself
	    # should surface.
	    try:
	        import spaceutil.data as data_mod
	    except ImportError:
	        pass
	    else:
	        monkeypatch.setattr(data_mod, "hf_hub_download", fake_download, raising=False)

	    return fake_download