# NOTE(review): the three lines below are Hugging Face web-page residue
# (author avatar caption and commit banner), not Python source. Kept as
# comments so the file parses.
# andandandand's picture
# Deploy cold-start reliability update (source: 85cf4fa)
# d110c29 verified
#!/usr/bin/env python3
"""HyperView Space runtime for core-claims top jaguar ReID models."""
from __future__ import annotations
import json
import os
import re
import threading
import time
import traceback
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import numpy as np
from datasets import Dataset as HFDataset
from datasets import DatasetDict as HFDatasetDict
from datasets import load_dataset, load_from_disk
import hyperview as hv
from hyperview.api import Session
from hyperview.core.sample import Sample
# Host name the hosting platform advertises; may be a public hostname rather
# than a bindable interface (see _resolve_bind_host).
SPACE_HOST = os.environ.get("SPACE_HOST", "0.0.0.0")
# Interfaces considered safe to bind the server socket to directly.
LOCAL_BIND_HOSTS = {"0.0.0.0", "127.0.0.1", "localhost", "::", "::1"}
# Name of the HyperView dataset created/loaded by this runtime.
DATASET_NAME = os.environ.get("HYPERVIEW_DATASET_NAME", "jaguar_core_claims_demo")
# Source Hugging Face dataset: either a hub repo id or a local
# load_from_disk directory (see _load_hf_rows).
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "hyper3labs/jaguar-hyperview-demo")
HF_DATASET_CONFIG = os.environ.get("HF_DATASET_CONFIG", "default")
HF_DATASET_SPLIT = os.environ.get("HF_DATASET_SPLIT", "train")
# Directory holding precomputed embedding .npz files (defaults to ./assets
# next to this file).
EMBEDDING_ASSET_DIR = Path(
    os.environ.get(
        "EMBEDDING_ASSET_DIR",
        str((Path(__file__).resolve().parent / "assets").resolve()),
    )
)
# Manifest describing the embedding models/spaces (defaults to assets/manifest.json).
ASSET_MANIFEST_PATH = Path(
    os.environ.get("EMBEDDING_ASSET_MANIFEST", str((EMBEDDING_ASSET_DIR / "manifest.json").resolve()))
)
# serve_fast: start serving immediately and warm up in the background;
# blocking: finish warmup before the server starts.
DEFAULT_STARTUP_MODE = "serve_fast"
# exit: terminate the process when background warmup fails; warn: keep serving.
DEFAULT_FAILURE_POLICY = "exit"
# Number of samples inserted per storage batch during HF ingestion.
DEFAULT_BATCH_INSERT_SIZE = 500
# Where warmup progress is persisted as JSON for external health checks.
DEFAULT_WARMUP_STATUS_PATH = Path("/tmp/hyperview_warmup_status.json")
def _utc_now() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _resolve_startup_mode() -> str:
    """Read HYPERVIEW_STARTUP_MODE, falling back to the default on bad values.

    Returns either "serve_fast" or "blocking".
    """
    requested = os.environ.get("HYPERVIEW_STARTUP_MODE", DEFAULT_STARTUP_MODE)
    startup_mode = requested.strip().lower()
    if startup_mode not in {"serve_fast", "blocking"}:
        # Unknown value: warn and continue with the safe default.
        print(
            f"Invalid HYPERVIEW_STARTUP_MODE={startup_mode!r}; "
            f"falling back to {DEFAULT_STARTUP_MODE!r}."
        )
        return DEFAULT_STARTUP_MODE
    return startup_mode
def _resolve_failure_policy() -> str:
    """Read HYPERVIEW_WARMUP_FAILURE_POLICY, defaulting on invalid values.

    Returns either "exit" or "warn".
    """
    requested = os.environ.get("HYPERVIEW_WARMUP_FAILURE_POLICY", DEFAULT_FAILURE_POLICY)
    failure_policy = requested.strip().lower()
    if failure_policy not in {"exit", "warn"}:
        # Unknown value: warn and continue with the safe default.
        print(
            f"Invalid HYPERVIEW_WARMUP_FAILURE_POLICY={failure_policy!r}; "
            f"falling back to {DEFAULT_FAILURE_POLICY!r}."
        )
        return DEFAULT_FAILURE_POLICY
    return failure_policy
def _resolve_batch_insert_size() -> int:
    """Parse HYPERVIEW_BATCH_INSERT_SIZE as a positive integer.

    Falls back to DEFAULT_BATCH_INSERT_SIZE when the variable is unset.

    Raises:
        ValueError: if the value is not an integer or is <= 0.
    """
    raw_value = os.environ.get("HYPERVIEW_BATCH_INSERT_SIZE", str(DEFAULT_BATCH_INSERT_SIZE)).strip()
    try:
        parsed = int(raw_value)
    except ValueError as exc:
        raise ValueError(f"Invalid integer value for HYPERVIEW_BATCH_INSERT_SIZE: {raw_value}") from exc
    if parsed <= 0:
        raise ValueError(f"HYPERVIEW_BATCH_INSERT_SIZE must be > 0; got {parsed}")
    return parsed
def _resolve_warmup_status_path() -> Path:
raw = os.environ.get("HYPERVIEW_WARMUP_STATUS_PATH")
if raw is None:
return DEFAULT_WARMUP_STATUS_PATH
return Path(raw)
class WarmupStatusTracker:
    """Tracks warmup state and persists it to a JSON status file."""

    def __init__(self, status_path: Path):
        self._status_path = status_path
        self._lock = threading.Lock()
        started = _utc_now()
        # Initial state: warmup has not begun any real phase yet.
        self._state: dict[str, Any] = {
            "status": "starting",
            "phase": "boot",
            "counts": {},
            "error": None,
            "timestamps": {
                "started_at": started,
                "updated_at": started,
            },
        }
        self._persist_locked()

    def update(
        self,
        *,
        status: str | None = None,
        phase: str | None = None,
        counts: dict[str, Any] | None = None,
        error: dict[str, Any] | None = None,
    ) -> None:
        """Merge the given fields into the tracked state and persist it.

        Counts are merged (dict.update) rather than replaced; terminal statuses
        also stamp a ready_at/failed_at timestamp.
        """
        with self._lock:
            stamp = _utc_now()
            for field, value in (("status", status), ("phase", phase), ("error", error)):
                if value is not None:
                    self._state[field] = value
            if counts:
                self._state["counts"].update(counts)
            timestamps = self._state["timestamps"]
            timestamps["updated_at"] = stamp
            if status == "ready":
                timestamps["ready_at"] = stamp
            if status == "failed":
                timestamps["failed_at"] = stamp
            self._persist_locked()

    @property
    def path(self) -> Path:
        """Location of the persisted status JSON file."""
        return self._status_path

    def _persist_locked(self) -> None:
        """Write the state atomically (tmp file + rename); OSError is logged, not raised."""
        try:
            self._status_path.parent.mkdir(parents=True, exist_ok=True)
            scratch = self._status_path.with_name(f"{self._status_path.name}.tmp")
            scratch.write_text(json.dumps(self._state, indent=2, sort_keys=True), encoding="utf-8")
            scratch.replace(self._status_path)
        except OSError as exc:
            print(f"Warmup status warning: failed writing status JSON to {self._status_path} ({exc})")
def _patch_hyperview_default_panel() -> None:
    """Patch HyperView 0.3.1 frontend for default panel and dock cache-key migration.

    HyperView currently has no public API for these behaviors. This runtime patch is
    intentionally narrow and idempotent, targeting the known bundled chunk for v0.3.1.
    """
    default_panel = os.environ.get("HYPERVIEW_DEFAULT_PANEL", "spherical3d").strip().lower()
    apply_default_panel_patch = default_panel in {"spherical3d", "sphere3d"}
    if not apply_default_panel_patch:
        # Deliberately no early return: the dock cache-key migration below must
        # run even when the default-panel patch itself is disabled.
        print(f"Skipping frontend default-panel patch (HYPERVIEW_DEFAULT_PANEL={default_panel!r}).")
    # Layout cache key the frontend should use; bumping the version via env var
    # invalidates previously cached dock layouts in users' browsers.
    cache_version = os.environ.get("HYPERVIEW_LAYOUT_CACHE_VERSION", "v6").strip() or "v6"
    target_layout_key = f"hyperview:dockview-layout:{cache_version}"
    legacy_layout_key = "hyperview:dockview-layout:v5"
    layout_key_pattern = r"hyperview:dockview-layout:v\d+"
    # Bundled Next.js chunk shipped inside the installed hyperview package;
    # the patch targets this exact file and degrades to a no-op if missing.
    chunk_path = (
        Path(hv.__file__).resolve().parent
        / "server"
        / "static"
        / "_next"
        / "static"
        / "chunks"
        / "077b38561d6ea80d.js"
    )
    if not chunk_path.exists():
        print(f"Default-panel patch skipped: chunk not found at {chunk_path}")
        return
    # Minified-source markers. `marker_after` is `marker_before` plus an extra
    # statement; finding `marker_after` means a previous run already patched
    # the chunk (idempotence check).
    marker_before = 'v||(v=n)};if(f&&l&&w({id:dr,title:"Euclidean"'
    marker_after = 'v||(v=n),t.id===dd&&n.api.setActive()};if(f&&l&&w({id:dr,title:"Euclidean"'
    try:
        payload = chunk_path.read_text(encoding="utf-8")
    except OSError as exc:
        print(f"Default-panel patch skipped: failed reading chunk ({exc})")
        return
    patched = payload
    changed = False
    if apply_default_panel_patch:
        if marker_after in patched:
            print("HyperView frontend already patched for Sphere 3D default panel.")
        elif marker_before in patched:
            # First replacement only; the marker is expected to be unique.
            patched = patched.replace(marker_before, marker_after, 1)
            changed = True
            print("Patched HyperView frontend: Sphere 3D will open as default scatter panel.")
        else:
            # Chunk contents differ from the expected v0.3.1 build; do nothing.
            print("Default-panel patch skipped: expected marker not found in HyperView chunk.")
    # Dock cache-key migration: prefer the exact legacy key; otherwise migrate
    # whatever versioned key the chunk currently carries.
    if target_layout_key in patched:
        print(f"HyperView frontend already uses dock cache key '{target_layout_key}'.")
    elif legacy_layout_key in patched:
        patched = patched.replace(legacy_layout_key, target_layout_key, 1)
        changed = True
        print(f"Patched HyperView frontend: dock cache key {legacy_layout_key} -> {target_layout_key}.")
    else:
        # Legacy key absent; fall back to detecting any versioned layout key.
        discovered = re.search(layout_key_pattern, patched)
        if discovered:
            source_key = discovered.group(0)
            if source_key == target_layout_key:
                print(f"HyperView frontend already uses dock cache key '{target_layout_key}'.")
            else:
                print(
                    f"Dock cache patch notice: expected legacy key '{legacy_layout_key}' not found; "
                    f"migrating detected key '{source_key}' -> '{target_layout_key}'."
                )
                patched = patched.replace(source_key, target_layout_key, 1)
                changed = True
        else:
            print(
                "Dock cache patch warning: expected layout cache key marker "
                f"'{legacy_layout_key}' not found in HyperView chunk."
            )
    if not changed:
        return
    # Write back only when something actually changed; a failed write leaves
    # the original chunk intact.
    try:
        chunk_path.write_text(patched, encoding="utf-8")
    except OSError as exc:
        print(f"Frontend patch skipped: failed writing chunk ({exc})")
def _resolve_bind_host() -> tuple[str, str | None]:
explicit_bind = os.environ.get("HYPERVIEW_BIND_HOST")
if explicit_bind:
return explicit_bind, None
if SPACE_HOST in LOCAL_BIND_HOSTS:
return SPACE_HOST, None
return "0.0.0.0", f"SPACE_HOST='{SPACE_HOST}' is non-local; falling back to 0.0.0.0"
def _resolve_port() -> int:
for key in ("SPACE_PORT", "PORT"):
value = os.environ.get(key)
if value:
try:
return int(value)
except ValueError as exc:
raise ValueError(f"Invalid integer value for {key}: {value}") from exc
return 7860
def load_asset_manifest(path: Path) -> dict[str, Any]:
    """Load and validate the embedding-asset manifest JSON.

    Raises:
        FileNotFoundError: if the manifest file does not exist.
        ValueError: if the payload does not contain a 'models' list.
    """
    if not path.exists():
        raise FileNotFoundError(
            f"Embedding asset manifest not found: {path}. "
            "Run scripts/build_hyperview_demo_assets.py first."
        )
    payload = json.loads(path.read_text(encoding="utf-8"))
    manifest_is_valid = "models" in payload and isinstance(payload["models"], list)
    if not manifest_is_valid:
        raise ValueError(f"Invalid asset manifest format: {path}")
    return payload
def _load_hf_rows() -> HFDataset:
    """Load the demo rows either from a local on-disk dataset or the HF Hub.

    A local directory at HF_DATASET_REPO wins; for a DatasetDict the configured
    split is preferred, then 'train', then whichever split comes first.
    """
    local_path = Path(HF_DATASET_REPO)
    if not local_path.exists():
        return load_dataset(HF_DATASET_REPO, name=HF_DATASET_CONFIG, split=HF_DATASET_SPLIT)
    loaded = load_from_disk(str(local_path))
    if not isinstance(loaded, HFDatasetDict):
        return loaded
    for candidate_split in (HF_DATASET_SPLIT, "train"):
        if candidate_split in loaded:
            return loaded[candidate_split]
    return loaded[next(iter(loaded.keys()))]
def ingest_hf_dataset_samples(dataset: hv.Dataset, batch_insert_size: int | None = None) -> dict[str, int]:
    """Ingest HF dataset rows into the HyperView dataset, skipping existing IDs.

    Images are decoded once and cached as JPEGs under the media directory, and
    samples are inserted in batches through the dataset's (private) storage API,
    so re-runs are idempotent.

    Args:
        dataset: Target HyperView dataset.
        batch_insert_size: Insert batch size; defaults to the
            HYPERVIEW_BATCH_INSERT_SIZE environment setting.

    Returns:
        Counters: candidates, existing, added, saved_images, duplicates.

    Raises:
        ValueError: if the effective batch size is not positive.
    """
    rows = _load_hf_rows()
    # Per-dataset media cache directory for decoded JPEG copies of the rows.
    media_root = Path(os.environ.get("HYPERVIEW_MEDIA_DIR", "./demo_data/media")) / DATASET_NAME
    media_root.mkdir(parents=True, exist_ok=True)
    effective_batch_size = _resolve_batch_insert_size() if batch_insert_size is None else int(batch_insert_size)
    if effective_batch_size <= 0:
        raise ValueError(f"batch_insert_size must be > 0; got {effective_batch_size}")
    # First pass: one lightweight record per unique sample_id (first occurrence
    # wins; later duplicates are only counted).
    records_by_id: dict[str, dict[str, Any]] = {}
    duplicate_ids = 0
    for index, row in enumerate(rows):
        filename = str(row.get("filename", f"sample_{index:06d}.jpg"))
        sample_id = str(row.get("sample_id", filename))
        if sample_id in records_by_id:
            duplicate_ids += 1
            continue
        records_by_id[sample_id] = {
            "index": index,
            "filename": filename,
            "sample_id": sample_id,
            "label": str(row.get("label", "")),
            "split_tag": str(row.get("split_tag", "unknown")),
        }
    candidate_records = list(records_by_id.values())
    candidate_ids = [record["sample_id"] for record in candidate_records]
    # Ask storage which IDs already exist so only missing samples are inserted.
    existing_ids = dataset._storage.get_existing_ids(candidate_ids) if candidate_ids else set()
    missing_records = [record for record in candidate_records if record["sample_id"] not in existing_ids]
    print(
        "HF ingestion plan: "
        f"candidates={len(candidate_records)} existing={len(existing_ids)} "
        f"missing={len(missing_records)} duplicates={duplicate_ids} "
        f"batch_insert_size={effective_batch_size}"
    )
    added = 0
    saved_images = 0
    pending_samples: list[Sample] = []
    def flush_pending_samples() -> None:
        # Insert the accumulated batch via storage and reset the buffer.
        nonlocal added
        if not pending_samples:
            return
        dataset._storage.add_samples_batch(pending_samples)
        added += len(pending_samples)
        print(f"Inserted sample batch: size={len(pending_samples)} total_inserted={added}")
        pending_samples.clear()
    for record in missing_records:
        sample_id = str(record["sample_id"])
        filename = str(record["filename"])
        # Random-access back into the HF dataset to decode the image only for
        # rows that actually need insertion.
        row = rows[int(record["index"])]
        image_obj = row["image"]
        # NOTE(review): the JPEG cache is keyed on Path(sample_id).stem, so
        # sample ids differing only by extension/suffix would collide — assumed
        # unique stems in this demo dataset; confirm against the source repo.
        image_path = media_root / f"{Path(sample_id).stem}.jpg"
        if not image_path.exists():
            image_obj.convert("RGB").save(image_path, format="JPEG", quality=90, optimize=True)
            saved_images += 1
        metadata = {
            "filename": filename,
            "sample_id": sample_id,
            "split_tag": str(record["split_tag"]),
            "identity": str(record["label"]),
            "source_repo": HF_DATASET_REPO,
            "source_config": HF_DATASET_CONFIG,
            "source_split": HF_DATASET_SPLIT,
        }
        pending_samples.append(
            Sample(
                id=sample_id,
                filepath=str(image_path),
                label=str(record["label"]),
                metadata=metadata,
            )
        )
        if len(pending_samples) >= effective_batch_size:
            flush_pending_samples()
    # Flush any final partial batch.
    flush_pending_samples()
    print(
        f"Ingested {added} HF samples into HyperView dataset '{DATASET_NAME}' "
        f"(saved_images={saved_images}, existing={len(existing_ids)})."
    )
    return {
        "candidates": len(candidate_records),
        "existing": len(existing_ids),
        "added": added,
        "saved_images": saved_images,
        "duplicates": duplicate_ids,
    }
def ensure_embedding_spaces(dataset: hv.Dataset, asset_manifest: dict[str, Any], asset_dir: Path) -> None:
    """Register every manifest model's precomputed embeddings with the dataset.

    Each .npz payload must contain 'ids' and a 2D 'vectors' array whose rows
    line up with the ids, and every id must already exist in the dataset.

    Raises:
        ValueError: on a missing embeddings_path, non-2D vectors, or an
            ids/vectors length mismatch.
        FileNotFoundError: when the .npz file is absent.
        RuntimeError: when an embedding id is not a known dataset sample.
    """
    dataset_ids = {sample.id for sample in dataset.samples}
    for entry in asset_manifest["models"]:
        model_key = str(entry["model_key"])
        space_key = str(entry["space_key"])
        embeddings_rel = entry.get("embeddings_path")
        if not embeddings_rel:
            raise ValueError(f"Missing embeddings_path in asset manifest for model {model_key}")
        embeddings_path = asset_dir / str(embeddings_rel)
        if not embeddings_path.exists():
            raise FileNotFoundError(
                f"Missing embeddings file for model {model_key}: {embeddings_path}"
            )
        # allow_pickle=False: the archive is expected to hold plain arrays only.
        payload = np.load(embeddings_path, allow_pickle=False)
        ids = [str(x) for x in payload["ids"].tolist()]
        vectors = np.asarray(payload["vectors"], dtype=np.float32)
        if vectors.ndim != 2:
            raise ValueError(f"Embeddings for {model_key} must be 2D; got {vectors.shape}")
        if len(ids) != vectors.shape[0]:
            raise ValueError(
                f"Embeddings/ID mismatch for {model_key}: {len(ids)} ids vs {vectors.shape[0]} vectors"
            )
        unknown_ids = sorted(set(ids) - dataset_ids)
        if unknown_ids:
            preview = ", ".join(unknown_ids[:5])
            raise RuntimeError(
                f"Embedding IDs missing from loaded dataset for {model_key}. "
                f"First missing IDs: {preview}"
            )
        space_config = {
            "provider": "precomputed-checkpoint",
            "geometry": str(entry["geometry"]),
            "comparison_key": entry.get("comparison_key"),
            "family": entry.get("family"),
            "checkpoint_path": entry.get("checkpoint_path"),
        }
        dataset._storage.ensure_space(
            model_id=model_key,
            dim=int(vectors.shape[1]),
            space_key=space_key,
            config=space_config,
        )
        dataset._storage.add_embeddings(space_key, ids, vectors)
        print(f"Ensured space {space_key} ({vectors.shape[0]} x {vectors.shape[1]})")
def ensure_layouts(dataset: hv.Dataset, asset_manifest: dict[str, Any]) -> list[str]:
    """Compute (or reuse) a UMAP visualization layout for each manifest model.

    Returns the list of layout keys, one per model, in manifest order.
    """
    ensured_keys: list[str] = []
    for entry in asset_manifest["models"]:
        space_key = str(entry["space_key"])
        # force=False lets the dataset reuse a previously computed layout.
        layout_key = dataset.compute_visualization(
            space_key=space_key,
            layout=str(entry.get("layout", "euclidean:2d")),
            method="umap",
            force=False,
        )
        ensured_keys.append(layout_key)
        print(f"Ensured layout {layout_key} for space={space_key}")
    return ensured_keys
def _run_warmup(dataset: hv.Dataset, tracker: WarmupStatusTracker) -> None:
    """Run the full warmup pipeline: ingest rows, register spaces, compute layouts.

    Progress and counters are reported through *tracker* after each phase; the
    tracker ends in status "ready" on success. Exceptions propagate to callers.
    """
    manifest = load_asset_manifest(ASSET_MANIFEST_PATH)
    tracker.update(
        status="running",
        phase="ingest",
        counts={"manifest_models": len(manifest.get("models", []))},
    )
    batch_size = _resolve_batch_insert_size()
    if len(dataset) > 0:
        # Already populated: report the current size and skip ingestion.
        stats = {
            "candidates": len(dataset),
            "existing": len(dataset),
            "added": 0,
            "saved_images": 0,
            "duplicates": 0,
        }
        print(f"Skipping HF ingestion because dataset '{DATASET_NAME}' already has {len(dataset)} samples.")
    else:
        print(
            f"Loading HF dataset rows from {HF_DATASET_REPO}[{HF_DATASET_CONFIG}] split={HF_DATASET_SPLIT}"
        )
        stats = ingest_hf_dataset_samples(dataset, batch_insert_size=batch_size)
    tracker.update(
        counts={
            "batch_insert_size": batch_size,
            "dataset_samples": len(dataset),
            **stats,
        }
    )
    tracker.update(phase="spaces")
    ensure_embedding_spaces(dataset, asset_manifest=manifest, asset_dir=EMBEDDING_ASSET_DIR)
    tracker.update(counts={"spaces": len(dataset.list_spaces())})
    tracker.update(phase="layouts")
    layouts = ensure_layouts(dataset, asset_manifest=manifest)
    tracker.update(
        status="ready",
        phase="ready",
        counts={
            "dataset_samples": len(dataset),
            "spaces": len(dataset.list_spaces()),
            "layouts": len(layouts),
        },
    )
    print(f"Dataset '{DATASET_NAME}' has {len(dataset)} samples")
    print(f"Spaces: {[space.space_key for space in dataset.list_spaces()]}")
    print(f"Layouts: {layouts}")
def _run_warmup_blocking(dataset: hv.Dataset, tracker: WarmupStatusTracker) -> None:
    """Run warmup synchronously; record any failure in the tracker and re-raise."""
    try:
        _run_warmup(dataset, tracker)
    except Exception as exc:
        details = traceback.format_exc()
        tracker.update(
            status="failed",
            phase="failed",
            error={
                "type": type(exc).__name__,
                "message": str(exc),
                "traceback": details,
            },
        )
        print(details)
        raise
def _warmup_worker(
    dataset: hv.Dataset,
    tracker: WarmupStatusTracker,
    failure_policy: str,
    failure_event: threading.Event,
    failure_holder: dict[str, str],
) -> None:
    """Background-thread warmup driver.

    On failure it persists the error through *tracker*, stores a short reason
    under failure_holder["error"], and — when *failure_policy* is "exit" —
    sets *failure_event* so the serving loop can shut the process down.
    """
    try:
        _run_warmup(dataset, tracker)
    except Exception as exc:
        details = traceback.format_exc()
        tracker.update(
            status="failed",
            phase="failed",
            error={
                "type": type(exc).__name__,
                "message": str(exc),
                "traceback": details,
            },
        )
        print("Warmup failed:")
        print(details)
        failure_holder["error"] = f"{type(exc).__name__}: {exc}"
        if failure_policy == "exit":
            failure_event.set()
def _start_server_session(dataset: hv.Dataset, bind_host: str, bind_port: int) -> Session:
    """Launch the HyperView server on a background thread and return its session."""
    server_session = Session(dataset, host=bind_host, port=bind_port)
    server_session.start(background=True)
    print(f"HyperView server is running at {server_session.url}")
    return server_session
def _serve_forever(
    session: Session,
    *,
    failure_event: threading.Event | None = None,
    failure_holder: dict[str, str] | None = None,
) -> None:
    """Block until interrupt, server-thread death, or a signalled warmup failure.

    Polls every 250 ms. Raises RuntimeError when the server thread dies or
    when *failure_event* fires; always stops the session on the way out.
    """
    try:
        while True:
            time.sleep(0.25)
            server_thread = session._server_thread
            if server_thread is not None and not server_thread.is_alive():
                raise RuntimeError("HyperView server stopped unexpectedly.")
            if failure_event is None or not failure_event.is_set():
                continue
            # Warmup signalled a fatal failure; include the reason when known.
            reason = failure_holder.get("error") if failure_holder is not None else None
            if reason:
                raise RuntimeError(f"Warmup failed and failure policy is 'exit': {reason}")
            raise RuntimeError("Warmup failed and failure policy is 'exit'.")
    except KeyboardInterrupt:
        pass
    finally:
        session.stop()
        if session._server_thread is not None:
            session._server_thread.join(timeout=2.0)
def main() -> None:
    """Entry point: patch the frontend, run/dispatch warmup, and serve HyperView."""
    _patch_hyperview_default_panel()
    startup_mode = _resolve_startup_mode()
    failure_policy = _resolve_failure_policy()
    status_path = _resolve_warmup_status_path()
    dataset = hv.Dataset(DATASET_NAME)
    tracker = WarmupStatusTracker(status_path)
    tracker.update(
        counts={
            "dataset_samples": len(dataset),
            "startup_mode": startup_mode,
            "failure_policy": failure_policy,
            "batch_insert_size": _resolve_batch_insert_size(),
        }
    )
    bind_host, bind_warning = _resolve_bind_host()
    bind_port = _resolve_port()
    if bind_warning:
        print(f"Bind host notice: {bind_warning}")
    print(
        "Starting HyperView runtime with "
        f"startup_mode={startup_mode} failure_policy={failure_policy} "
        f"status_path={status_path} bind_host={bind_host} bind_port={bind_port} "
        f"(SPACE_HOST={SPACE_HOST!r}, SPACE_PORT={os.environ.get('SPACE_PORT')!r}, "
        f"PORT={os.environ.get('PORT')!r})"
    )
    # Preparation-only mode: run warmup to completion, then exit without serving.
    if os.environ.get("HYPERVIEW_DEMO_PREP_ONLY") == "1":
        _run_warmup_blocking(dataset, tracker)
        print("Preparation-only mode enabled; skipping server launch.")
        return
    if startup_mode == "blocking":
        # Finish all warmup before the server starts accepting traffic.
        _run_warmup_blocking(dataset, tracker)
        session = _start_server_session(dataset, bind_host=bind_host, bind_port=bind_port)
        _serve_forever(session)
        return
    # serve_fast: warm up in the background while the server comes up immediately.
    failure_event = threading.Event()
    failure_holder: dict[str, str] = {}
    threading.Thread(
        target=_warmup_worker,
        name="hyperview-warmup",
        args=(dataset, tracker, failure_policy, failure_event, failure_holder),
        daemon=True,
    ).start()
    print("Warmup thread started in background.")
    session = _start_server_session(dataset, bind_host=bind_host, bind_port=bind_port)
    if failure_policy == "exit":
        _serve_forever(session, failure_event=failure_event, failure_holder=failure_holder)
    else:
        _serve_forever(session)


if __name__ == "__main__":
    main()