Spaces:
Running
Running
| """per_token_debug Telemetry Schema for NEXUS OS v2.""" | |
| from typing import List, Tuple, Optional, Dict, Any | |
| from dataclasses import dataclass, field | |
| import json | |
@dataclass
class PerTokenDebug:
    """Complete telemetry for a single token position.

    Only ``position``, ``token_id`` and ``token_str`` are required; every
    other field defaults to ``None``, and ``None``-valued fields are dropped
    when the record is serialized with :meth:`to_dict` / :meth:`to_json`.
    """

    # Required identity of the token.
    position: int
    token_id: int
    token_str: str

    # Logit / probability fields.
    logits: Optional[List[float]] = None
    logit_top_k: Optional[List[Tuple[int, float]]] = None
    probs: Optional[List[float]] = None
    prob_top_k: Optional[List[Tuple[int, float]]] = None
    prob_top_1: Optional[float] = None
    entropy: Optional[float] = None
    entropy_normalized: Optional[float] = None

    # Hidden-state / attention fields.
    hidden_state: Optional[List[float]] = None
    hidden_state_l2: Optional[float] = None
    attention_entropy: Optional[float] = None
    attention_mass_to_image: Optional[float] = None
    attention_mass_to_retrieval: Optional[float] = None

    # ckplug_* fields.
    ckplug_cg: Optional[float] = None
    ckplug_H_para: Optional[float] = None
    ckplug_H_cont: Optional[float] = None
    ckplug_alpha: Optional[float] = None
    ckplug_modulated: Optional[bool] = None

    # retrieval_* fields.
    retrieval_context_ids: Optional[List[int]] = None
    retrieval_relevance_scores: Optional[List[float]] = None
    retrieval_source: Optional[str] = None

    # twave_* fields.
    twave_T_eff: Optional[float] = None
    twave_coherence: Optional[float] = None
    twave_psi: Optional[float] = None
    twave_f_density: Optional[float] = None
    twave_mu_ret: Optional[float] = None
    twave_E_exc: Optional[float] = None
    twave_k_local: Optional[float] = None
    twave_xi: Optional[float] = None
    twave_C_V: Optional[float] = None

    # jarzynski_* fields.
    jarzynski_W_i: Optional[float] = None
    jarzynski_W_cumulative: Optional[float] = None
    jarzynski_beta_eff: Optional[float] = None
    jarzynski_bound_violated: Optional[bool] = None

    # flag_* fields.
    flag_stable: Optional[bool] = None
    flag_reflection_triggered: Optional[bool] = None
    flag_hallucination_risk: Optional[str] = None
    flag_action: Optional[str] = None

    # Generation settings and timing.
    model_id: Optional[str] = None
    tier: Optional[str] = None
    temperature_setting: Optional[float] = None
    top_p_setting: Optional[float] = None
    generation_time_ms: Optional[float] = None
    timestamp_utc: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the instance attributes as a dict, omitting ``None`` values."""
        return {
            name: value
            for name, value in self.__dict__.items()
            if value is not None
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize :meth:`to_dict` to a JSON string.

        Values that ``json`` cannot encode natively are converted with
        ``str`` (``default=str``).
        """
        return json.dumps(self.to_dict(), indent=indent, default=str)

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "PerTokenDebug":
        """Build an instance from a mapping, ignoring unknown keys.

        Keys that are not dataclass fields are silently dropped. A
        ``TypeError`` is raised by the generated constructor if any of the
        required fields (``position``, ``token_id``, ``token_str``) is absent.
        """
        # Must be a classmethod: it is an alternate constructor invoked on
        # the class itself, and it relies on the dataclass field registry.
        known = set(cls.__dataclass_fields__)
        return cls(**{key: value for key, value in d.items() if key in known})
@dataclass
class GenerationTelemetry:
    """Complete telemetry for an entire generation sequence.

    Holds the per-token records in ``tokens`` together with sequence-level
    aggregates. The aggregates are not updated automatically; call
    :meth:`compute_aggregates` after ``tokens`` has been populated.
    """

    # Request identity.
    request_id: str
    prompt: str
    enhanced_prompt: Optional[str] = None

    # Per-token records (forward reference keeps this class importable alone).
    tokens: List["PerTokenDebug"] = field(default_factory=list)

    # Aggregates filled in by compute_aggregates().
    total_tokens: int = 0
    total_latency_ms: float = 0.0
    avg_entropy: float = 0.0
    max_entropy: float = 0.0
    entropy_variance: float = 0.0
    avg_T_eff: float = 0.0
    max_T_eff: float = 0.0
    T_eff_variance: float = 0.0
    coherence_trajectory: List[float] = field(default_factory=list)
    psi_trajectory: List[float] = field(default_factory=list)
    reflection_count: int = 0
    reflection_positions: List[int] = field(default_factory=list)
    high_risk_positions: List[int] = field(default_factory=list)
    hallucination_risk_score: float = 0.0
    total_jarzynski_work: float = 0.0
    # NOTE(review): compute_aggregates() never derives this flag from the
    # per-token jarzynski_bound_violated values - confirm who is meant to set it.
    jarzynski_bound_violated: bool = False
    retrieval_coverage: float = 0.0
    avg_retrieval_relevance: float = 0.0

    # Model / routing metadata.
    selected_model: Optional[str] = None
    model_family: Optional[str] = None
    model_params_b: Optional[float] = None
    model_quantization: Optional[str] = None
    routing_tags: List[str] = field(default_factory=list)
    complexity_score: Optional[float] = None
    budget_vram_gb: Optional[float] = None

    # Weight given to each flag_hallucination_risk label when scoring a
    # sequence; any other label (including None) counts as 0.0. Left
    # unannotated on purpose so the dataclass does not treat it as a field.
    _RISK_WEIGHTS = {"critical": 1.0, "high": 0.7, "elevated": 0.3}

    def to_dict(self) -> Dict[str, Any]:
        """Return the instance attributes as a dict, without token records.

        Any attribute that is a list containing at least one
        ``PerTokenDebug`` is left out; an empty ``tokens`` list is therefore
        still present as ``[]``. Use :meth:`to_json` for output that includes
        the serialized tokens.
        """

        def holds_token_records(value: Any) -> bool:
            return isinstance(value, list) and any(
                isinstance(item, PerTokenDebug) for item in value
            )

        return {
            name: value
            for name, value in self.__dict__.items()
            if not holds_token_records(value)
        }

    def to_json(self, indent: int = 2) -> str:
        """Serialize the telemetry, including every token, to a JSON string.

        Values that ``json`` cannot encode natively are converted with
        ``str`` (``default=str``).
        """
        payload = self.to_dict()
        payload["tokens"] = [token.to_dict() for token in self.tokens]
        return json.dumps(payload, indent=indent, default=str)

    @staticmethod
    def _mean_max_variance(values: List[float]) -> Tuple[float, float, float]:
        """Return (mean, maximum, population variance) of a non-empty list."""
        count = len(values)
        mean = sum(values) / count
        variance = sum((value - mean) ** 2 for value in values) / count
        return mean, max(values), variance

    def compute_aggregates(self):
        """Recompute the sequence-level aggregates from ``self.tokens``.

        Does nothing when ``tokens`` is empty. The entropy, ``T_eff``,
        Jarzynski-work and retrieval-relevance aggregates are only
        overwritten when at least one token carries the corresponding value;
        otherwise they keep whatever they held before the call.
        """
        tokens = self.tokens
        if not tokens:
            return
        self.total_tokens = len(tokens)

        entropies = [t.entropy for t in tokens if t.entropy is not None]
        if entropies:
            (
                self.avg_entropy,
                self.max_entropy,
                self.entropy_variance,
            ) = self._mean_max_variance(entropies)

        t_effs = [t.twave_T_eff for t in tokens if t.twave_T_eff is not None]
        if t_effs:
            (
                self.avg_T_eff,
                self.max_T_eff,
                self.T_eff_variance,
            ) = self._mean_max_variance(t_effs)

        self.coherence_trajectory = [
            t.twave_coherence for t in tokens if t.twave_coherence is not None
        ]
        self.psi_trajectory = [
            t.twave_psi for t in tokens if t.twave_psi is not None
        ]

        self.reflection_positions = [
            t.position for t in tokens if t.flag_reflection_triggered
        ]
        self.reflection_count = len(self.reflection_positions)
        self.high_risk_positions = [
            t.position
            for t in tokens
            if t.flag_hallucination_risk in ("high", "critical")
        ]

        works = [t.jarzynski_W_i for t in tokens if t.jarzynski_W_i is not None]
        if works:
            self.total_jarzynski_work = sum(works)

        # Mean risk weight over all tokens; tokens is non-empty here.
        weights = self._RISK_WEIGHTS
        self.hallucination_risk_score = sum(
            weights.get(t.flag_hallucination_risk, 0.0) for t in tokens
        ) / len(tokens)

        # Fraction of tokens that have a retrieval source attached.
        with_source = sum(1 for t in tokens if t.retrieval_source is not None)
        self.retrieval_coverage = with_source / self.total_tokens

        relevances = [
            score
            for t in tokens
            if t.retrieval_relevance_scores
            for score in t.retrieval_relevance_scores
        ]
        if relevances:
            self.avg_retrieval_relevance = sum(relevances) / len(relevances)