Spaces:

specimba
/

nexus-os-space

Running

App Files Files Community

specimba committed 6 days ago

Commit

69e11c4

verified ·

1 Parent(s): 1d23314

Copy nexus_os_v2/ernie_adapter.py from dataset for module imports

Browse files

Files changed (1) hide show

nexus_os_v2/ernie_adapter.py +182 -0

nexus_os_v2/ernie_adapter.py ADDED Viewed

	@@ -0,0 +1,182 @@

+"""
+ERNIE 5.1 Browser-Sourced Manual Callback Adapter
+ERNIE has no API — this adapter bridges browser output to NEXUS OS.
+Usage pattern:
+  1. User manually copies ERNIE browser output to clipboard/file
+  2. ERNIEAdapter reads the file and returns structured evidence
+  3. Adapter normalizes scores to [0,1] for μ_ret compatibility
+Fallback: If no manual input available, adapter degrades gracefully
+with empty evidence and warns the router to use parametric-only mode.
+"""
+import json
+import os
+import re
+from typing import List, Dict, Optional, Any
+from dataclasses import dataclass
+from pathlib import Path
+@dataclass
+class ERNIEEvidence:
+    """One piece of evidence captured from ERNIE browser output."""
+    # Evidence body text.
+    text: str
+    # Confidence, normalized to the 0-1 range.
+    confidence: float
+    # Origin tag, e.g. "ernie_browser".
+    source: str
+    # Timestamp when the input supplied one; otherwise None.
+    timestamp: Optional[str] = None
+    # Original ERNIE score if one was parsed; otherwise None.
+    raw_score: Optional[float] = None
+class ERNIEAdapter:
+    """
+    Manual callback adapter for Baidu ERNIE 5.1 (no public API).
+    ERNIE 5.1 is browser-only — all interaction happens through:
+      https://yiyan.baidu.com (Chinese interface)
+      https://ernie.baidu.com (International)
+    This adapter reads manually-captured ERNIE outputs from one watched file
+    (the ``watch_path`` argument, else the ERNIE_OUTPUT_PATH env var, else
+    DEFAULT_WATCH_PATH). The file content may be:
+      - A JSON list of evidence items
+      - A single JSON evidence object
+      - Unstructured text (numbered items and/or paragraphs)
+    Confidence values from every format are normalized to [0, 1]; values
+    above 1.0 are treated as percentages. A missing, unreadable or undecodable
+    file never raises: the adapter keeps serving its last parsed evidence
+    (initially an empty list).
+    """
+    DEFAULT_WATCH_PATH = "./ernie_output.json"
+    # Confidence assigned when the input carries no usable score.
+    DEFAULT_CONFIDENCE = 0.7
+    SCORE_PATTERN = re.compile(r'(置信度|confidence|可信度)[：:]\s*(\d+\.?\d*)', re.I)
+    def __init__(self, watch_path: Optional[str] = None):
+        """Resolve the watched file: explicit argument, then env var, then default."""
+        self.watch_path = Path(watch_path or os.environ.get("ERNIE_OUTPUT_PATH", self.DEFAULT_WATCH_PATH))
+        self._last_read_mtime: Optional[float] = None
+        self._cache: List[ERNIEEvidence] = []
+    def _normalize_confidence(self, value: Any) -> float:
+        """
+        Map an arbitrary score onto [0, 1].
+        None, non-numeric values and NaN fall back to DEFAULT_CONFIDENCE.
+        Scores above 1.0 are read as percentages (85 -> 0.85). The result is
+        clamped, so an explicit 0 stays 0 instead of becoming the default.
+        """
+        if value is None:
+            return self.DEFAULT_CONFIDENCE
+        try:
+            score = float(value)
+        except (TypeError, ValueError):
+            return self.DEFAULT_CONFIDENCE
+        if score != score:  # NaN is the only float that differs from itself
+            return self.DEFAULT_CONFIDENCE
+        if score > 1.0:
+            score /= 100.0
+        return min(max(score, 0.0), 1.0)
+    def _evidence_from_json(self, item: Any) -> ERNIEEvidence:
+        """Build one evidence record from a decoded JSON value."""
+        if not isinstance(item, dict):
+            # Bare strings/numbers inside a JSON list: keep them as text
+            # rather than crashing on a missing ``.get``.
+            return ERNIEEvidence(
+                text=str(item),
+                confidence=self.DEFAULT_CONFIDENCE,
+                source="ernie_browser",
+            )
+        return ERNIEEvidence(
+            text=item.get("text", item.get("answer", str(item))),
+            confidence=self._normalize_confidence(item.get("confidence")),
+            source="ernie_browser",
+            timestamp=item.get("timestamp"),
+        )
+    def _parse_raw_text(self, raw: str) -> List[ERNIEEvidence]:
+        """
+        Parse unstructured ERNIE browser output into evidence chunks.
+        The text is split on blank lines and on "<digits>. " markers; chunks
+        shorter than 10 characters are dropped. A score matching SCORE_PATTERN
+        is kept verbatim in ``raw_score`` and normalized into ``confidence``.
+        """
+        # NOTE(review): the "<digits>. " separator also matches a number that
+        # ends a sentence (e.g. "2026. "); kept as-is to preserve chunking.
+        evidence = []
+        for chunk in re.split(r'\n\n+|\d+\.\s+', raw):
+            chunk = chunk.strip()
+            if len(chunk) < 10:
+                continue
+            match = self.SCORE_PATTERN.search(chunk)
+            raw_score = float(match.group(2)) if match else None
+            evidence.append(ERNIEEvidence(
+                text=chunk,
+                confidence=self._normalize_confidence(raw_score),
+                source="ernie_browser",
+                raw_score=raw_score,
+            ))
+        return evidence
+    def _read_file(self) -> Optional[str]:
+        """
+        Return the watch file's text if it is new since the last successful read.
+        Returns None when the file is missing, unreadable, not valid UTF-8, or
+        unmodified. The remembered mtime only advances after a successful read,
+        so a failed read is retried on the next call.
+        """
+        try:
+            mtime = self.watch_path.stat().st_mtime
+        except OSError:
+            return None  # Missing or inaccessible
+        if self._last_read_mtime is not None and mtime <= self._last_read_mtime:
+            return None  # Not modified
+        try:
+            # utf-8-sig accepts plain UTF-8 and strips a BOM if an editor
+            # added one; a leading BOM would otherwise break json.loads.
+            raw = self.watch_path.read_text(encoding="utf-8-sig")
+        except (OSError, UnicodeDecodeError):
+            return None
+        self._last_read_mtime = mtime
+        return raw
+    def poll(self) -> List[ERNIEEvidence]:
+        """
+        Poll for new ERNIE browser output.
+        Returns the freshly parsed evidence when the watch file changed,
+        otherwise the previously parsed evidence ([] if nothing was ever read).
+        """
+        raw = self._read_file()
+        if raw is None:
+            return self._cache  # Return cached if no new data
+        # Try JSON first
+        try:
+            data = json.loads(raw)
+        except json.JSONDecodeError:
+            # Parse as raw text
+            self._cache = self._parse_raw_text(raw)
+            return self._cache
+        if isinstance(data, list):
+            self._cache = [self._evidence_from_json(item) for item in data]
+        elif isinstance(data, dict):
+            self._cache = [self._evidence_from_json(data)]
+        else:
+            # Scalar JSON (string, number, null): treat as plain text so the
+            # cache reflects the new file instead of silently staying stale.
+            self._cache = self._parse_raw_text(data if isinstance(data, str) else raw)
+        return self._cache
+    def get_evidence(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Format ERNIE evidence for CK-PLUG / TWAVE consumption.
+        ``query`` is accepted for interface compatibility but is not used to
+        filter; every currently available evidence item is returned.
+        """
+        return [
+            {
+                "text": e.text,
+                "relevance": e.confidence,  # Maps to μ_ret scale
+                "source": e.source,
+                "timestamp": e.timestamp,
+            }
+            for e in self.poll()
+        ]
+    def is_available(self) -> bool:
+        """Check if ERNIE evidence is currently available (triggers a poll)."""
+        return bool(self.poll())
+    def get_status(self) -> Dict[str, Any]:
+        """Return adapter status for monitoring (triggers a poll)."""
+        evidence = self.poll()
+        count = len(evidence)
+        return {
+            "available": count > 0,
+            "watch_path": str(self.watch_path),
+            "evidence_count": count,
+            "avg_confidence": sum(e.confidence for e in evidence) / count if count else 0.0,
+            # Sorted so the status payload is deterministic across calls.
+            "sources": sorted({e.source for e in evidence}),
+        }
+class MockERNIEAdapter:
+    """
+    Mock adapter that returns synthetic ERNIE evidence for testing.
+    Exposes the same methods as ERNIEAdapter (poll, get_evidence,
+    is_available, get_status) and the same dictionary keys, backed by a
+    fixed in-memory list instead of a watched file.
+    """
+    def __init__(self):
+        """Create the fixed list of synthetic evidence items."""
+        self._mock_evidence = [
+            ERNIEEvidence(
+                text="ERNIE 5.1 confirms: The thermodynamic BEC analogy for LLM reasoning is structurally valid when applied to internal effective temperature, not sampling temperature.",
+                confidence=0.91,
+                source="ernie_browser",
+            ),
+            ERNIEEvidence(
+                text="ERNIE 5.1 analysis: Claude Opus 4.7 and GPT-5.5 use internal thermostat regulation decoupled from user-facing temperature controls.",
+                confidence=0.85,
+                source="ernie_browser",
+            ),
+            ERNIEEvidence(
+                text="ERNIE 5.1 observation: Jarzynski equality has not been applied to autoregressive LLM generation in published literature as of May 2026.",
+                confidence=0.78,
+                source="ernie_browser",
+            ),
+        ]
+    def poll(self) -> List[ERNIEEvidence]:
+        """Return the synthetic evidence list."""
+        return self._mock_evidence
+    def get_evidence(self, query: str) -> List[Dict[str, Any]]:
+        """
+        Return the synthetic evidence as dictionaries; ``query`` is ignored.
+        Each record carries the same keys as ERNIEAdapter.get_evidence,
+        including "timestamp", so consumers see one record shape.
+        """
+        return [
+            {
+                "text": e.text,
+                "relevance": e.confidence,
+                "source": e.source,
+                "timestamp": e.timestamp,
+            }
+            for e in self._mock_evidence
+        ]
+    def is_available(self) -> bool:
+        """The mock always reports evidence as available."""
+        return True
+    def get_status(self) -> Dict[str, Any]:
+        """Return a status dictionary with the same keys as ERNIEAdapter.get_status."""
+        count = len(self._mock_evidence)
+        return {
+            "available": True,
+            "watch_path": "mock://ernie",
+            "evidence_count": count,
+            "avg_confidence": sum(e.confidence for e in self._mock_evidence) / count,
+            "sources": ["ernie_browser"],
+        }