specimba committed on
Commit
69e11c4
·
verified ·
1 Parent(s): 1d23314

Copy nexus_os_v2/ernie_adapter.py from dataset for module imports

Browse files
Files changed (1) hide show
  1. nexus_os_v2/ernie_adapter.py +182 -0
nexus_os_v2/ernie_adapter.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERNIE 5.1 Browser-Sourced Manual Callback Adapter
3
+ ERNIE has no API — this adapter bridges browser output to NEXUS OS.
4
+
5
+ Usage pattern:
6
+ 1. User manually copies ERNIE browser output to clipboard/file
7
+ 2. ERNIEAdapter reads the file and returns structured evidence
8
+ 3. Adapter normalizes scores to [0,1] for μ_ret compatibility
9
+
10
+ Fallback: If no manual input available, adapter degrades gracefully
11
+ with empty evidence and warns the router to use parametric-only mode.
12
+ """
13
+ import json
14
+ import os
15
+ import re
16
+ from typing import List, Dict, Optional, Any
17
+ from dataclasses import dataclass
18
+ from pathlib import Path
19
+
20
+
21
@dataclass
class ERNIEEvidence:
    """A single piece of evidence extracted from ERNIE browser output.

    Instances are plain value objects; the three leading fields are
    required and the remaining two default to ``None``.
    """

    # The evidence text itself.
    text: str
    # Confidence for this evidence, normalized to the 0-1 range.
    confidence: float
    # Origin tag for the evidence, e.g. "ernie_browser".
    source: str
    # Timestamp string, when the captured input provided one.
    timestamp: Optional[str] = None
    # Original ERNIE score as parsed from the text, before normalization.
    raw_score: Optional[float] = None
28
+
29
+
30
class ERNIEAdapter:
    """
    Manual callback adapter for Baidu ERNIE 5.1 (no public API).

    ERNIE 5.1 is browser-only — all interaction happens through:
      https://yiyan.baidu.com (Chinese interface)
      https://ernie.baidu.com (International)

    This adapter reads manually-captured ERNIE outputs from:
    - A watched file (ERNIE_OUTPUT_PATH env var)
    - A JSON clipboard dump
    - A structured text export

    The watched file is parsed as JSON when it is valid JSON (a list of
    items or a single object) and as free text otherwise. Every confidence
    value, whichever path produced it, is normalized into [0, 1].
    """

    DEFAULT_WATCH_PATH = "./ernie_output.json"
    # Confidence assigned when the input carries no usable score.
    DEFAULT_CONFIDENCE = 0.7
    # Accepts both the ASCII colon and the full-width colon (U+FF1A) that
    # normally follows the Chinese labels.
    SCORE_PATTERN = re.compile(r'(置信度|confidence|可信度)[:\uFF1A]\s*(\d+\.?\d*)', re.I)
    # Paragraph breaks, or a numbered-list marker at the start of a line.
    # Anchoring the marker keeps sentences that merely end in a number
    # ("... as of 2026. Next ...") from being cut and losing their digits.
    CHUNK_SPLIT_PATTERN = re.compile(r'\n\n+|^[ \t]*\d+\.\s+', re.M)

    def __init__(self, watch_path: Optional[str] = None):
        """Create an adapter watching ``watch_path``.

        Args:
            watch_path: File to read. When omitted, the ERNIE_OUTPUT_PATH
                environment variable is used, falling back to
                DEFAULT_WATCH_PATH.
        """
        self.watch_path = Path(watch_path or os.environ.get("ERNIE_OUTPUT_PATH", self.DEFAULT_WATCH_PATH))
        self._last_read_mtime: Optional[float] = None
        self._cache: List["ERNIEEvidence"] = []

    @staticmethod
    def _normalize_confidence(value: Any, default: float = DEFAULT_CONFIDENCE) -> float:
        """Coerce a raw score into [0, 1], or return ``default`` if unusable.

        Values above 1 are treated as percentages and divided by 100. A
        genuine score of 0 is preserved rather than replaced by the default.
        """
        if value is None or isinstance(value, bool):
            return default
        try:
            score = float(value)
        except (TypeError, ValueError):
            return default
        if score != score:  # NaN compares unequal to itself
            return default
        if score > 1.0:
            score /= 100.0
        return min(max(score, 0.0), 1.0)

    def _evidence_from_json(self, item: Any) -> "ERNIEEvidence":
        """Build one evidence record from a decoded JSON list element."""
        if not isinstance(item, dict):
            # Bare strings/numbers in the list: keep them as text instead of
            # failing on a missing ``.get``.
            return ERNIEEvidence(
                text=str(item),
                confidence=self.DEFAULT_CONFIDENCE,
                source="ernie_browser",
            )
        return ERNIEEvidence(
            text=item.get("text", item.get("answer", str(item))),
            confidence=self._normalize_confidence(item.get("confidence")),
            source="ernie_browser",
            timestamp=item.get("timestamp"),
        )

    def _parse_raw_text(self, raw: str) -> List["ERNIEEvidence"]:
        """Parse unstructured ERNIE browser output into evidence chunks.

        The text is split on paragraph breaks and line-leading numbered-list
        markers; chunks shorter than 10 characters are dropped. A confidence
        label inside a chunk, if present, supplies its score.
        """
        evidence = []
        for chunk in self.CHUNK_SPLIT_PATTERN.split(raw):
            chunk = chunk.strip()
            if len(chunk) < 10:
                continue
            match = self.SCORE_PATTERN.search(chunk)
            raw_score = float(match.group(2)) if match else None
            evidence.append(ERNIEEvidence(
                text=chunk,
                confidence=self._normalize_confidence(raw_score),
                source="ernie_browser",
                raw_score=raw_score,
            ))
        return evidence

    def _read_file(self) -> Optional[str]:
        """Read the watch file if it exists and has been modified.

        Returns:
            The file content, or None when the file is missing, unchanged
            since the last successful read, or unreadable.
        """
        try:
            mtime = self.watch_path.stat().st_mtime
            # Compare against None explicitly: an mtime of 0.0 is valid.
            if self._last_read_mtime is not None and mtime <= self._last_read_mtime:
                return None  # Not modified
            content = self.watch_path.read_text(encoding="utf-8")
        except FileNotFoundError:
            return None
        except (OSError, UnicodeDecodeError) as exc:
            import warnings

            warnings.warn(
                f"ERNIE output at {self.watch_path} could not be read: {exc}",
                RuntimeWarning,
                stacklevel=2,
            )
            return None
        # Record the mtime only after a successful read so that a failed
        # read is retried on the next poll.
        self._last_read_mtime = mtime
        return content

    def poll(self) -> List["ERNIEEvidence"]:
        """Poll for new ERNIE browser output.

        Returns:
            The evidence parsed from the watch file. When there is nothing
            new to read, the previously cached evidence is returned (an
            empty list if nothing was ever read).
        """
        raw = self._read_file()
        if raw is None:
            return self._cache  # Return cached if no new data

        # Try JSON first
        try:
            data = json.loads(raw)
        except json.JSONDecodeError:
            self._cache = self._parse_raw_text(raw)
            return self._cache

        if isinstance(data, dict):
            data = [data]
        if isinstance(data, list):
            self._cache = [self._evidence_from_json(item) for item in data]
        else:
            # A JSON scalar (string/number/bool/null): treat it as free text
            # so the cache reflects the new file instead of staying stale.
            self._cache = self._parse_raw_text(data if isinstance(data, str) else raw)
        return self._cache

    def get_evidence(self, query: str) -> List[Dict[str, Any]]:
        """Format ERNIE evidence for CK-PLUG / TWAVE consumption.

        Args:
            query: Accepted for interface compatibility; not used to filter
                the evidence.

        Returns:
            One dict per evidence item with the keys "text", "relevance",
            "source" and "timestamp".
        """
        return [
            {
                "text": e.text,
                "relevance": e.confidence,  # Maps to μ_ret scale
                "source": e.source,
                "timestamp": e.timestamp,
            }
            for e in self.poll()
        ]

    def is_available(self) -> bool:
        """Check if ERNIE evidence is currently available."""
        return len(self.poll()) > 0

    def get_status(self) -> Dict[str, Any]:
        """Return adapter status for monitoring."""
        evidence = self.poll()
        count = len(evidence)
        return {
            "available": count > 0,
            "watch_path": str(self.watch_path),
            "evidence_count": count,
            "avg_confidence": sum(e.confidence for e in evidence) / count if evidence else 0.0,
            "sources": list(set(e.source for e in evidence)),
        }
142
+
143
+
144
class MockERNIEAdapter:
    """Mock adapter that returns synthetic ERNIE evidence for testing.

    Exposes the same methods as ERNIEAdapter (poll, get_evidence,
    is_available, get_status) and the same dictionary keys, so code written
    against the real adapter can run against the mock unchanged.
    """

    def __init__(self):
        # Fixed, hand-written evidence; no file or environment access.
        self._mock_evidence = [
            ERNIEEvidence(
                text="ERNIE 5.1 confirms: The thermodynamic BEC analogy for LLM reasoning is structurally valid when applied to internal effective temperature, not sampling temperature.",
                confidence=0.91,
                source="ernie_browser",
            ),
            ERNIEEvidence(
                text="ERNIE 5.1 analysis: Claude Opus 4.7 and GPT-5.5 use internal thermostat regulation decoupled from user-facing temperature controls.",
                confidence=0.85,
                source="ernie_browser",
            ),
            ERNIEEvidence(
                text="ERNIE 5.1 observation: Jarzynski equality has not been applied to autoregressive LLM generation in published literature as of May 2026.",
                confidence=0.78,
                source="ernie_browser",
            ),
        ]

    def poll(self) -> List[ERNIEEvidence]:
        """Return the synthetic evidence list."""
        return self._mock_evidence

    def get_evidence(self, query: str) -> List[Dict[str, Any]]:
        """Return the synthetic evidence as dicts.

        Args:
            query: Accepted for interface compatibility; not used.

        Returns:
            One dict per item with the keys "text", "relevance", "source"
            and "timestamp" — the same keys ERNIEAdapter.get_evidence emits.
        """
        return [
            {
                "text": e.text,
                "relevance": e.confidence,
                "source": e.source,
                # Included so consumers reading "timestamp" work on the mock.
                "timestamp": e.timestamp,
            }
            for e in self._mock_evidence
        ]

    def is_available(self) -> bool:
        """The mock always reports evidence as available."""
        return True

    def get_status(self) -> Dict[str, Any]:
        """Return a status dict shaped like ERNIEAdapter.get_status."""
        count = len(self._mock_evidence)
        return {
            "available": True,
            "watch_path": "mock://ernie",
            "evidence_count": count,
            "avg_confidence": sum(e.confidence for e in self._mock_evidence) / count,
            "sources": ["ernie_browser"],
        }