specimba committed on
Commit
1d23314
·
verified ·
1 Parent(s): 5466f63

Copy nexus_os_v2/milvus_client.py from dataset for module imports

Browse files
Files changed (1) hide show
  1. nexus_os_v2/milvus_client.py +113 -0
nexus_os_v2/milvus_client.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Milvus Retriever Client for NEXUS OS v2
3
+ Uses Free-01 and Serverless-01 clusters (aws-eu-central-1).
4
+
5
+ API key: loaded from env MILVUS_API_KEY; endpoint URI: loaded from env MILVUS_URI
6
+ """
7
+ import os
8
+ from typing import List, Dict, Optional, Any
9
+ from dataclasses import dataclass
10
+
11
+ try:
12
+ from pymilvus import MilvusClient, DataType
13
+ MILVUS_AVAILABLE = True
14
+ except ImportError:
15
+ MILVUS_AVAILABLE = False
16
+
17
+
18
@dataclass
class MilvusResult:
    """One retrieved document, as produced by the retrievers' ``search`` methods."""

    # Document text of the hit.
    text: str
    # Score attached to the hit by the retriever that produced it.
    score: float
    # Extra per-document fields; the retrievers pass a dict here.
    metadata: Dict[str, Any]
    # Name of the collection that was searched.
    collection: str
24
+
25
+
26
class MilvusRetriever:
    """
    Milvus retriever using Zilliz Cloud clusters.
    Supports both Free-01 and Serverless-01 endpoints.

    Attributes:
        api_key: Token handed to ``MilvusClient``. The ``api_key`` argument
            takes precedence over the ``MILVUS_API_KEY`` environment variable.
        uri: Endpoint in use, resolved in order from the ``uri`` argument,
            the ``MILVUS_URI`` environment variable, and the ``CLUSTERS``
            entry for ``cluster``.
        client: The underlying ``MilvusClient`` instance.
        top_k: Default number of hits requested per search.
    """

    # Cluster name -> endpoint URI. Both values are marked as placeholders and
    # must be replaced (or overridden via ``uri`` / MILVUS_URI) before use.
    CLUSTERS = {
        "free-01": "https://in03-9e3e3e3e3e3e3e3e.api.aws-eu-central-1.zillizcloud.com",  # placeholder
        "serverless-01": "https://in03-serverless.api.aws-eu-central-1.zillizcloud.com",  # placeholder
    }

    def __init__(
        self,
        cluster: str = "serverless-01",
        api_key: Optional[str] = None,
        uri: Optional[str] = None,
        top_k: int = 10,
    ):
        """Resolve credentials and endpoint, then open a ``MilvusClient``.

        Args:
            cluster: Key into ``CLUSTERS``; only consulted when neither ``uri``
                nor the ``MILVUS_URI`` environment variable is set.
            api_key: Access token; falls back to ``MILVUS_API_KEY``.
            uri: Explicit endpoint URI; falls back to ``MILVUS_URI``.
            top_k: Default result count used by ``search``.

        Raises:
            ImportError: If ``pymilvus`` could not be imported.
            ValueError: If no endpoint URI could be resolved.
        """
        if not MILVUS_AVAILABLE:
            raise ImportError("pymilvus not installed. Run: pip install pymilvus")
        self.api_key = api_key or os.environ.get("MILVUS_API_KEY")
        self.uri = uri or os.environ.get("MILVUS_URI") or self.CLUSTERS.get(cluster)
        if not self.uri:
            raise ValueError(f"MILVUS_URI or cluster name required. Known: {list(self.CLUSTERS.keys())}")
        self.client = MilvusClient(uri=self.uri, token=self.api_key)
        self.top_k = top_k

    def search(self, collection: str, query: str, top_k: Optional[int] = None) -> List[MilvusResult]:
        """Search a Milvus collection for relevant documents.

        Args:
            collection: Name of the collection to query.
            query: Query passed to the client as the single search datum.
            top_k: Maximum number of hits; ``None`` uses ``self.top_k``.

        Returns:
            One ``MilvusResult`` per hit for the single query, or an empty
            list when the limit is not positive or the client returns nothing.
        """
        # Only ``None`` means "use the default"; an explicit 0 must not be
        # silently replaced by ``self.top_k``.
        k = self.top_k if top_k is None else top_k
        if k <= 0:
            return []

        # NOTE(review): the raw query string is sent as search data. This
        # presumably relies on the collection accepting text input (e.g. a
        # server-side embedding or full-text function) - confirm.
        results = self.client.search(
            collection_name=collection,
            data=[query],
            limit=k,
            output_fields=["text", "metadata"],
        )
        # Milvus returns a list of hit lists, one per query; guard against an
        # empty response before indexing the first (and only) query.
        if not results:
            return []

        hits: List[MilvusResult] = []
        for r in results[0]:
            # Tolerate hits whose "entity" or "metadata" is missing or None.
            entity = r.get("entity") or {}
            hits.append(
                MilvusResult(
                    text=entity.get("text", ""),
                    score=r.get("distance", 0.0),
                    metadata=entity.get("metadata") or {},
                    collection=collection,
                )
            )
        return hits

    def get_evidence(self, collection: str, query: str) -> List[Dict[str, Any]]:
        """Format Milvus results for CK-PLUG coupling.

        Returns:
            One dict per search hit with the keys ``text``, ``relevance``
            (the hit score) and ``collection``.
        """
        results = self.search(collection, query)
        return [
            {"text": r.text, "relevance": r.score, "collection": r.collection}
            for r in results
        ]
79
+
80
+
81
class MockMilvusRetriever:
    """Offline mock of MilvusRetriever.

    Serves a small in-memory corpus through the same ``search`` /
    ``get_evidence`` interface, so no network access or ``pymilvus`` install
    is needed.
    """

    def __init__(self, top_k: int = 5):
        """Create the mock with its built-in collections.

        Args:
            top_k: Default number of results returned by ``search``.
        """
        self.top_k = top_k
        # Collection name -> documents, each with a fixed base score.
        self._collections = {
            "nexus_docs": [
                {"text": "NEXUS OS v2 uses BEC thermodynamic control for reasoning stability.", "score": 0.92},
                {"text": "Jarzynski equality filters non-equilibrium hallucination trajectories.", "score": 0.85},
                {"text": "ChimeraRouter selects between 35+ local models and 6 cloud APIs.", "score": 0.80},
            ],
            "research_papers": [
                {"text": "CK-PLUG modulates token-level knowledge reliance via Confidence Gain.", "score": 0.94},
                {"text": "Arnold et al. detect phase transitions in LLM output distributions.", "score": 0.87},
                {"text": "Kim derives intelligence Lagrangian on Fisher-metric manifold.", "score": 0.83},
            ],
        }

    def search(self, collection: str, query: str, top_k: Optional[int] = None) -> List[MilvusResult]:
        """Rank the documents of ``collection`` by keyword overlap with ``query``.

        Each document's base score is scaled by
        ``0.5 + 0.5 * overlap / number_of_query_tokens``, where ``overlap`` is
        the count of distinct lowercase whitespace-separated tokens shared by
        the query and the document text.

        Args:
            collection: Collection name; unknown names yield an empty list.
            query: Free-text query.
            top_k: Maximum number of results; ``None`` uses ``self.top_k``.

        Returns:
            Up to ``top_k`` results sorted by descending score. A limit of
            zero or less yields an empty list.
        """
        # Only ``None`` selects the default, so an explicit 0 returns nothing.
        k = self.top_k if top_k is None else top_k
        docs = self._collections.get(collection, [])

        # Query tokenisation does not depend on the document; compute it once.
        query_terms = set(query.lower().split())
        query_len = max(1, len(query.split()))  # avoid dividing by zero on an empty query

        # Simple keyword scoring
        scored = []
        for doc in docs:
            overlap = len(query_terms & set(doc["text"].lower().split()))
            score = doc["score"] * (0.5 + 0.5 * (overlap / query_len))
            scored.append(MilvusResult(text=doc["text"], score=score, metadata={}, collection=collection))
        scored.sort(key=lambda x: x.score, reverse=True)
        # Clamp so a negative limit cannot slice from the end of the list.
        return scored[:max(k, 0)]

    def get_evidence(self, collection: str, query: str) -> List[Dict[str, Any]]:
        """Return search hits as dicts with ``text``, ``relevance`` and ``collection`` keys."""
        results = self.search(collection, query)
        return [{"text": r.text, "relevance": r.score, "collection": r.collection} for r in results]