Spaces:
Running
Running
Copy nexus_os_v2/milvus_client.py from dataset for module imports
Browse files- nexus_os_v2/milvus_client.py +113 -0
nexus_os_v2/milvus_client.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Milvus Retriever Client for NEXUS OS v2
|
| 3 |
+
Uses Free-01 and Serverless-01 clusters (aws-eu-central-1).
|
| 4 |
+
|
| 5 |
+
API key: loaded from env MILVUS_API_KEY; endpoint URI: loaded from env MILVUS_URI
|
| 6 |
+
"""
|
| 7 |
+
import os
|
| 8 |
+
from typing import List, Dict, Optional, Any
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
from pymilvus import MilvusClient, DataType
|
| 13 |
+
MILVUS_AVAILABLE = True
|
| 14 |
+
except ImportError:
|
| 15 |
+
MILVUS_AVAILABLE = False
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class MilvusResult:
    """One retrieved document, as returned by the retrievers in this module.

    Instances are built by the ``search`` methods of ``MilvusRetriever`` and
    ``MockMilvusRetriever``.
    """

    # Document text (the hit's "text" output field; "" when absent).
    text: str
    # Ranking value: the hit's "distance" for the real retriever, or the
    # keyword-weighted score for the mock.
    # NOTE(review): whether larger means "more relevant" depends on the
    # collection's metric type - confirm before comparing across backends.
    score: float
    # Free-form metadata attached to the document ({} when none is stored).
    metadata: Dict[str, Any]
    # Name of the collection the hit came from.
    collection: str
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class MilvusRetriever:
    """
    Milvus retriever using Zilliz Cloud clusters.
    Supports both Free-01 and Serverless-01 endpoints.

    The endpoint is resolved, in order, from the ``uri`` argument, the
    ``MILVUS_URI`` environment variable, and the ``CLUSTERS`` alias table.
    The API key comes from the ``api_key`` argument or ``MILVUS_API_KEY``.
    """

    # Cluster alias -> endpoint URI. Both values are marked as placeholders
    # and must be replaced (or overridden via ``uri`` / MILVUS_URI).
    CLUSTERS = {
        "free-01": "https://in03-9e3e3e3e3e3e3e3e.api.aws-eu-central-1.zillizcloud.com",  # placeholder
        "serverless-01": "https://in03-serverless.api.aws-eu-central-1.zillizcloud.com",  # placeholder
    }

    def __init__(
        self,
        cluster: str = "serverless-01",
        api_key: Optional[str] = None,
        uri: Optional[str] = None,
        top_k: int = 10,
    ):
        """Create a client for one cluster.

        Args:
            cluster: Alias looked up in ``CLUSTERS`` when no URI is supplied.
            api_key: Token for the cluster; falls back to ``MILVUS_API_KEY``.
            uri: Endpoint URI; falls back to ``MILVUS_URI``, then ``cluster``.
            top_k: Default number of hits returned by ``search``.

        Raises:
            ImportError: If ``pymilvus`` is not installed.
            ValueError: If no endpoint URI can be resolved.
        """
        if not MILVUS_AVAILABLE:
            raise ImportError("pymilvus not installed. Run: pip install pymilvus")
        self.api_key = api_key or os.environ.get("MILVUS_API_KEY")
        self.uri = uri or os.environ.get("MILVUS_URI") or self.CLUSTERS.get(cluster)
        if not self.uri:
            raise ValueError(f"MILVUS_URI or cluster name required. Known: {list(self.CLUSTERS.keys())}")
        # Pass an empty string rather than None when no key is configured, so
        # the client always receives a str token.
        self.client = MilvusClient(uri=self.uri, token=self.api_key or "")
        self.top_k = top_k

    def search(self, collection: str, query: str, top_k: Optional[int] = None) -> List[MilvusResult]:
        """Search a Milvus collection for relevant documents.

        Args:
            collection: Name of the collection to query.
            query: Query text, sent to Milvus as the single search item.
            top_k: Maximum number of hits; ``None`` uses the instance default.
                A value of zero or less returns an empty list without
                contacting the server.

        Returns:
            One ``MilvusResult`` per hit, in the order Milvus returned them.
        """
        # Only None means "use the default"; an explicit 0 must not be
        # replaced by self.top_k.
        k = self.top_k if top_k is None else top_k
        if k <= 0:
            return []
        # NOTE(review): the query is sent as raw text, which presumably relies
        # on the collection accepting text input (e.g. a server-side embedding
        # or full-text function) - confirm against the collection schema.
        results = self.client.search(
            collection_name=collection,
            data=[query],
            limit=k,
            output_fields=["text", "metadata"],
        )
        # One hit list comes back per query item; exactly one item was sent.
        if not results:
            return []
        hits: List[MilvusResult] = []
        for r in results[0]:
            # Tolerate a missing or null entity / metadata payload.
            entity = r.get("entity") or {}
            hits.append(
                MilvusResult(
                    text=entity.get("text", ""),
                    score=r.get("distance", 0.0),
                    metadata=entity.get("metadata") or {},
                    collection=collection,
                )
            )
        return hits

    def get_evidence(self, collection: str, query: str) -> List[Dict[str, Any]]:
        """Format Milvus results for CK-PLUG coupling.

        Returns:
            One dict per hit with the keys ``text``, ``relevance`` (the hit's
            score) and ``collection``.
        """
        results = self.search(collection, query)
        return [
            {"text": r.text, "relevance": r.score, "collection": r.collection}
            for r in results
        ]
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
class MockMilvusRetriever:
    """Offline mock of MilvusRetriever.

    Serves a small in-memory corpus and ranks documents by keyword overlap
    with the query, so callers can run without pymilvus or network access.
    """

    def __init__(self, top_k: int = 5):
        """Set the default hit count and load the built-in collections."""
        self.top_k = top_k
        # Collection name -> documents, each with its text and a base score.
        self._collections = {
            "nexus_docs": [
                {"text": "NEXUS OS v2 uses BEC thermodynamic control for reasoning stability.", "score": 0.92},
                {"text": "Jarzynski equality filters non-equilibrium hallucination trajectories.", "score": 0.85},
                {"text": "ChimeraRouter selects between 35+ local models and 6 cloud APIs.", "score": 0.80},
            ],
            "research_papers": [
                {"text": "CK-PLUG modulates token-level knowledge reliance via Confidence Gain.", "score": 0.94},
                {"text": "Arnold et al. detect phase transitions in LLM output distributions.", "score": 0.87},
                {"text": "Kim derives intelligence Lagrangian on Fisher-metric manifold.", "score": 0.83},
            ],
        }

    def search(self, collection: str, query: str, top_k: Optional[int] = None) -> List[MilvusResult]:
        """Return the best-matching mock documents for ``query``.

        Each document's base score is scaled by ``0.5 + 0.5 * overlap_ratio``,
        where ``overlap_ratio`` is the number of distinct query words found in
        the document divided by the number of words in the query.

        Args:
            collection: Collection name; unknown names yield no results.
            query: Whitespace-separated query text (matched case-insensitively).
            top_k: Maximum number of hits; ``None`` uses the instance default.
                A value of zero or less returns an empty list.

        Returns:
            Results sorted by descending score, at most ``top_k`` long.
        """
        # Only None means "use the default"; an explicit 0 must yield no hits.
        k = self.top_k if top_k is None else top_k
        docs = self._collections.get(collection, [])

        # Tokenise the query once instead of once per document.
        query_words = query.lower().split()
        query_terms = set(query_words)
        denominator = max(1, len(query_words))  # avoid dividing by zero on ""

        # Simple keyword scoring
        scored = []
        for doc in docs:
            overlap = len(query_terms & set(doc["text"].lower().split()))
            score = doc["score"] * (0.5 + 0.5 * (overlap / denominator))
            scored.append(MilvusResult(text=doc["text"], score=score, metadata={}, collection=collection))
        scored.sort(key=lambda x: x.score, reverse=True)
        # Clamp so a negative k cannot turn into a "drop the last N" slice.
        return scored[:max(k, 0)]

    def get_evidence(self, collection: str, query: str) -> List[Dict[str, Any]]:
        """Return search hits as dicts with ``text``, ``relevance`` and ``collection`` keys."""
        results = self.search(collection, query)
        return [{"text": r.text, "relevance": r.score, "collection": r.collection} for r in results]
|