Spaces:

specimba
/

nexus-os-space

Running

App Files Files Community

specimba committed 6 days ago

Commit

1d23314

verified ·

1 Parent(s): 5466f63

Copy nexus_os_v2/milvus_client.py from dataset for module imports

Browse files

Files changed (1) hide show

nexus_os_v2/milvus_client.py +113 -0

nexus_os_v2/milvus_client.py ADDED Viewed

	@@ -0,0 +1,113 @@

+"""
+Milvus Retriever Client for NEXUS OS v2
+Uses Free-01 and Serverless-01 clusters (aws-eu-central-1).
+API key: loaded from env MILVUS_API_KEY; endpoint URI: loaded from env MILVUS_URI
+"""
+import os
+from typing import List, Dict, Optional, Any
+from dataclasses import dataclass
+try:
+    from pymilvus import MilvusClient, DataType
+    MILVUS_AVAILABLE = True
+except ImportError:
+    MILVUS_AVAILABLE = False
+@dataclass
+class MilvusResult:
+    """One retrieved document, as produced by the retrievers' ``search`` methods."""
+    # Document text (read from the hit's "text" output field by MilvusRetriever).
+    text: str
+    # Ranking score: the hit's "distance" for MilvusRetriever, or the
+    # keyword-weighted base score for MockMilvusRetriever.
+    score: float
+    # The hit's "metadata" output field; MockMilvusRetriever always supplies {}.
+    metadata: Dict[str, Any]
+    # Name of the collection that was searched.
+    collection: str
+class MilvusRetriever:
+    """
+    Milvus retriever using Zilliz Cloud clusters.
+    Supports both Free-01 and Serverless-01 endpoints.
+    The endpoint is resolved from, in order: the ``uri`` argument, the
+    ``MILVUS_URI`` environment variable, then the ``CLUSTERS`` entry named by
+    ``cluster``. The token is ``api_key`` or the ``MILVUS_API_KEY`` variable.
+    """
+    # Fallback endpoints keyed by cluster name; both values are placeholders
+    # and must be replaced (or overridden via ``uri``/``MILVUS_URI``).
+    CLUSTERS = {
+        "free-01": "https://in03-9e3e3e3e3e3e3e3e.api.aws-eu-central-1.zillizcloud.com",  # placeholder
+        "serverless-01": "https://in03-serverless.api.aws-eu-central-1.zillizcloud.com",  # placeholder
+    }
+    def __init__(
+        self,
+        cluster: str = "serverless-01",
+        api_key: Optional[str] = None,
+        uri: Optional[str] = None,
+        top_k: int = 10,
+    ):
+        """Resolve endpoint and credentials, then create the Milvus client.
+        Args:
+            cluster: Key into ``CLUSTERS``; consulted only when neither
+                ``uri`` nor ``MILVUS_URI`` provides an endpoint.
+            api_key: Token for the client; defaults to ``MILVUS_API_KEY``.
+            uri: Explicit endpoint; takes precedence over every fallback.
+            top_k: Default number of hits requested by ``search``.
+        Raises:
+            ImportError: If pymilvus could not be imported.
+            ValueError: If no endpoint URI can be resolved.
+        """
+        if not MILVUS_AVAILABLE:
+            raise ImportError("pymilvus not installed. Run: pip install pymilvus")
+        self.api_key = api_key or os.environ.get("MILVUS_API_KEY")
+        self.uri = uri or os.environ.get("MILVUS_URI") or self.CLUSTERS.get(cluster)
+        if not self.uri:
+            raise ValueError(f"MILVUS_URI or cluster name required. Known: {list(self.CLUSTERS.keys())}")
+        self.client = MilvusClient(uri=self.uri, token=self.api_key)
+        self.top_k = top_k
+    def search(self, collection: str, query: str, top_k: Optional[int] = None) -> List[MilvusResult]:
+        """Search a Milvus collection for relevant documents.
+        Args:
+            collection: Name of the collection to query.
+            query: Query text, sent to the client as the single search item.
+            top_k: Maximum number of hits; ``None`` or ``0`` falls back to
+                the instance default.
+        Returns:
+            One ``MilvusResult`` per hit, or an empty list when the client
+            returns no result sets.
+        """
+        k = top_k or self.top_k
+        # NOTE(review): the raw query string is passed as search data; this
+        # presumably relies on the collection converting text server-side.
+        # Confirm, otherwise the query must be embedded before this call.
+        results = self.client.search(
+            collection_name=collection,
+            data=[query],
+            limit=k,
+            output_fields=["text", "metadata"],
+        )
+        # Milvus returns one hit list per query. Guard the empty response
+        # instead of raising IndexError on results[0].
+        hits = results[0] if results else []
+        found = []
+        for hit in hits:
+            # A missing or null entity/metadata degrades to an empty dict so
+            # MilvusResult.metadata is always a dict.
+            entity = hit.get("entity") or {}
+            found.append(
+                MilvusResult(
+                    text=entity.get("text", ""),
+                    score=hit.get("distance", 0.0),
+                    metadata=entity.get("metadata") or {},
+                    collection=collection,
+                )
+            )
+        return found
+    def get_evidence(self, collection: str, query: str) -> List[Dict[str, Any]]:
+        """Format Milvus results for CK-PLUG coupling.
+        Runs ``search`` with the default ``top_k`` and returns one dict per
+        hit with the keys ``text``, ``relevance`` and ``collection``.
+        """
+        results = self.search(collection, query)
+        return [
+            {"text": r.text, "relevance": r.score, "collection": r.collection}
+            for r in results
+        ]
+class MockMilvusRetriever:
+    """Offline mock of MilvusRetriever.
+    Serves a small in-memory corpus and ranks it by keyword overlap, exposing
+    the same ``search``/``get_evidence`` methods without touching pymilvus.
+    """
+    def __init__(self, top_k: int = 5):
+        """Store the default hit count and build the canned collections."""
+        self.top_k = top_k
+        # Each document carries a base "score" that search() scales by how
+        # many query words it contains.
+        self._collections = {
+            "nexus_docs": [
+                {"text": "NEXUS OS v2 uses BEC thermodynamic control for reasoning stability.", "score": 0.92},
+                {"text": "Jarzynski equality filters non-equilibrium hallucination trajectories.", "score": 0.85},
+                {"text": "ChimeraRouter selects between 35+ local models and 6 cloud APIs.", "score": 0.80},
+            ],
+            "research_papers": [
+                {"text": "CK-PLUG modulates token-level knowledge reliance via Confidence Gain.", "score": 0.94},
+                {"text": "Arnold et al. detect phase transitions in LLM output distributions.", "score": 0.87},
+                {"text": "Kim derives intelligence Lagrangian on Fisher-metric manifold.", "score": 0.83},
+            ],
+        }
+    def search(self, collection: str, query: str, top_k: Optional[int] = None) -> List[MilvusResult]:
+        """Rank the canned documents of ``collection`` against ``query``.
+        Each document's base score is scaled by ``0.5 + 0.5 * ratio``, where
+        ``ratio`` is the share of unique query words (case-insensitive,
+        whitespace-split) that occur in the document text.
+        Args:
+            collection: Collection name; unknown names yield an empty list.
+            query: Free-text query.
+            top_k: Maximum number of results; ``None`` or ``0`` falls back
+                to the instance default.
+        Returns:
+            Up to ``top_k`` results sorted by descending score.
+        """
+        k = top_k or self.top_k
+        docs = self._collections.get(collection, [])
+        # Tokenise the query once. The unique-word set is both the overlap
+        # operand and the denominator, so repeating a query word no longer
+        # dilutes the match ratio.
+        query_terms = set(query.lower().split())
+        denom = max(1, len(query_terms))
+        # Simple keyword scoring
+        scored = []
+        for doc in docs:
+            overlap = len(query_terms & set(doc["text"].lower().split()))
+            score = doc["score"] * (0.5 + 0.5 * (overlap / denom))
+            scored.append(MilvusResult(text=doc["text"], score=score, metadata={}, collection=collection))
+        scored.sort(key=lambda x: x.score, reverse=True)
+        return scored[:k]
+    def get_evidence(self, collection: str, query: str) -> List[Dict[str, Any]]:
+        """Return search results as dicts with ``text``, ``relevance`` and ``collection`` keys."""
+        results = self.search(collection, query)
+        return [{"text": r.text, "relevance": r.score, "collection": r.collection} for r in results]