""" Core data models for Graph RAG Service Extended with: temporal fields, tenant support, eval/confidence models """ from pydantic import BaseModel, Field, ConfigDict from typing import Optional, Dict, List, Any, Literal from datetime import datetime, timezone from enum import Enum class NodeType(str, Enum): """Types of nodes in the knowledge graph""" ENTITY = "entity" CHUNK = "chunk" DOCUMENT = "document" class RelationType(str, Enum): """Types of relationships in the knowledge graph""" MENTIONS = "MENTIONS" RELATED_TO = "RELATED_TO" PART_OF = "PART_OF" CONTAINS = "CONTAINS" class OntologyVersion(str, Enum): """Ontology versions for schema evolution""" V1_0 = "v1.0" V1_1 = "v1.1" V2_0 = "v2.0" class Entity(BaseModel): """Entity in the knowledge graph""" id: Optional[str] = None name: str type: str properties: Dict[str, Any] = Field(default_factory=dict) embedding: Optional[List[float]] = None ontology_version: str = "v1.0" confidence: float = 1.0 # Temporal support (Gap #5) valid_from: Optional[datetime] = None valid_until: Optional[datetime] = None # Tenant support (Gap #7) tenant_id: Optional[str] = None # Community support (Gap #2) community_id: Optional[int] = None model_config = ConfigDict( json_schema_extra={ "example": { "name": "Lyzr AI", "type": "Company", "properties": {"industry": "AI", "founded": "2023"}, "confidence": 0.95, "tenant_id": "org_abc123" } } ) class Relationship(BaseModel): """Relationship between entities""" source: str target: str type: str properties: Dict[str, Any] = Field(default_factory=dict) confidence: float = 1.0 ontology_version: str = "v1.0" # Temporal support (Gap #5) valid_from: Optional[datetime] = None valid_until: Optional[datetime] = None source_document_id: Optional[str] = None source_chunk_id: Optional[str] = None # Tenant support (Gap #7) tenant_id: Optional[str] = None model_config = ConfigDict( json_schema_extra={ "example": { "source": "Lyzr AI", "target": "OpenAI", "type": "PARTNERS_WITH", "confidence": 0.9, 
"valid_from": "2023-01-01T00:00:00" } } ) class Chunk(BaseModel): """Text chunk from document""" id: Optional[str] = None text: str document_id: str metadata: Dict[str, Any] = Field(default_factory=dict) embedding: Optional[List[float]] = None chunk_index: int = 0 # Extended metadata for citation tracing page_number: Optional[int] = None section_title: Optional[str] = None tenant_id: Optional[str] = None class Document(BaseModel): """Document metadata""" id: Optional[str] = None filename: str file_type: str content: Optional[str] = None size_bytes: int upload_date: datetime = Field(default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)) processed: bool = False metadata: Dict[str, Any] = Field(default_factory=dict) tenant_id: Optional[str] = None class OntologySchema(BaseModel): """Ontology schema definition""" version: str = "v1.0" entity_types: List[str] = Field(default_factory=list) relationship_types: List[str] = Field(default_factory=list) properties: Dict[str, List[str]] = Field(default_factory=dict) created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)) approved: bool = False class ExtractionResult(BaseModel): """Result of entity/relationship extraction""" entities: List[Entity] = Field(default_factory=list) relationships: List[Relationship] = Field(default_factory=list) chunks: List[Chunk] = Field(default_factory=list) ontology_version: str = "v1.0" processing_time_seconds: float = 0.0 class ConfidenceJudgment(BaseModel): """LLM-as-a-Judge confidence assessment (Gap #4)""" score: float = Field(..., ge=0.0, le=1.0, description="0.0-1.0 grounding score") reasoning: str = Field(default="", description="Why this confidence score was assigned") grounded_claims: int = Field(default=0, description="# claims backed by retrieved context") ungrounded_claims: int = Field(default=0, description="# claims not traceable to context") hallucination_risk: Literal["low", "medium", "high"] = Field(default="low") 
# ``answer`` is the only required field.
class QueryResult(BaseModel):
    """Result of a retrieval query — enriched with confidence judgment"""

    answer: str
    # default_factory gives each instance its own list.
    sources: List[Dict[str, Any]] = Field(default_factory=list)
    reasoning_chain: List[str] = Field(default_factory=list)
    # No range constraint is declared on this value.
    confidence: float = 1.0

    # Gap #4 — real confidence metrics
    confidence_judgment: Optional[ConfidenceJudgment] = None
    retrieval_method: str = "hybrid"
    processing_time_seconds: float = 0.0

    # Gap #3 — DRIFT query metadata
    drift_expanded: bool = False
    total_sub_queries: int = 1


# ``query`` is required; the remaining fields start empty, ``None`` or zero.
class AgentState(BaseModel):
    """State of the agentic retrieval system"""

    query: str
    decomposed_queries: List[str] = Field(default_factory=list)
    retrieved_contexts: List[Dict[str, Any]] = Field(default_factory=list)
    reasoning_steps: List[str] = Field(default_factory=list)
    final_answer: Optional[str] = None
    iteration: int = 0
    # Starts at 0.0, unlike QueryResult.confidence which defaults to 1.0.
    confidence: float = 0.0


# Members subclass ``str``, so each one compares equal to its string value.
class SearchMethod(str, Enum):
    """Search methods for retrieval"""

    VECTOR = "vector"
    GRAPH = "graph"
    FILTER = "filter"
    HYBRID = "hybrid"
    COMMUNITY = "community"
    CYPHER = "cypher"


# Every metric is validated to lie in [0.0, 1.0]. All are required except
# ``context_recall``, which defaults to 0.0.
class EvalResult(BaseModel):
    """RAG evaluation result (Gap #8)"""

    question: str
    answer: str
    faithfulness: float = Field(..., ge=0.0, le=1.0)
    answer_relevancy: float = Field(..., ge=0.0, le=1.0)
    context_precision: float = Field(..., ge=0.0, le=1.0)
    context_recall: float = Field(0.0, ge=0.0, le=1.0)
    overall_score: float = Field(..., ge=0.0, le=1.0)
    hallucination_detected: bool = False
    # Defaults to the current UTC time with tzinfo stripped (a naive datetime).
    timestamp: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).replace(tzinfo=None)
    )
    document_id: Optional[str] = None


# ``community_id``, ``entity_count``, ``entities`` and ``summary`` are required.
class CommunityReport(BaseModel):
    """Community summary for LazyGraphRAG (Gap #2)"""

    community_id: int
    entity_count: int
    entities: List[str]
    summary: str
    themes: List[str] = Field(default_factory=list)
    # No range constraint is declared on this value.
    relevance_score: float = 0.0
    # Defaults to the current UTC time with tzinfo stripped (a naive datetime).
    generated_at: datetime = Field(
        default_factory=lambda: datetime.now(tz=timezone.utc).replace(tzinfo=None)
    )