File size: 6,692 Bytes
674fb4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""
Core data models for Graph RAG Service
Extended with: temporal fields, tenant support, eval/confidence models
"""

from pydantic import BaseModel, Field, ConfigDict
from typing import Optional, Dict, List, Any, Literal
from datetime import datetime, timezone
from enum import Enum


class NodeType(str, Enum):
    """Types of nodes in the knowledge graph.

    The ``str`` mixin makes members compare equal to, and serialize as,
    their plain string values (e.g. ``NodeType.ENTITY == "entity"``).
    """
    ENTITY = "entity"      # extracted named entity
    CHUNK = "chunk"        # text chunk carved out of a document
    DOCUMENT = "document"  # source document itself


class RelationType(str, Enum):
    """Types of relationships in the knowledge graph.

    Values are upper-case to match graph-database edge-label conventions;
    the ``str`` mixin makes members serialize as their string values.
    """
    MENTIONS = "MENTIONS"        # chunk/document mentions an entity
    RELATED_TO = "RELATED_TO"    # generic entity-to-entity association
    PART_OF = "PART_OF"          # containment, child -> parent direction
    CONTAINS = "CONTAINS"        # containment, parent -> child direction


class OntologyVersion(str, Enum):
    """Ontology versions for schema evolution.

    NOTE(review): the models below type ``ontology_version`` as a plain
    ``str`` defaulting to "v1.0" rather than this enum, so values outside
    this set are not rejected — confirm whether that is intentional.
    """
    V1_0 = "v1.0"
    V1_1 = "v1.1"
    V2_0 = "v2.0"


class Entity(BaseModel):
    """Entity (node) in the knowledge graph.

    Carries the extracted name/type pair plus optional embedding,
    temporal validity window, tenant scoping and community assignment.
    """
    id: Optional[str] = None  # presumably assigned by the storage layer when None — confirm
    name: str                 # canonical entity name, e.g. "Lyzr AI"
    type: str                 # free-form type label, e.g. "Company" (not validated against an ontology here)
    properties: Dict[str, Any] = Field(default_factory=dict)  # arbitrary key/value attributes
    embedding: Optional[List[float]] = None  # vector embedding; None until computed
    ontology_version: str = "v1.0"  # plain str; known values listed in OntologyVersion
    confidence: float = 1.0  # NOTE(review): unbounded, unlike ConfidenceJudgment/EvalResult (ge=0, le=1) — confirm 0-1 range is enforced upstream
    # Temporal support (Gap #5)
    valid_from: Optional[datetime] = None
    valid_until: Optional[datetime] = None
    # Tenant support (Gap #7)
    tenant_id: Optional[str] = None
    # Community support (Gap #2)
    community_id: Optional[int] = None

    # Pydantic v2 config: example payload surfaced in the JSON schema / OpenAPI docs.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "name": "Lyzr AI",
                "type": "Company",
                "properties": {"industry": "AI", "founded": "2023"},
                "confidence": 0.95,
                "tenant_id": "org_abc123"
            }
        }
    )


class Relationship(BaseModel):
    """Directed relationship (edge) between two entities.

    ``source``/``target`` are entity references; provenance fields link
    the edge back to the document/chunk it was extracted from.
    """
    source: str  # source entity reference (name or id — presumably name per the example; confirm)
    target: str  # target entity reference
    type: str    # free-form edge label, e.g. "PARTNERS_WITH" (known labels in RelationType)
    properties: Dict[str, Any] = Field(default_factory=dict)  # arbitrary key/value attributes
    confidence: float = 1.0  # NOTE(review): unbounded here, unlike ConfidenceJudgment/EvalResult — confirm
    ontology_version: str = "v1.0"  # plain str; known values listed in OntologyVersion
    # Temporal support (Gap #5)
    valid_from: Optional[datetime] = None
    valid_until: Optional[datetime] = None
    source_document_id: Optional[str] = None  # provenance: originating document
    source_chunk_id: Optional[str] = None     # provenance: originating chunk
    # Tenant support (Gap #7)
    tenant_id: Optional[str] = None

    # Pydantic v2 config: example payload surfaced in the JSON schema / OpenAPI docs.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "source": "Lyzr AI",
                "target": "OpenAI",
                "type": "PARTNERS_WITH",
                "confidence": 0.9,
                "valid_from": "2023-01-01T00:00:00"
            }
        }
    )


class Chunk(BaseModel):
    """Text chunk extracted from a document.

    Ordered within its parent document by ``chunk_index``; page/section
    fields support citation tracing back to the original source.
    """
    id: Optional[str] = None  # presumably assigned by the storage layer when None — confirm
    text: str                 # raw chunk text
    document_id: str          # parent Document reference (required)
    metadata: Dict[str, Any] = Field(default_factory=dict)  # arbitrary key/value attributes
    embedding: Optional[List[float]] = None  # vector embedding; None until computed
    chunk_index: int = 0      # 0-based position of the chunk within the document
    # Extended metadata for citation tracing
    page_number: Optional[int] = None
    section_title: Optional[str] = None
    tenant_id: Optional[str] = None  # Tenant support (Gap #7)


class Document(BaseModel):
    """Document metadata record for an uploaded/ingested file."""
    id: Optional[str] = None  # presumably assigned by the storage layer when None — confirm
    filename: str             # original file name as uploaded
    file_type: str            # file type/extension label
    content: Optional[str] = None  # full text content; may be omitted to keep the record light
    size_bytes: int           # original file size in bytes (required)
    # Naive UTC timestamp: tz-aware "now" with tzinfo stripped — presumably for a
    # storage layer that rejects tz-aware datetimes; confirm before changing.
    upload_date: datetime = Field(default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None))
    processed: bool = False   # flipped once the ingestion pipeline has run
    metadata: Dict[str, Any] = Field(default_factory=dict)  # arbitrary key/value attributes
    tenant_id: Optional[str] = None  # Tenant support (Gap #7)


class OntologySchema(BaseModel):
    """Ontology schema definition: the allowed entity/relationship vocabulary.

    ``approved`` gates human sign-off before the schema is used for extraction.
    """
    version: str = "v1.0"  # plain str; known values listed in OntologyVersion
    entity_types: List[str] = Field(default_factory=list)        # allowed entity type labels
    relationship_types: List[str] = Field(default_factory=list)  # allowed relationship labels
    properties: Dict[str, List[str]] = Field(default_factory=dict)  # type label -> allowed property names
    # Naive UTC timestamp (tz stripped) — same convention as Document.upload_date.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None))
    approved: bool = False  # human-in-the-loop approval flag


class ExtractionResult(BaseModel):
    """Aggregate result of entity/relationship extraction over a document."""
    entities: List[Entity] = Field(default_factory=list)            # extracted entities
    relationships: List[Relationship] = Field(default_factory=list) # extracted relationships
    chunks: List[Chunk] = Field(default_factory=list)               # chunks the extraction ran over
    ontology_version: str = "v1.0"  # ontology the extraction was performed against
    processing_time_seconds: float = 0.0  # wall-clock duration of the extraction run


class ConfidenceJudgment(BaseModel):
    """LLM-as-a-Judge confidence assessment (Gap #4).

    Quantifies how well an answer is grounded in the retrieved context;
    ``score`` is validated to the closed interval [0.0, 1.0].
    """
    score: float = Field(..., ge=0.0, le=1.0, description="0.0-1.0 grounding score")
    reasoning: str = Field(default="", description="Why this confidence score was assigned")
    grounded_claims: int = Field(default=0, description="# claims backed by retrieved context")
    ungrounded_claims: int = Field(default=0, description="# claims not traceable to context")
    hallucination_risk: Literal["low", "medium", "high"] = Field(default="low")


class QueryResult(BaseModel):
    """Result of a retrieval query — enriched with confidence judgment."""
    answer: str  # final answer text returned to the caller
    sources: List[Dict[str, Any]] = Field(default_factory=list)   # citation/source payloads
    reasoning_chain: List[str] = Field(default_factory=list)      # human-readable reasoning steps
    confidence: float = 1.0  # legacy scalar confidence; see confidence_judgment for detail
    # Gap #4 — real confidence metrics
    confidence_judgment: Optional[ConfidenceJudgment] = None
    retrieval_method: str = "hybrid"  # plain str; known values listed in SearchMethod
    processing_time_seconds: float = 0.0  # wall-clock duration of the query
    # Gap #3 — DRIFT query metadata
    drift_expanded: bool = False  # whether the query was expanded into sub-queries
    total_sub_queries: int = 1    # 1 when no expansion occurred


class AgentState(BaseModel):
    """Mutable state of the agentic retrieval loop for a single query."""
    query: str  # original user query
    decomposed_queries: List[str] = Field(default_factory=list)          # sub-queries after decomposition
    retrieved_contexts: List[Dict[str, Any]] = Field(default_factory=list)  # accumulated retrieval payloads
    reasoning_steps: List[str] = Field(default_factory=list)             # trace of agent reasoning
    final_answer: Optional[str] = None  # None until the agent converges
    iteration: int = 0        # current loop iteration count
    confidence: float = 0.0   # starts pessimistic, presumably raised as evidence accumulates — confirm


class SearchMethod(str, Enum):
    """Search methods for retrieval; ``str`` mixin makes members serialize as plain strings."""
    VECTOR = "vector"        # embedding similarity search
    GRAPH = "graph"          # graph traversal
    FILTER = "filter"        # metadata/property filtering
    HYBRID = "hybrid"        # combination of methods (default in QueryResult)
    COMMUNITY = "community"  # community-summary retrieval (Gap #2)
    CYPHER = "cypher"        # direct Cypher query against the graph store


class EvalResult(BaseModel):
    """RAG evaluation result (Gap #8).

    All metric fields are validated to [0.0, 1.0]; the three required
    metrics (``...``) must be supplied by the evaluator.
    """
    question: str  # evaluated question
    answer: str    # answer under evaluation
    faithfulness: float = Field(..., ge=0.0, le=1.0)       # required metric
    answer_relevancy: float = Field(..., ge=0.0, le=1.0)   # required metric
    context_precision: float = Field(..., ge=0.0, le=1.0)  # required metric
    context_recall: float = Field(default=0.0, ge=0.0, le=1.0)  # optional; defaults to 0.0
    overall_score: float = Field(..., ge=0.0, le=1.0)      # required aggregate score
    hallucination_detected: bool = False
    # Naive UTC timestamp (tz stripped) — same convention as Document.upload_date.
    timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None))
    document_id: Optional[str] = None  # evaluated document, when applicable


class CommunityReport(BaseModel):
    """Community summary for LazyGraphRAG (Gap #2).

    Links back to Entity.community_id; ``summary``/``themes`` are the
    generated digest of the community's member entities.
    """
    community_id: int   # matches Entity.community_id
    entity_count: int   # number of member entities (required)
    entities: List[str] # member entity names/ids (required; presumably names — confirm)
    summary: str        # generated natural-language summary (required)
    themes: List[str] = Field(default_factory=list)  # extracted theme labels
    relevance_score: float = 0.0  # query-time relevance; 0.0 until scored
    # Naive UTC timestamp (tz stripped) — same convention as Document.upload_date.
    generated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None))