drewli20200316 commited on
Commit
0e20429
·
verified ·
1 Parent(s): dae033a

Add test/test2.py

Browse files
Files changed (1) hide show
  1. test/test2.py +1034 -0
test/test2.py ADDED
@@ -0,0 +1,1034 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ================================================================
3
+ 医疗 RAG Agent 集成测试 — 多步骤工具链协作
4
+ ================================================================
5
+ 测试层级:
6
+ 单元测试 (test1.py): 单工具调用准确性 ← 已完成
7
+ 集成测试 (test2.py): 多步骤工具链协作 ← 当前文件
8
+ 回归测试 / 压力测试 / 安全测试: ← 下一步
9
+
10
+ 测试场景:
11
+ 场景 1: 完整 RAG 全链路 (Milvus → PDF → Neo4j → LLM)
12
+ 场景 2: Redis 缓存集成 (Miss → RAG → 写缓存 → 再次查询 Hit)
13
+ 场景 3: Neo4j 降级 (Cypher 服务宕机 → Milvus + PDF 继续回答)
14
+ 场景 4: PDF 降级 (检索失败 → Milvus + Neo4j 继续回答)
15
+ 场景 5: Milvus 降级 (向量库异常 → PDF + Neo4j 继续回答)
16
+ 场景 6: 多组件同时降级 (Neo4j + PDF 都挂 → 只靠 Milvus)
17
+ 场景 7: 全部降级 (三路召回都挂 → LLM 依赖自身经验)
18
+ 场景 8: Chatbot 端点完整流程 (HTTP请求 → Redis → RAG → 响应)
19
+ 场景 9: 并发请求下的 Redis 锁 + RAG 协作
20
+ 场景 10: 数据入库全链路 (JSONL预处理 → Embedding → Milvus)
21
+
22
+ 核心理念:
23
+ 单元测试问 "每个工具自己对不对?"
24
+ 集成测试问 "工具串起来后, 整条链路对不对?"
25
+
26
+ 运行:
27
+ pytest test2.py -v --tb=short
28
+ pytest test2.py -v -k "FullPipeline" # 只跑全链路 (匹配 TestFullPipeline 类)
29
+ pytest test2.py -v -k "degradation" # 只跑降级场景 (匹配 Test*Degradation 类)
30
+ pytest test2.py -v -k "redis" # 只跑缓存集成
31
+ ================================================================
32
+ """
33
+
34
+ import sys
35
+ import os
36
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
37
+
38
+ import types
39
+ import pytest
40
+ import json
41
+ import hashlib
42
+ import time
43
+ import uuid
44
+ import random
45
+ import threading
46
+ from unittest.mock import MagicMock, patch, call
47
+ from dataclasses import dataclass, field
48
+ from typing import Optional, List, Callable
49
+
50
+
51
+ # ================================================================
52
+ # 前置: Mock 缺失的第三方依赖
53
+ # ================================================================
54
+
55
def _ensure_mock_module(name):
    """Install a ``MagicMock`` as ``sys.modules[name]`` unless that name is already registered."""
    if name in sys.modules:
        return
    sys.modules[name] = MagicMock()


# Module names that get a MagicMock placeholder in ``sys.modules`` so that
# ``from <name> import ...`` statements succeed without the real package.
_MOCK_MODULES = [
    "langchain_classic",
    "langchain_classic.retrievers",
    "langchain_classic.retrievers.parent_document_retriever",
    "langchain_milvus",
    "langchain_text_splitters",
    "langchain_core",
    "langchain_core.stores",
    "langchain_core.documents",
    "langchain.embeddings",
    "langchain.embeddings.base",
    "neo4j",
    "dotenv",
    "uvicorn",
    "fastapi",
    "fastapi.middleware",
    "fastapi.middleware.cors",
]
for mod in _MOCK_MODULES:
    _ensure_mock_module(mod)


class _FakeEmbeddingsBase:
    """Empty placeholder class published as ``langchain.embeddings.base.Embeddings``."""


# Give the mocked module a real class object so code can subclass ``Embeddings``.
sys.modules["langchain.embeddings.base"].Embeddings = _FakeEmbeddingsBase
75
+
76
+
77
+ # ================================================================
78
+ # 测试基础设施
79
+ # ================================================================
80
+
81
@dataclass
class FakeDocument:
    """Minimal stand-in for a LangChain ``Document``: a text body plus a metadata dict."""

    # Text payload that the pipeline reads via ``.page_content``.
    page_content: str
    # Per-instance metadata; a factory is used so instances never share one dict.
    metadata: dict = field(default_factory=dict)
86
+
87
+
88
class FakeChatResponse:
    """Stand-in for an OpenAI chat-completion response.

    Exposes only the attribute path the pipeline reads:
    ``response.choices[0].message.content``.
    """

    def __init__(self, content):
        message = types.SimpleNamespace(content=content)
        self.choices = [types.SimpleNamespace(message=message)]
94
+
95
+
96
class FakeRedisClient:
    """Dictionary-backed stand-in for the Redis client methods used by these tests.

    Values live in ``_store`` and requested TTLs are recorded in ``_expiry``;
    TTLs are only recorded, never enforced. Every operation runs under one
    lock so that ``set(..., nx=True)`` and the unlock script are atomic, as
    they are on a real Redis server. This matters because the concurrency
    tests call this fake from several threads at once.
    """

    def __init__(self):
        self._store = {}
        self._expiry = {}
        self._lock = threading.Lock()

    def ping(self):
        """Report the fake server as reachable."""
        return True

    def get(self, key):
        """Return the stored value for ``key``, or ``None`` when absent."""
        with self._lock:
            return self._store.get(key)

    def set(self, key, value, ex=None, nx=False):
        """Store ``value``; with ``nx=True`` refuse (return ``False``) if the key exists."""
        with self._lock:
            if nx and key in self._store:
                return False
            self._store[key] = value
            if ex:
                self._expiry[key] = ex
            else:
                # A plain SET replaces the key, so an older TTL must not survive.
                self._expiry.pop(key, None)
            return True

    def setex(self, key, expire, value):
        """Store ``value`` and record ``expire`` as its TTL."""
        with self._lock:
            self._store[key] = value
            self._expiry[key] = expire
            return True

    def delete(self, key):
        """Remove ``key``; return 1 if it existed, else 0."""
        with self._lock:
            existed = key in self._store
            self._store.pop(key, None)
            self._expiry.pop(key, None)
            return 1 if existed else 0

    def register_script(self, script):
        """Return a callable emulating a compare-and-delete unlock script.

        The ``script`` text is ignored. The returned callable deletes
        ``keys[0]`` only when its stored value equals ``args[0]`` and returns
        1 on deletion, 0 otherwise.
        """
        def fake_script(keys=None, args=None):
            with self._lock:
                if keys and args and self._store.get(keys[0]) == args[0]:
                    del self._store[keys[0]]
                    self._expiry.pop(keys[0], None)
                    return 1
                return 0
        return fake_script
129
+
130
+
131
def make_redis_manager():
    """Build a ``RedisClientWrapper`` whose backend is a fresh ``FakeRedisClient``.

    The wrapper is created with ``object.__new__`` so its ``__init__`` never
    runs; only the ``client`` and ``unlock_script`` attributes are populated.
    """
    from new_redis import RedisClientWrapper

    fake_client = FakeRedisClient()
    # NOTE(review): presumably a non-empty ``_pool`` keeps the wrapper from
    # opening a real connection pool - confirm against new_redis. This is a
    # class-level attribute and stays set after the test finishes.
    RedisClientWrapper._pool = "FAKE"

    manager = object.__new__(RedisClientWrapper)
    manager.client = fake_client
    manager.unlock_script = fake_client.register_script("")
    return manager
139
+
140
+
141
+ # ================================================================
142
+ # 核心: 可测试版本的 perform_rag_and_llm
143
+ #
144
+ # 原始 agent4.py 使用全局变量 (milvus_vectorstore, parent_retriever,
145
+ # driver, client_llm), 无法直接在测试中注入 Mock.
146
+ # 这里提取相同逻辑, 但通过参数传入依赖, 实现 "依赖注入" 测试模式.
147
+ # ================================================================
148
+
149
def perform_rag_and_llm_testable(
    query: str,
    milvus_vectorstore,
    parent_retriever,
    neo4j_driver,
    llm_client,
    cypher_endpoint: str = "http://0.0.0.0:8101",
    requests_module=None,
) -> str:
    """Run three-way recall (Milvus, PDF retriever, Neo4j) and ask the LLM.

    Dependency-injected variant of the pipeline: every external collaborator
    is passed in so tests can substitute fakes. Each recall step degrades to
    an empty string on failure; only the final LLM call may raise.

    Args:
        query: The user's question.
        milvus_vectorstore: Object exposing ``similarity_search(query, ...)``.
        parent_retriever: Object exposing ``invoke(query)``.
        neo4j_driver: Object whose ``session()`` is a context manager yielding
            something with ``run(cypher)``.
        llm_client: Object exposing ``chat.completions.create(...)``.
        cypher_endpoint: Base URL of the Cypher generate/validate service.
        requests_module: Object exposing ``post(url, data)``; the real
            ``requests`` package is imported when omitted.

    Returns:
        The ``content`` of the first choice in the LLM response.
    """
    if requests_module is None:
        import requests as requests_module

    # Merge the three recall results in a fixed order, newline separated.
    context = _recall_from_milvus(query, milvus_vectorstore)
    context = context + "\n" + _recall_from_pdf(query, parent_retriever)
    context = context + "\n" + _recall_from_neo4j(
        query, neo4j_driver, cypher_endpoint, requests_module
    )

    # NOTE(review): leading whitespace inside these two literals could not be
    # recovered from the diff view - confirm against the original file.
    SYSTEM_PROMPT = """
    System: 你是一个非常得力的医学助手, 你可以通过从数据库中检索出的信息找到问题的答案.
    """
    USER_PROMPT = f"""
    User: 利用介于<context>和</context>之间的从数据库中检索出的信息来回答问题.
    <context>
    {context}
    </context>

    <question>
    {query}
    </question>
    """

    response = llm_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": SYSTEM_PROMPT + USER_PROMPT}],
        temperature=0.7,
    )

    return response.choices[0].message.content


def _recall_from_milvus(query, milvus_vectorstore):
    """Return Milvus hits joined by blank lines, or "" when empty or failing."""
    try:
        hits = milvus_vectorstore.similarity_search(
            query, k=10, ranker_type="rrf", ranker_params={"k": 100}
        )
        return "\n\n".join(d.page_content for d in hits) if hits else ""
    except Exception as e:
        print(f"Milvus 异常: {e}")
        return ""


def _recall_from_pdf(query, parent_retriever):
    """Return the first retrieved document's text, or "" when empty or failing."""
    try:
        docs = parent_retriever.invoke(query)
        if docs is not None and len(docs) >= 1:
            return docs[0].page_content
    except Exception as e:
        print(f"PDF 检索异常: {e}")
    return ""


def _recall_from_neo4j(query, neo4j_driver, cypher_endpoint, requests_module):
    """Generate, validate and run a Cypher query; return comma-joined first columns or ""."""
    import json as _json

    try:
        generated = requests_module.post(
            f"{cypher_endpoint}/generate",
            _json.dumps({"natural_language_query": query}),
        )
        if generated.status_code != 200:
            return ""

        payload = generated.json()
        cypher_query = payload["cypher_query"]
        confidence = payload["confidence"]
        is_valid = payload["validated"]

        # Only run queries the generator itself rates as confident and valid.
        trusted = cypher_query is not None and float(confidence) >= 0.9 and is_valid == True
        if not trusted:
            return ""

        # Second, independent validation before touching the database.
        checked = requests_module.post(
            f"{cypher_endpoint}/validate",
            _json.dumps({"cypher_query": cypher_query}),
        )
        if not (checked.status_code == 200 and checked.json()["is_valid"] == True):
            return ""

        with neo4j_driver.session() as session:
            try:
                rows = session.run(cypher_query)
                # Values may be numbers or other non-strings; stringify them so
                # one such column does not throw away the whole graph result.
                return ",".join(str(row[0]) for row in rows if row[0] is not None)
            except Exception as e:
                print(f"neo4j查询失败: {e}")
                return ""
    except Exception as e:
        print(f"neo4j API 服务不可用: {e}")
    return ""
244
+
245
+
246
+ # ================================================================
247
+ # 工厂方法: 快速构建各组件的 Mock
248
+ # ================================================================
249
+
250
def make_milvus_mock(docs=None, raise_error=False):
    """Build a Milvus vector-store mock.

    Args:
        docs: Documents ``similarity_search`` should return. ``None`` selects
            two built-in sample documents; an explicit empty list is honoured
            so an "index returned nothing" case can be simulated.
        raise_error: When true, ``similarity_search`` raises ``ConnectionError``.

    Returns:
        A ``MagicMock`` with ``similarity_search`` configured.
    """
    mock = MagicMock()
    if raise_error:
        mock.similarity_search.side_effect = ConnectionError("Milvus 连接超时")
        return mock

    if docs is None:
        docs = [
            FakeDocument(page_content="高血压患者应控制每日钠摄入量不超过6克"),
            FakeDocument(page_content="建议多食用富含钾的蔬果, 如香蕉、菠菜"),
        ]
    mock.similarity_search.return_value = docs
    return mock
261
+
262
+
263
def make_pdf_mock(content=None, raise_error=False):
    """Build a PDF parent-retriever mock.

    ``raise_error`` makes ``invoke`` raise. Otherwise ``invoke`` returns an
    empty list when ``content`` is the empty string, a single document holding
    ``content`` when one is given, or a single built-in sample document when
    ``content`` is ``None``.
    """
    retriever = MagicMock()
    if raise_error:
        retriever.invoke.side_effect = Exception("PDF 索引损坏")
        return retriever

    if content == "":
        found = []
    else:
        text = (
            "根据《中国高血压防治指南(2024版)》第三章: 高血压分为1级、2级、3级"
            if content is None
            else content
        )
        found = [FakeDocument(page_content=text)]

    retriever.invoke.return_value = found
    return retriever
277
+
278
+
279
def make_neo4j_driver_mock(results=None, raise_error=False):
    """Build a Neo4j driver mock whose ``session()`` works as a context manager.

    Args:
        results: Rows ``session.run`` should return. ``None`` selects two
            built-in sample rows; an explicit empty list is honoured so a
            "query matched nothing" case can be simulated.
        raise_error: When true, ``session.run`` raises ``Exception``.

    Returns:
        A ``MagicMock`` driver. The session handed out by the ``with`` block
        is reachable as ``driver.session.return_value.__enter__.return_value``.
    """
    mock_driver = MagicMock()
    mock_session = MagicMock()

    if raise_error:
        mock_session.run.side_effect = Exception("Neo4j 节点不存在")
    else:
        if results is None:
            results = [("高血压",), ("动脉硬化",)]
        mock_session.run.return_value = results

    session_cm = mock_driver.session.return_value
    session_cm.__enter__ = MagicMock(return_value=mock_session)
    # Returning False from __exit__ lets exceptions propagate out of the block.
    session_cm.__exit__ = MagicMock(return_value=False)
    return mock_driver
292
+
293
+
294
def make_llm_mock(answer="高血压患者应避免高盐、高脂肪饮食, 建议低盐低脂饮食。"):
    """Build an LLM client mock whose ``chat.completions.create`` returns ``answer``."""
    llm = MagicMock()
    canned_reply = FakeChatResponse(answer)
    llm.chat.completions.create.return_value = canned_reply
    return llm
299
+
300
+
301
def make_requests_mock(
    generate_response=None,
    validate_response=None,
    generate_error=False,
):
    """Build a ``requests``-like mock for the Cypher generate/validate calls.

    Args:
        generate_response: JSON payload for the first ``post`` call. ``None``
            selects a high-confidence, validated sample query; any other
            value, including an empty dict, is returned as given.
        validate_response: JSON payload for the second ``post`` call. ``None``
            selects ``{"is_valid": True}``.
        generate_error: When true, every ``post`` raises ``ConnectionError``.

    Returns:
        A ``MagicMock`` whose ``post`` answers two calls in order with
        status-200 responses: first the generate payload, then the validate
        payload.
    """
    mock = MagicMock()

    if generate_error:
        mock.post.side_effect = ConnectionError("Cypher 服务不可用")
        return mock

    if generate_response is None:
        # Default: a confident, valid Cypher query.
        generate_response = {
            "cypher_query": "MATCH (d:Disease {name:'高血压'})-[:has_common_drug]->(m) RETURN m.name",
            "confidence": 0.95,
            "validated": True,
        }
    if validate_response is None:
        validate_response = {"is_valid": True}

    gen_resp = MagicMock()
    gen_resp.status_code = 200
    gen_resp.json.return_value = generate_response

    val_resp = MagicMock()
    val_resp.status_code = 200
    val_resp.json.return_value = validate_response

    # Responses are handed out by call order: generate first, validate second.
    mock.post.side_effect = [gen_resp, val_resp]
    return mock
329
+
330
+
331
+ # ================================================================
332
+ # 场景 1: 完整 RAG 全链路 (Happy Path)
333
+ # ================================================================
334
+
335
class TestFullPipeline:
    """Happy path: Milvus, PDF and Neo4j all succeed, are merged into one context, and the LLM answers."""

    @staticmethod
    def _prompt_sent_to(llm):
        """Return the text of the single message passed to the LLM mock."""
        return llm.chat.completions.create.call_args.kwargs["messages"][0]["content"]

    def test_all_three_sources_contribute_to_context(self):
        """All three recall results must appear in the prompt the LLM receives."""
        milvus = make_milvus_mock([FakeDocument(page_content="MILVUS_低盐饮食")])
        pdf = make_pdf_mock(content="PDF_高血压防治指南")
        neo4j = make_neo4j_driver_mock([("NEO4J_降压药",)])
        llm = make_llm_mock()
        req = make_requests_mock()

        perform_rag_and_llm_testable(
            "高血压不能吃什么?", milvus, pdf, neo4j, llm, requests_module=req
        )

        actual_prompt = self._prompt_sent_to(llm)
        assert "MILVUS_低盐饮食" in actual_prompt, "Milvus 结果应出现在 prompt"
        assert "PDF_高血压防治指南" in actual_prompt, "PDF 结果应出现在 prompt"
        assert "NEO4J_降压药" in actual_prompt, "Neo4j 结果应出现在 prompt"

    def test_full_pipeline_returns_llm_answer(self):
        """The pipeline returns exactly what the LLM produced."""
        expected_answer = "高血压患者应限制盐分摄入, 每日不超过6克。"
        llm = make_llm_mock(expected_answer)
        result = perform_rag_and_llm_testable(
            "高血压饮食", make_milvus_mock(), make_pdf_mock(), make_neo4j_driver_mock(),
            llm, requests_module=make_requests_mock()
        )
        assert result == expected_answer

    def test_prompt_contains_question(self):
        """The user's original question is embedded in the prompt."""
        llm = make_llm_mock()
        perform_rag_and_llm_testable(
            "糖尿病能吃西瓜吗?", make_milvus_mock(), make_pdf_mock(),
            make_neo4j_driver_mock(), llm, requests_module=make_requests_mock()
        )
        assert "糖尿病能吃西瓜吗?" in self._prompt_sent_to(llm)

    def test_neo4j_cypher_validation_flow(self):
        """Cypher handling runs all three steps: generate, validate, execute."""
        req = make_requests_mock()
        neo4j = make_neo4j_driver_mock([("阿司匹林",), ("氨氯地平",)])

        perform_rag_and_llm_testable(
            "高血压常用药物", make_milvus_mock(), make_pdf_mock(),
            neo4j, make_llm_mock(), requests_module=req
        )

        # Steps 1 and 2: exactly two HTTP calls, /generate then /validate.
        assert req.post.call_count == 2
        first_call_url = req.post.call_args_list[0].args[0]
        second_call_url = req.post.call_args_list[1].args[0]
        assert "/generate" in first_call_url
        assert "/validate" in second_call_url

        # Step 3: the validated query is actually executed against Neo4j.
        session = neo4j.session.return_value.__enter__.return_value
        session.run.assert_called_once()

    def test_llm_model_and_temperature(self):
        """The LLM is called with the expected model name and temperature."""
        llm = make_llm_mock()
        perform_rag_and_llm_testable(
            "test", make_milvus_mock(), make_pdf_mock(),
            make_neo4j_driver_mock(), llm, requests_module=make_requests_mock()
        )
        kwargs = llm.chat.completions.create.call_args.kwargs
        assert kwargs["model"] == "gpt-4o-mini"
        assert kwargs["temperature"] == 0.7
403
+
404
+
405
+ # ================================================================
406
+ # 场景 2: Redis 缓存 + RAG 集成
407
+ # ================================================================
408
+
409
class TestRedisCacheIntegration:
    """Cooperation between the Redis cache wrapper and the RAG pipeline."""

    def test_cache_miss_triggers_full_rag(self):
        """First query: cache miss, so the compute callback runs and its result is returned."""
        mgr = make_redis_manager()
        rag_called = False

        def fake_rag():
            nonlocal rag_called
            rag_called = True
            return "LLM生成: 高血压要低盐饮食"

        result = mgr.get_or_compute("高血压不能吃什么?", fake_rag)

        assert rag_called is True, "缓存未命中时应调用 RAG"
        assert result == "LLM生成: 高血压要低盐饮食"

    def test_second_query_hits_cache(self):
        """Second identical query is served from cache without calling RAG again."""
        mgr = make_redis_manager()
        call_count = 0

        def counting_rag():
            nonlocal call_count
            call_count += 1
            return "RAG结果"

        # First call goes through RAG.
        mgr.get_or_compute("高血压饮食", counting_rag)
        assert call_count == 1

        # Second call must be answered by the cache.
        result = mgr.get_or_compute("高血压饮食", counting_rag)
        assert call_count == 1, "第二次应命中缓存, RAG 不应再被调用"
        assert result == "RAG结果"

    def test_different_questions_both_execute_rag(self):
        """Different questions are computed and cached independently."""
        mgr = make_redis_manager()

        mgr.get_or_compute("问题A", lambda: "答案A")
        mgr.get_or_compute("问题B", lambda: "答案B")

        assert mgr.get_answer("问题A") == "答案A"
        assert mgr.get_answer("问题B") == "答案B"

    def test_cache_miss_full_rag_then_cache_hit(self):
        """End to end: miss, full Milvus+PDF+Neo4j+LLM run, cache write, then a hit."""
        mgr = make_redis_manager()

        def real_rag():
            return perform_rag_and_llm_testable(
                "高血压吃什么药?",
                make_milvus_mock([FakeDocument(page_content="降压药推荐")]),
                make_pdf_mock(content="指南建议"),
                make_neo4j_driver_mock([("氨氯地平",)]),
                make_llm_mock("建议服用氨氯地平, 同时控制饮食。"),
                requests_module=make_requests_mock(),
            )

        # First call: miss, so the pipeline runs.
        r1 = mgr.get_or_compute("高血压吃什么药?", real_rag)
        assert "氨氯地平" in r1

        # Second call: the callback raises if it is ever invoked.
        rag_should_not_run = MagicMock(side_effect=AssertionError("不应调用"))
        r2 = mgr.get_or_compute("高血压吃什么药?", rag_should_not_run)
        assert r2 == r1, "缓存命中结果应与首次一致"

    def test_rag_returns_empty_caches_empty_marker(self):
        """An empty RAG answer must not break a repeated query for the same question."""
        mgr = make_redis_manager()
        call_count = 0

        def empty_rag():
            nonlocal call_count
            call_count += 1
            return ""

        mgr.get_or_compute("无效查询xyz", empty_rag)
        assert call_count == 1

        # NOTE(review): per the original author's notes, the wrapper stores an
        # <EMPTY> marker with a short (60s) expiry, get_answer maps it to None,
        # and get_or_compute may therefore compute again. That behaviour lives
        # in new_redis and is not visible here, so the second call is only
        # checked for completing without raising - confirm and pin the
        # expected call_count once the contract is settled.
        mgr.get_or_compute("无效查询xyz", empty_rag)
500
+
501
+
502
+ # ================================================================
503
+ # 场景 3: Neo4j 降级
504
+ # ================================================================
505
+
506
class TestNeo4jDegradation:
    """Neo4j-side failures must degrade gracefully: Milvus + PDF still answer."""

    @staticmethod
    def _session_of(neo4j):
        """Return the session mock that the driver's ``with`` block yields."""
        return neo4j.session.return_value.__enter__.return_value

    def test_cypher_service_down_still_answers(self):
        """Cypher API unreachable: Neo4j is skipped, Milvus and PDF both still feed the prompt."""
        llm = make_llm_mock("基于文献, 高血压应低盐饮食")
        req = make_requests_mock(generate_error=True)  # simulate the outage
        neo4j = make_neo4j_driver_mock()

        result = perform_rag_and_llm_testable(
            "高血压饮食", make_milvus_mock(), make_pdf_mock(),
            neo4j, llm, requests_module=req
        )

        assert result == "基于文献, 高血压应低盐饮食"
        # Both surviving sources must be present, not just one of them.
        prompt = llm.chat.completions.create.call_args.kwargs["messages"][0]["content"]
        assert "控制每日钠摄入量" in prompt
        assert "高血压防治指南" in prompt
        self._session_of(neo4j).run.assert_not_called()

    def test_low_confidence_cypher_skipped(self):
        """Low generator confidence (0.3): the query is never executed."""
        req = make_requests_mock(generate_response={
            "cypher_query": "MATCH (n) RETURN n",
            "confidence": 0.3,
            "validated": True,
        })
        neo4j = make_neo4j_driver_mock()
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", make_milvus_mock(), make_pdf_mock(), neo4j, llm, requests_module=req
        )

        self._session_of(neo4j).run.assert_not_called()

    def test_cypher_validation_fails_skipped(self):
        """Validation endpoint rejects the query: it is never executed."""
        req = make_requests_mock(validate_response={"is_valid": False})
        neo4j = make_neo4j_driver_mock()
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", make_milvus_mock(), make_pdf_mock(), neo4j, llm, requests_module=req
        )

        self._session_of(neo4j).run.assert_not_called()

    def test_neo4j_query_exception_graceful(self):
        """Neo4j raises during execution: the LLM is still called and answers."""
        neo4j = make_neo4j_driver_mock(raise_error=True)
        llm = make_llm_mock("虽然图数据库查询失败, 但基于其他信息...")

        result = perform_rag_and_llm_testable(
            "高血压", make_milvus_mock(), make_pdf_mock(), neo4j, llm,
            requests_module=make_requests_mock()
        )
        assert isinstance(result, str) and len(result) > 0

    def test_neo4j_500_error_skipped(self):
        """Cypher service answers HTTP 500: Neo4j is skipped."""
        gen_resp = MagicMock()
        gen_resp.status_code = 500
        req = MagicMock()
        req.post.return_value = gen_resp
        neo4j = make_neo4j_driver_mock()

        llm = make_llm_mock("回答")
        result = perform_rag_and_llm_testable(
            "test", make_milvus_mock(), make_pdf_mock(),
            neo4j, llm, requests_module=req
        )
        assert result == "回答"
        self._session_of(neo4j).run.assert_not_called()
577
+
578
+
579
+ # ================================================================
580
+ # 场景 4: PDF 降级
581
+ # ================================================================
582
+
583
class TestPDFDegradation:
    """PDF retrieval failures: Milvus + Neo4j must still produce an answer."""

    def test_pdf_exception_still_answers(self):
        """Retriever raises: the step is skipped and the LLM answer is returned."""
        answer = perform_rag_and_llm_testable(
            "高血压",
            make_milvus_mock(),
            make_pdf_mock(raise_error=True),
            make_neo4j_driver_mock(),
            make_llm_mock("基于向量检索和知识图谱..."),
            requests_module=make_requests_mock(),
        )
        assert answer == "基于向量检索和知识图谱..."

    def test_pdf_empty_result_still_answers(self):
        """Retriever returns an empty list: the other paths are unaffected."""
        empty_pdf = make_pdf_mock(content="")
        answer = perform_rag_and_llm_testable(
            "test",
            make_milvus_mock(),
            empty_pdf,
            make_neo4j_driver_mock(),
            make_llm_mock("回答"),
            requests_module=make_requests_mock(),
        )
        assert answer == "回答"

    def test_pdf_down_context_still_has_milvus_and_neo4j(self):
        """With the PDF path down, the prompt still carries Milvus and Neo4j results."""
        llm = make_llm_mock()
        vector_store = make_milvus_mock([FakeDocument(page_content="MILVUS_结果")])
        graph = make_neo4j_driver_mock([("NEO4J_结果",)])
        broken_pdf = make_pdf_mock(raise_error=True)

        perform_rag_and_llm_testable(
            "test", vector_store, broken_pdf, graph, llm,
            requests_module=make_requests_mock(),
        )

        sent_messages = llm.chat.completions.create.call_args.kwargs["messages"]
        prompt = sent_messages[0]["content"]
        assert "MILVUS_结果" in prompt
        assert "NEO4J_结果" in prompt
622
+
623
+
624
+ # ================================================================
625
+ # 场景 5: Milvus 降级
626
+ # ================================================================
627
+
628
class TestMilvusDegradation:
    """Milvus failures: PDF + Neo4j must still produce an answer."""

    def test_milvus_exception_still_answers(self):
        """Milvus connection times out: the step is skipped and the LLM still answers."""
        milvus = make_milvus_mock(raise_error=True)
        llm = make_llm_mock("基于PDF和知识图谱的回答")

        result = perform_rag_and_llm_testable(
            "高血压", milvus, make_pdf_mock(), make_neo4j_driver_mock(),
            llm, requests_module=make_requests_mock()
        )
        assert result == "基于PDF和知识图谱的回答"

    def test_milvus_down_prompt_has_pdf_and_neo4j(self):
        """With Milvus down, the prompt still carries PDF and Neo4j results."""
        milvus = make_milvus_mock(raise_error=True)
        pdf = make_pdf_mock(content="PDF_内容")
        neo4j = make_neo4j_driver_mock([("NEO4J_内容",)])
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", milvus, pdf, neo4j, llm, requests_module=make_requests_mock()
        )

        prompt = llm.chat.completions.create.call_args.kwargs["messages"][0]["content"]
        assert "PDF_内容" in prompt
        # Must match the marker injected into the Neo4j mock above.
        assert "NEO4J_内容" in prompt
        # The built-in Milvus sample text must be absent.
        assert "控制每日钠摄入量" not in prompt
658
+
659
+
660
+ # ================================================================
661
+ # 场景 6: 多组件同时降级
662
+ # ================================================================
663
+
664
class TestMultipleDegradation:
    """Several recall components failing at the same time."""

    @staticmethod
    def _prompt_sent_to(llm):
        """Return the text of the single message passed to the LLM mock."""
        return llm.chat.completions.create.call_args.kwargs["messages"][0]["content"]

    def test_neo4j_and_pdf_both_down(self):
        """Neo4j + PDF down: Milvus alone feeds the prompt."""
        milvus = make_milvus_mock([FakeDocument(page_content="MILVUS_唯一来源")])
        pdf = make_pdf_mock(raise_error=True)
        req = make_requests_mock(generate_error=True)
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", milvus, pdf, make_neo4j_driver_mock(), llm, requests_module=req
        )

        assert "MILVUS_唯一来源" in self._prompt_sent_to(llm)

    def test_milvus_and_neo4j_both_down(self):
        """Milvus + Neo4j down: PDF alone feeds the prompt."""
        milvus = make_milvus_mock(raise_error=True)
        pdf = make_pdf_mock(content="PDF_唯一来源")
        req = make_requests_mock(generate_error=True)
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", milvus, pdf, make_neo4j_driver_mock(), llm, requests_module=req
        )

        assert "PDF_唯一来源" in self._prompt_sent_to(llm)

    def test_all_three_sources_down_llm_uses_own_knowledge(self):
        """All three recall paths down: the LLM gets an empty context and still answers."""
        milvus = make_milvus_mock(raise_error=True)
        pdf = make_pdf_mock(raise_error=True)
        req = make_requests_mock(generate_error=True)
        llm = make_llm_mock("作为医学AI, 根据我的知识...")

        result = perform_rag_and_llm_testable(
            "高血压怎么治疗", milvus, pdf, make_neo4j_driver_mock(), llm, requests_module=req
        )

        assert result == "作为医学AI, 根据我的知识..."

        # The instruction sentence also mentions both tags, so take the text
        # after the LAST opening tag up to the next closing tag.
        prompt = self._prompt_sent_to(llm)
        context_body = prompt.rpartition("<context>")[2].partition("</context>")[0]
        assert context_body.strip() == "", "all recall paths failed, context must be empty"

    def test_degradation_never_crashes(self):
        """No combination of failing components may crash the pipeline."""
        import itertools

        for milvus_fail, pdf_fail, neo4j_fail in itertools.product([True, False], repeat=3):
            milvus = make_milvus_mock(raise_error=milvus_fail)
            pdf = make_pdf_mock(raise_error=pdf_fail)
            # A Neo4j outage is simulated by the Cypher service being unreachable.
            req = make_requests_mock(generate_error=neo4j_fail)
            llm = make_llm_mock("OK")

            result = perform_rag_and_llm_testable(
                "测试", milvus, pdf, make_neo4j_driver_mock(),
                llm, requests_module=req
            )
            assert result == "OK", (
                f"milvus_fail={milvus_fail}, pdf_fail={pdf_fail}, "
                f"neo4j_fail={neo4j_fail} 组合不应崩溃"
            )
731
+
732
+
733
+ # ================================================================
734
+ # 场景 8: Chatbot 端点完整流程
735
+ # ================================================================
736
+
737
class TestChatbotEndpointFlow:
    """Simulated request handling of the chatbot endpoint: parse, cache lookup, RAG, response."""

    def _simulate_chatbot(self, request_body, redis_mgr, rag_func):
        """Re-create the endpoint logic without an HTTP layer.

        Args:
            request_body: A dict, or a JSON string that decodes to one.
            redis_mgr: Object exposing ``get_or_compute(query, callback)``.
            rag_func: Callable taking the question and returning the answer.

        Returns:
            A dict with ``status`` 200 plus ``response`` and ``time``, or
            ``status`` 400/500 plus ``error``.
        """
        import datetime

        try:
            if isinstance(request_body, str):
                payload = json.loads(request_body)
            else:
                payload = request_body

            query = payload.get('question')
            if not query:
                return {"status": 400, "error": "Question is required"}

            def compute_answer():
                return rag_func(query)

            response = redis_mgr.get_or_compute(query, compute_answer)

            return {
                "response": response,
                "status": 200,
                "time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            }
        except Exception as e:
            return {"status": 500, "error": str(e)}

    def test_normal_request_returns_200(self):
        """A well-formed request yields status 200, the answer and a timestamp."""
        mgr = make_redis_manager()

        def rag(q):
            return "医学回答"

        resp = self._simulate_chatbot({"question": "高血压饮食"}, mgr, rag)

        assert resp["status"] == 200
        assert resp["response"] == "医学回答"
        assert "time" in resp

    def test_missing_question_returns_400(self):
        """A body without the ``question`` field yields status 400."""
        mgr = make_redis_manager()
        resp = self._simulate_chatbot({"query": "错误字段"}, mgr, lambda q: "")
        assert resp["status"] == 400

    def test_empty_question_returns_400(self):
        """An empty ``question`` yields status 400."""
        mgr = make_redis_manager()
        resp = self._simulate_chatbot({"question": ""}, mgr, lambda q: "")
        assert resp["status"] == 400

    def test_double_encoded_json(self):
        """A body that arrives as a JSON string (not a dict) is decoded before use."""
        mgr = make_redis_manager()
        double_encoded = json.dumps({"question": "高血压"})

        resp = self._simulate_chatbot(double_encoded, mgr, lambda q: "回答")
        assert resp["status"] == 200
        assert resp["response"] == "回答"

    def test_rag_exception_returns_500(self):
        """An exception inside RAG surfaces as status 500 with an error message."""
        mgr = make_redis_manager()

        def exploding_rag(q):
            raise RuntimeError("GPU OOM")

        resp = self._simulate_chatbot({"question": "test"}, mgr, exploding_rag)
        # The question is non-empty, so 500 is the only error path available.
        assert resp["status"] == 500
        assert "error" in resp

    def test_sequential_requests_cache_behavior(self):
        """Three requests in a row: the repeated one is cached, the new one runs RAG."""
        mgr = make_redis_manager()
        call_log = []

        def logging_rag(q):
            call_log.append(q)
            return f"答案: {q}"

        self._simulate_chatbot({"question": "Q1"}, mgr, logging_rag)
        self._simulate_chatbot({"question": "Q1"}, mgr, logging_rag)  # expected cache hit
        self._simulate_chatbot({"question": "Q2"}, mgr, logging_rag)

        assert call_log == ["Q1", "Q2"], "Q1 只应调用一次 RAG, Q2 调用一次"
824
+
825
+
826
+ # ================================================================
827
+ # 场景 9: 并发请求下的 Redis 锁 + RAG 协作
828
+ # ================================================================
829
+
830
class TestConcurrencyBehavior:
    """Behaviour of the Redis lock when several threads query at once."""

    def test_concurrent_same_question_only_one_rag(self):
        """Many threads asking the same question should trigger very few RAG runs."""
        mgr = make_redis_manager()
        rag_call_count = 0
        counter_lock = threading.Lock()
        results = []

        def slow_rag():
            nonlocal rag_call_count
            with counter_lock:
                rag_call_count += 1
            time.sleep(0.05)  # simulate a slow pipeline
            return "RAG结果"

        def worker():
            results.append(mgr.get_or_compute("相同的问题", slow_rag))

        threads = []
        for _ in range(5):
            t = threading.Thread(target=worker)
            threads.append(t)
            t.start()

        for t in threads:
            t.join(timeout=5)

        # join() returns silently on timeout, and all() is true for an empty
        # list, so first prove that every worker finished and reported.
        assert not any(t.is_alive() for t in threads), "worker threads did not finish in time"
        assert len(results) == len(threads), "every worker should record a result"

        # Ideally the lock allows a single RAG run; the bound is kept loose to
        # tolerate scheduling effects in the fake backend.
        assert rag_call_count <= 3, f"预期 ≤3 次 RAG 调用, 实际 {rag_call_count}"
        assert all(r is not None for r in results), "所有线程都应获得结果"

    def test_concurrent_different_questions_all_run_rag(self):
        """Threads asking different questions each run RAG once."""
        mgr = make_redis_manager()
        call_log = []
        log_lock = threading.Lock()

        def tracking_rag():
            tid = threading.current_thread().name
            with log_lock:
                call_log.append(tid)
            return f"答案_{tid}"

        threads = []
        for i in range(3):
            # Bind the question as a default so each worker keeps its own value.
            def worker(q=f"问题_{i}"):
                mgr.get_or_compute(q, tracking_rag)
            t = threading.Thread(target=worker, name=f"T{i}")
            threads.append(t)
            t.start()

        for t in threads:
            t.join(timeout=5)

        assert not any(t.is_alive() for t in threads), "worker threads did not finish in time"
        assert len(call_log) == 3, "不同问题应各自执行 RAG"
890
+
891
+
892
# ================================================================
# Scenario 9: Full data-ingestion pipeline (JSONL → Embedding → Milvus)
# ================================================================
895
+
896
class TestDataIngestionPipeline:
    """Walk through preprocessing → embedding → storage with mocked backends."""

    def test_jsonl_to_documents_to_embedding(self, tmp_path):
        """Parse a JSONL file, wrap rows as documents, embed them, then store.

        The embedding client and the vector store are both mocks, so this
        verifies the call flow and result shapes rather than real services.
        """
        # Prepare the fixture file. It is written explicitly as UTF-8 because
        # it is read back as UTF-8 below and the JSON keeps non-ASCII text
        # (ensure_ascii=False); relying on the locale encoding would break on
        # platforms whose default is not UTF-8.
        jsonl = tmp_path / "test.jsonl"
        jsonl.write_text(
            json.dumps({"query": "高血压症状", "response": "头晕头痛"}, ensure_ascii=False) + "\n"
            + json.dumps({"query": "糖尿病饮食", "response": "低糖低脂"}, ensure_ascii=False) + "\n",
            encoding="utf-8",
        )

        # Step 1: parse the JSONL, one document per line.
        docs = []
        with open(jsonl, 'r', encoding='utf-8') as f:
            for line in f:
                c = json.loads(line.strip())
                docs.append(FakeDocument(
                    page_content=c['query'] + "\n" + c['response'],
                    metadata={"doc_id": str(uuid.uuid4())}
                ))

        assert len(docs) == 2

        # Step 2: run the embedder against a mocked client.
        from vector import OpenAIEmbeddings
        # object.__new__ skips __init__, so no real client is constructed.
        embedder = object.__new__(OpenAIEmbeddings)
        mock_client = MagicMock()
        # Every embeddings.create() call yields a response whose `data` holds
        # a single 1536-dimensional vector.
        mock_client.embeddings.create.return_value = type('R', (), {
            'data': [type('E', (), {'embedding': [0.1] * 1536})()]
        })()
        embedder.client = mock_client

        embeddings = embedder.embed_documents([d.page_content for d in docs])

        assert len(embeddings) == 2
        assert len(embeddings[0]) == 1536

        # Step 3: verify the storage call (Milvus is mocked).
        mock_vs = MagicMock()
        mock_vs.add_documents.return_value = None

        mock_vs.add_documents(docs)
        mock_vs.add_documents.assert_called_once_with(docs)

    def test_pdf_preprocessing_to_retriever(self, tmp_path):
        """Turn a PDF-extraction DataFrame into documents and hand them to a retriever."""
        import pandas as pd

        # DataFrame shaped like the output of the PDF extraction step.
        df = pd.DataFrame({
            "file_name": ["指南.pdf", "指南.pdf"],
            "page_number": [1, 2],
            "text_content": [
                "高血压定义: 收缩压≥140mmHg或舒张压≥90mmHg",
                "高血压分级: 1级(140-159/90-99)"
            ]
        })

        # Step 1: DataFrame → Document. Only the text column is needed, so
        # iterate it directly instead of materializing every row.
        documents = [
            FakeDocument(
                page_content=str(text).strip(),
                metadata={"doc_id": str(uuid.uuid4())},
            )
            for text in df["text_content"]
        ]

        assert len(documents) == 2
        assert "140mmHg" in documents[0].page_content

        # Step 2: add to the (mocked) retriever and check the exact payload.
        mock_retriever = MagicMock()
        mock_retriever.add_documents(documents)
        mock_retriever.add_documents.assert_called_once_with(documents)
970
+
971
+
972
# ================================================================
# Scenario 10: Context quality verification
# ================================================================
975
+
976
class TestContextQuality:
    """Verify the context the LLM receives for different retrieval combinations."""

    @staticmethod
    def _captured_prompt(llm):
        """Return the content of the first message passed to the mocked LLM call."""
        return llm.chat.completions.create.call_args.kwargs["messages"][0]["content"]

    def test_milvus_topk_ordering_preserved(self):
        """The order of the Milvus top-k results must be kept in the context."""
        docs = [FakeDocument(page_content=f"排名{i}的文档") for i in range(1, 6)]
        milvus = make_milvus_mock(docs)
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", milvus, make_pdf_mock(content=""), make_neo4j_driver_mock([]),
            llm, requests_module=make_requests_mock(generate_response={
                "cypher_query": None, "confidence": 0.1, "validated": False
            })
        )

        prompt = self._captured_prompt(llm)
        positions = [prompt.find(doc.page_content) for doc in docs]
        # str.find returns -1 for a missing substring, which would compare as
        # "before" everything else; require every document to be present
        # before judging the order.
        assert all(pos >= 0 for pos in positions), "所有 Milvus 文档都应出现在 prompt 中"
        assert positions == sorted(positions), "Milvus 排名顺序应被保留"

    def test_three_sources_have_correct_order(self):
        """The context is concatenated in the order Milvus → PDF → Neo4j."""
        milvus = make_milvus_mock([FakeDocument(page_content="AAA_MILVUS")])
        pdf = make_pdf_mock(content="BBB_PDF")
        neo4j = make_neo4j_driver_mock([("CCC_NEO4J",)])
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", milvus, pdf, neo4j, llm, requests_module=make_requests_mock()
        )

        prompt = self._captured_prompt(llm)
        pos_a = prompt.find("AAA_MILVUS")
        pos_b = prompt.find("BBB_PDF")
        pos_c = prompt.find("CCC_NEO4J")
        # The leading `0 <=` rejects a missing Milvus marker (find() == -1),
        # which would otherwise satisfy the ordering chain.
        assert 0 <= pos_a < pos_b < pos_c, "上下文顺序应为 Milvus → PDF → Neo4j"

    def test_duplicate_content_not_deduplicated(self):
        """Pin the current no-deduplication behavior (a possible future optimization)."""
        same_content = "高血压要低盐饮食"
        milvus = make_milvus_mock([FakeDocument(page_content=same_content)])
        pdf = make_pdf_mock(content=same_content)
        llm = make_llm_mock()

        perform_rag_and_llm_testable(
            "test", milvus, pdf, make_neo4j_driver_mock([]),
            llm, requests_module=make_requests_mock(generate_response={
                "cypher_query": None, "confidence": 0.1, "validated": False
            })
        )

        prompt = self._captured_prompt(llm)
        assert prompt.count(same_content) == 2, "当前不去重, 内容出现两次"
1030
+
1031
+
1032
+ # ================================================================
1033
if __name__ == "__main__":
    # pytest.main returns the run's exit code; pass it on so that running this
    # file as a script reports failures to the shell / CI instead of exiting 0.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))