# customeragent-api / server / tests / verify_iqra_rag.py
# anasraza526 - "Clean deploy to Hugging Face" (ac90985)
import asyncio
import sys
import os
import json
# Setup paths
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from app.services.unified_data_manager import UnifiedDataSourceManager
from app.services.language_detector import Language
def _simple_hash_embedding(text, dim=384):
    """Return a deterministic bag-of-words hash embedding for ``text``.

    The text is lower-cased and split into word-character runs. Each word is
    assigned to bucket ``sum(ord(c) for c in word) % dim`` and adds
    ``1 / word_count`` to that bucket.

    Args:
        text: The text to embed.
        dim: Length of the returned vector (384 in the verification run).

    Returns:
        A ``numpy.float32`` vector of length ``dim``; all zeros when ``text``
        contains no word characters.
    """
    import re

    import numpy as np

    embedding = np.zeros(dim, dtype=np.float32)
    words = re.findall(r'\w+', text.lower())
    if not words:
        return embedding
    weight = 1.0 / len(words)
    for word in words:
        embedding[sum(ord(c) for c in word) % dim] += weight
    return embedding


def _print_top_hits(label, hits, limit=3):
    """Print ``label`` followed by the first ``limit`` ``(meta, score)`` hits."""
    print(label)
    for rank, (meta, score) in enumerate(hits[:limit]):
        # A stored text of None must not crash the debug output.
        snippet = (meta.get('text') or '')[:60]
        print(f" [{rank}] Score: {score:.4f} | Text: {snippet}...")


def _check_answer(answer, expected_contains, not_expected):
    """Report missing/forbidden phrases in ``answer`` (case-insensitive).

    Returns:
        True when every expected phrase is present and no forbidden phrase
        appears; False otherwise. Each violation is printed.
    """
    haystack = answer.lower()  # lower-case once instead of once per phrase
    passed = True
    for expected in expected_contains:
        if expected.lower() not in haystack:
            print(f"❌ FAILED: Expected '{expected}' not found in answer.")
            passed = False
    for unexpected in not_expected:
        if unexpected.lower() in haystack:
            print(f"❌ FAILED: Hallucinated '{unexpected}' found in answer.")
            passed = False
    return passed


async def run_verification():
    """Run the Iqra University RAG checks and print a pass/fail report.

    For every test case this prints the top raw vector-search hits for the
    model embedding and for a simple hash embedding, then sends the query
    through ``UnifiedDataSourceManager.query`` and checks the answer for
    required and forbidden phrases.

    Returns:
        bool: True when every test case passed, False otherwise.
    """
    print("🚀 STARTING RAG FIX VERIFICATION (IQRA UNIVERSITY)")
    print("=" * 70)

    # Initialize manager with a mock DB to bypass early exits in retrieval
    from unittest.mock import MagicMock
    from app.services.unified_data_manager import DataSource

    mock_db = MagicMock()
    # Mock the website query to return something so it doesn't fail
    mock_website = MagicMock()
    mock_website.scraped_content = True  # Needed for the logic check
    mock_db.query.return_value.filter_by.return_value.first.return_value = mock_website

    manager = UnifiedDataSourceManager(db_session=mock_db)
    # FORCE enable scraped data source for testing
    manager.available_sources[DataSource.WEBSITE_SCRAPED] = True

    # Test cases that failed in the audit
    test_cases = [
        {
            "query": "what is admission criteria of MBA Master's Program",
            "expected_contains": ["2.5 CGPA", "2nd Division"],
            "not_expected": ["GMAT", "GRE", "3-5 years"],
        },
        {
            "query": "how many campuses does iqra University have ?",
            "expected_contains": ["Main", "North", "Islamabad"],
            "not_expected": ["Lahore", "Multan", "around 10"],
        },
        {
            "query": "iqra university Main Campus (Karachi) exact location",
            "expected_contains": ["Defence View", "Shaheed-e-Millat"],
            "not_expected": ["Gulistan-e-Johar", "University Road"],
        },
    ]
    website_id = 22

    # Loop-invariant setup: import the debug dependencies and load the vector
    # index once, since website_id is the same for every test case.
    from app.services.vector_db import VectorDB
    from app.services.vector_operations import VectorOperations
    import numpy as np

    vdb = VectorDB()
    vdb.load(website_id)
    # Deliberately permissive search settings so the debug output shows the
    # nearest neighbours regardless of score or truth level.
    search_options = {"k": 5, "min_score": 0.0, "min_truth_level": None}

    passed_count = 0
    for test in test_cases:
        query = test['query']
        print(f"\nQUERY: {query}")

        # DEBUG: Direct Vector Search
        query_emb = await VectorOperations.get_embedding(query)
        raw_results = vdb.search(
            np.array(query_emb, dtype=np.float32),
            website_id,
            **search_options,
        )
        _print_top_hits("TRANSFORMER SEARCH RESULTS:", raw_results)

        # TEST: Simple Hash Embedding
        hash_results = vdb.search(
            _simple_hash_embedding("query: " + query),
            website_id,
            **search_options,
        )
        _print_top_hits("HASH SEARCH RESULTS:", hash_results)

        result = await manager.query(
            user_query=query,
            website_id=website_id,
            industry="education",
            session_id="verification_session",
        )
        # A missing or None answer should fail this case, not abort the run.
        answer = result.get('answer') or ''
        source = result.get('source')
        context = result.get('aggregated_context', '')

        print(f"SOURCE: {source}")
        print("-" * 50)
        print(f"FULL AGGREGATED CONTEXT:\n{context}")
        print("-" * 50)
        print(f"ANSWER: {answer}")

        # Check accuracy
        if _check_answer(answer, test['expected_contains'], test['not_expected']):
            print("✅ TEST PASSED")
            passed_count += 1
        else:
            print("❌ TEST FAILED")

    print("\n" + "=" * 70)
    print(f"VERIFICATION COMPLETE: {passed_count}/{len(test_cases)} Passed")
    print("=" * 70)
    return passed_count == len(test_cases)
if __name__ == "__main__":
    # Script entry point: build the verification coroutine and drive it to
    # completion on a fresh event loop.
    verification = run_verification()
    asyncio.run(verification)