Spaces:
Runtime error
Runtime error
| import asyncio | |
| import sys | |
| import os | |
| import json | |
| # Setup paths | |
| sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) | |
| from app.services.unified_data_manager import UnifiedDataSourceManager | |
| from app.services.language_detector import Language | |
def _simple_hash_embedding(text, dim=384):
    """Return a bag-of-words hash embedding of ``text`` as a float32 vector.

    The text is lower-cased and split into word tokens. Each token is mapped
    to the bucket ``sum(ord(c) for c in token) % dim`` and adds
    ``1 / token_count`` to that bucket. Text without any word token yields
    the all-zero vector.

    Args:
        text: The string to embed.
        dim: Length of the returned vector (384 matches the original script).

    Returns:
        A ``numpy.ndarray`` of shape ``(dim,)`` and dtype ``float32``.
    """
    import re

    import numpy as np

    embedding = np.zeros(dim, dtype=np.float32)
    words = re.findall(r'\w+', text.lower())
    if not words:
        return embedding
    weight = 1.0 / len(words)
    for word in words:
        embedding[sum(ord(c) for c in word) % dim] += weight
    return embedding


def _print_top_hits(title, hits, limit=3):
    """Print ``title`` followed by the first ``limit`` ``(metadata, score)`` hits."""
    print(title)
    for i, (meta, score) in enumerate(hits[:limit]):
        print(f" [{i}] Score: {score:.4f} | Text: {meta.get('text', '')[:60]}...")


def _check_answer(answer, expected_contains, not_expected):
    """Check ``answer`` case-insensitively against required and forbidden phrases.

    Prints one failure line per missing expected phrase and per forbidden
    phrase that is present.

    Args:
        answer: The answer text; a falsy value is treated as empty text so a
            missing answer fails the checks instead of raising.
        expected_contains: Phrases that must all occur in the answer.
        not_expected: Phrases that must not occur in the answer.

    Returns:
        ``True`` when every expected phrase is present and no forbidden phrase is.
    """
    # Lower-case the answer once instead of once per phrase.
    haystack = (answer or "").lower()
    passed = True
    for expected in expected_contains:
        if expected.lower() not in haystack:
            print(f"β FAILED: Expected '{expected}' not found in answer.")
            passed = False
    for unexpected in not_expected:
        if unexpected.lower() in haystack:
            print(f"β FAILED: Hallucinated '{unexpected}' found in answer.")
            passed = False
    return passed


async def run_verification():
    """Run the RAG fix verification queries and print a pass/fail report.

    For every test case this prints the top hits of a direct vector search
    using the ``VectorOperations`` embedding, the top hits of a search using
    the simple hash embedding, and then the source, aggregated context and
    answer returned by ``UnifiedDataSourceManager.query``. The answer is
    checked for phrases that must and must not appear, and a summary of
    passed cases is printed at the end.

    NOTE(review): the leading "π" / "β" glyphs in the status strings look like
    mis-decoded emoji; they are kept as-is until the intended characters are
    confirmed.
    """
    from unittest.mock import MagicMock

    import numpy as np

    from app.services.unified_data_manager import DataSource
    from app.services.vector_db import VectorDB
    from app.services.vector_operations import VectorOperations

    print("π STARTING RAG FIX VERIFICATION (IQRA UNIVERSITY)")
    print("=" * 70)

    # Initialize manager with a mock DB to bypass early exits in retrieval
    mock_db = MagicMock()
    # Mock the website query to return something so it doesn't fail
    mock_website = MagicMock()
    mock_website.scraped_content = True  # Needed for the logic check
    mock_db.query.return_value.filter_by.return_value.first.return_value = mock_website
    manager = UnifiedDataSourceManager(db_session=mock_db)
    # FORCE enable scraped data source for testing
    manager.available_sources[DataSource.WEBSITE_SCRAPED] = True

    # Test cases that failed in the audit
    test_cases = [
        {
            "query": "what is admission criteria of MBA Master's Program",
            "expected_contains": ["2.5 CGPA", "2nd Division"],
            "not_expected": ["GMAT", "GRE", "3-5 years"],
        },
        {
            "query": "how many campuses does iqra University have ?",
            "expected_contains": ["Main", "North", "Islamabad"],
            "not_expected": ["Lahore", "Multan", "around 10"],
        },
        {
            "query": "iqra university Main Campus (Karachi) exact location",
            "expected_contains": ["Defence View", "Shaheed-e-Millat"],
            "not_expected": ["Gulistan-e-Johar", "University Road"],
        },
    ]
    website_id = 22

    # Load the vector index once; this script only calls search() on it, so
    # reloading it for every test case was redundant work.
    vdb = VectorDB()
    vdb.load(website_id)

    passed_count = 0
    for test in test_cases:
        query = test['query']
        print(f"\nQUERY: {query}")

        # DEBUG: Direct Vector Search
        query_emb = await VectorOperations.get_embedding(query)
        raw_results = vdb.search(
            np.array(query_emb, dtype=np.float32),
            website_id,
            k=5,
            min_score=0.0,
            min_truth_level=None,
        )
        _print_top_hits("TRANSFORMER SEARCH RESULTS:", raw_results)

        # TEST: Simple Hash Embedding
        hash_results = vdb.search(
            _simple_hash_embedding("query: " + query),
            website_id,
            k=5,
            min_score=0.0,
            min_truth_level=None,
        )
        _print_top_hits("HASH SEARCH RESULTS:", hash_results)

        result = await manager.query(
            user_query=query,
            website_id=website_id,
            industry="education",
            session_id="verification_session",
        )
        answer = result['answer']
        source = result['source']
        context = result.get('aggregated_context', '')
        print(f"SOURCE: {source}")
        print("-" * 50)
        print(f"FULL AGGREGATED CONTEXT:\n{context}")
        print("-" * 50)
        print(f"ANSWER: {answer}")

        # Check accuracy
        if _check_answer(answer, test['expected_contains'], test['not_expected']):
            print("β TEST PASSED")
            passed_count += 1
        else:
            print("β TEST FAILED")

    print("\n" + "=" * 70)
    print(f"VERIFICATION COMPLETE: {passed_count}/{len(test_cases)} Passed")
    print("=" * 70)
if __name__ == "__main__":
    # Executed as a script: build the verification coroutine and drive it
    # to completion.
    verification = run_verification()
    asyncio.run(verification)