scrapeRL / backend /test_full_system.py
NeerajCodz's picture
docs: init proto
24f0bf0
#!/usr/bin/env python3
"""
Comprehensive ScrapeRL System Test Suite
Tests all components at LOW, MID, and HIGH complexity levels:
- Scraper environment and actions
- Reward function calculations
- Plugin system
- Embeddings with Gemini
- Vector search (memory)
- AI providers (NVIDIA, Groq)
- API endpoints
Author: ScrapeRL Test Suite
"""
import asyncio
import json
import os
import sys
import time
from collections.abc import Awaitable, Callable
from dataclasses import dataclass, field
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Any
# Put this file's directory at the front of sys.path so the `app.*` imports
# performed inside the test cases resolve when the file is run as a script.
sys.path.insert(0, str(Path(__file__).parent))
# Load environment variables before any test reads them via os.getenv
# (the API keys and the embedding model name are looked up that way below).
from dotenv import load_dotenv
load_dotenv()
class TestComplexity(str, Enum):
    """Complexity tier a test case belongs to.

    Members are also ``str`` instances, so they compare equal to their
    lowercase value. Definition order (LOW, MID, HIGH) is the order in which
    the report lists the tiers.
    """

    LOW = "low"
    MID = "mid"
    HIGH = "high"
@dataclass
class TestResult:
    """Outcome of one test case, as consumed by the reporter."""

    # Human-readable case name shown in the console and in the report.
    name: str
    # Tier the case belongs to; the report groups results by this value.
    complexity: TestComplexity
    # Component label; the report's summary table groups results by it.
    component: str
    # Whether the case's pass condition held.
    passed: bool
    # Elapsed time of the case in seconds.
    duration: float
    # Free-form diagnostic values; rendered as JSON in the report.
    details: dict[str, Any] = field(default_factory=dict)
    # Exception text when the case raised, otherwise None.
    error: str | None = None
class TestReporter:
    """Accumulates test results and renders them as a markdown report."""

    def __init__(self):
        # Results in the order they were reported.
        self.results: list[TestResult] = []
        # Taken at construction; used as the start of the reported duration.
        self.start_time: datetime = datetime.now()

    def add_result(self, result: TestResult):
        """Store ``result`` and echo a one-line status to stdout.

        When the result carries an error, its first 100 characters are
        printed on a second line.
        """
        self.results.append(result)
        if result.passed:
            status = "✅ PASS"
        else:
            status = "❌ FAIL"
        level = result.complexity.value.upper()
        print(f" [{level}] {result.name}: {status} ({result.duration:.2f}s)")
        if result.error:
            print(f" Error: {result.error[:100]}")

    def generate_report(self) -> str:
        """Build the markdown report for everything collected so far.

        The report has three parts: an overall summary, a per-complexity
        listing of every result (details capped at 1000 characters of JSON,
        errors at 500 characters), and a per-component summary table.

        Returns:
            The complete report as a single string.
        """
        finished_at = datetime.now()
        elapsed = (finished_at - self.start_time).total_seconds()
        total = len(self.results)
        pass_count = sum(1 for r in self.results if r.passed)
        fail_count = sum(1 for r in self.results if not r.passed)
        overall_rate = (pass_count / total * 100) if self.results else 0

        # Pieces of the report, concatenated once at the end.
        chunks = ["\n".join([
            "# ScrapeRL Comprehensive Test Report",
            f"**Generated:** {finished_at.strftime('%Y-%m-%d %H:%M:%S')}",
            f"**Test Duration:** {elapsed:.2f}s",
            "## Summary",
            f"- **Total Tests:** {total}",
            f"- **Passed:** ✅ {pass_count}",
            f"- **Failed:** ❌ {fail_count}",
            f"- **Success Rate:** {overall_rate:.1f}%",
            "## Tests by Complexity",
            "",
        ])]

        # One section per complexity tier, skipping tiers with no results.
        for level in TestComplexity:
            level_results = [r for r in self.results if r.complexity == level]
            if not level_results:
                continue
            level_passed = sum(1 for r in level_results if r.passed)
            chunks.append(
                f"### {level.value.upper()} Complexity ({level_passed}/{len(level_results)} passed)\n\n"
            )
            for entry in level_results:
                verdict = "✅ PASS" if entry.passed else "❌ FAIL"
                chunks.append(f"#### {entry.name} {verdict}\n\n")
                chunks.append(f"**Component:** {entry.component} \n")
                chunks.append(f"**Duration:** {entry.duration:.2f}s \n\n")
                if entry.details:
                    # default=str stringifies values json cannot encode itself.
                    dumped = json.dumps(entry.details, indent=2, default=str)
                    chunks.append("**Details:**\n```json\n")
                    chunks.append(dumped[:1000])
                    chunks.append("\n```\n\n")
                if entry.error:
                    chunks.append(f"**Error:**\n```\n{entry.error[:500]}\n```\n\n")
                chunks.append("---\n\n")

        # Per-component table, rows sorted by component name.
        chunks.append("## Component Summary\n\n")
        chunks.append("| Component | Tests | Passed | Failed | Success Rate |\n")
        chunks.append("|-----------|-------|--------|--------|-------------|\n")
        by_component: dict[str, list[TestResult]] = {}
        for r in self.results:
            by_component.setdefault(r.component, []).append(r)
        for component in sorted(by_component):
            rows = by_component[component]
            n_passed = sum(1 for r in rows if r.passed)
            n_failed = len(rows) - n_passed
            rate = n_passed / len(rows) * 100
            chunks.append(
                f"| {component} | {len(rows)} | {n_passed} | {n_failed} | {rate:.1f}% |\n"
            )
        return "".join(chunks)
class ScrapeRLTestSuite:
    """Comprehensive test suite for ScrapeRL.

    Each public ``test_*`` coroutine covers one component with a LOW, a MID
    and a HIGH complexity case. Every case is a private ``_case_*`` coroutine
    that returns ``(passed, details)``; ``_run_case`` times it, turns any
    exception into a failed result, and hands the outcome to the reporter.
    Project modules are imported inside the cases so that an import failure
    is recorded against that single case instead of aborting the run.
    """

    def __init__(self):
        self.reporter = TestReporter()

    async def _run_case(
        self,
        name: str,
        complexity: TestComplexity,
        component: str,
        case: Callable[[], Awaitable[tuple[bool, dict[str, Any]]]],
    ) -> None:
        """Run one case and record exactly one result for it.

        Args:
            name: Display name of the case.
            complexity: Tier the case is reported under.
            component: Component label the case is reported under.
            case: Zero-argument coroutine function returning
                ``(passed, details)``.
        """
        # perf_counter is monotonic, so the duration cannot go negative or
        # jump if the system clock is adjusted while the case runs.
        started = time.perf_counter()
        try:
            passed, details = await case()
        except Exception as exc:  # any failure inside a case is a failed result
            self.reporter.add_result(TestResult(
                name=name,
                complexity=complexity,
                component=component,
                passed=False,
                duration=time.perf_counter() - started,
                error=str(exc),
            ))
            return
        self.reporter.add_result(TestResult(
            name=name,
            complexity=complexity,
            component=component,
            passed=bool(passed),
            duration=time.perf_counter() - started,
            details=details,
        ))

    async def run_all_tests(self):
        """Run every test category, write the markdown report, print a summary.

        The report is written to ``docs/test/comprehensive-test-report.md``
        under the parent of this file's directory (created if missing).

        Returns:
            The list of ``TestResult`` objects collected by the reporter.
        """
        print("\n" + "="*60)
        print("🧪 ScrapeRL Comprehensive Test Suite")
        print("="*60 + "\n")
        test_categories = [
            ("Scraper Environment", self.test_scraper_environment),
            ("Reward Function", self.test_reward_function),
            ("Plugins System", self.test_plugins),
            ("Embeddings (Gemini)", self.test_embeddings),
            ("Vector Search / Memory", self.test_vector_search),
            ("AI Providers", self.test_ai_providers),
            ("API Endpoints", self.test_api_endpoints),
        ]
        for category_name, test_func in test_categories:
            print(f"\n📋 Testing: {category_name}")
            print("-" * 40)
            try:
                await test_func()
            except Exception as e:
                # Best effort: one broken category must not stop the others.
                print(f" ❌ Category failed: {e}")

        report = self.reporter.generate_report()
        report_path = Path(__file__).parent.parent / "docs" / "test" / "comprehensive-test-report.md"
        report_path.parent.mkdir(parents=True, exist_ok=True)
        report_path.write_text(report, encoding='utf-8')

        print("\n" + "="*60)
        print(f"📊 Test Report saved to: {report_path}")
        passed = sum(1 for r in self.reporter.results if r.passed)
        total = len(self.reporter.results)
        # Guard the percentage: with no recorded results the division would raise.
        success_rate = (passed / total * 100) if total else 0.0
        print(f"✅ Final Results: {passed}/{total} tests passed ({success_rate:.1f}%)")
        print("="*60 + "\n")
        return self.reporter.results

    # =========================================================================
    # SCRAPER ENVIRONMENT TESTS
    # =========================================================================
    async def test_scraper_environment(self):
        """Test the scraper environment at different complexity levels."""
        await self._run_case("Environment Reset", TestComplexity.LOW, "Scraper",
                             self._case_environment_reset)
        await self._run_case("Navigation & Extraction", TestComplexity.MID, "Scraper",
                             self._case_navigation_extraction)
        await self._run_case("Full Episode Completion", TestComplexity.HIGH, "Scraper",
                             self._case_full_episode)

    async def _case_environment_reset(self) -> tuple[bool, dict[str, Any]]:
        """LOW: create an environment and reset it for a task."""
        from app.core.env import WebScraperEnv
        from app.config import get_settings

        settings = get_settings()
        env = WebScraperEnv(episode_id="test-001", settings=settings)
        obs, info = await env.reset(task_id="task_001")
        passed = obs is not None and info.get("episode_id") == "test-001"
        details = {
            "episode_id": info.get("episode_id"),
            "task_id": info.get("task_id"),
            "observation_fields": list(obs.__dict__.keys()) if obs else [],
        }
        return passed, details

    async def _case_navigation_extraction(self) -> tuple[bool, dict[str, Any]]:
        """MID: one navigate step followed by one extract step."""
        from app.core.env import WebScraperEnv
        from app.core.action import Action, ActionType
        from app.config import get_settings

        settings = get_settings()
        env = WebScraperEnv(episode_id="test-002", settings=settings)
        await env.reset(task_id="task_001")
        nav_action = Action(
            action_type=ActionType.NAVIGATE,
            parameters={"url": "https://example.com"},
            reasoning="Testing navigation",
        )
        # step() yields a 6-tuple; only observation and reward are checked here.
        obs, reward, _, _, _, _ = await env.step(nav_action)
        extract_action = Action(
            action_type=ActionType.EXTRACT_FIELD,
            parameters={"field_name": "product_name", "selector": "h1"},
            reasoning="Testing extraction",
        )
        obs2, reward2, _, _, _, _ = await env.step(extract_action)
        passed = obs is not None and reward is not None and obs2 is not None
        details = {
            "nav_reward": reward,
            "extract_reward": reward2,
            "extracted_fields": len(obs2.extracted_so_far) if obs2 else 0,
            "current_url": obs.current_url if obs else None,
        }
        return passed, details

    async def _case_full_episode(self) -> tuple[bool, dict[str, Any]]:
        """HIGH: run a scripted episode until it terminates or is truncated."""
        from app.core.env import WebScraperEnv
        from app.core.action import Action, ActionType
        from app.config import get_settings

        settings = get_settings()
        env = WebScraperEnv(episode_id="test-003", settings=settings)
        await env.reset(task_id="task_001")
        actions = [
            Action(action_type=ActionType.NAVIGATE, parameters={"url": "https://example.com/product/123"}, reasoning="Navigate to product"),
            Action(action_type=ActionType.EXTRACT_FIELD, parameters={"field_name": "product_name"}, reasoning="Extract name"),
            Action(action_type=ActionType.EXTRACT_FIELD, parameters={"field_name": "price"}, reasoning="Extract price"),
            Action(action_type=ActionType.EXTRACT_FIELD, parameters={"field_name": "description"}, reasoning="Extract description"),
            Action(action_type=ActionType.DONE, parameters={"success": True}, reasoning="Task complete"),
        ]
        total_reward = 0
        final_obs = None
        for action in actions:
            obs, reward, _, terminated, truncated, _ = await env.step(action)
            total_reward += reward
            final_obs = obs
            if terminated or truncated:
                break
        state = env.get_state()
        # One guarded count feeds both the pass condition and the details,
        # so a missing final observation is a plain failure, not an error.
        extracted_count = len(final_obs.extracted_so_far) if final_obs else 0
        passed = bool(state.get("is_terminal", False)) and extracted_count >= 3
        details = {
            "total_reward": total_reward,
            "steps_taken": state.get("step_number", 0),
            "extracted_fields": extracted_count,
            "is_terminal": state.get("is_terminal", False),
            "status": state.get("status", "unknown"),
        }
        return passed, details

    # =========================================================================
    # REWARD FUNCTION TESTS
    # =========================================================================
    async def test_reward_function(self):
        """Test reward calculation at different complexity levels."""
        await self._run_case("Basic Reward Computation", TestComplexity.LOW, "Reward",
                             self._case_basic_reward)
        await self._run_case("Reward with Ground Truth", TestComplexity.MID, "Reward",
                             self._case_reward_ground_truth)
        await self._run_case("Terminal Reward Calculation", TestComplexity.HIGH, "Reward",
                             self._case_terminal_reward)

    async def _case_basic_reward(self) -> tuple[bool, dict[str, Any]]:
        """LOW: a single-step reward has the expected result types."""
        from app.core.reward import RewardEngine, RewardBreakdown
        from app.core.action import Action, ActionType
        # TaskContext is unused here; it stays imported so a missing export
        # still fails this case, as it did before.
        from app.core.observation import Observation, TaskContext, ExtractedField  # noqa: F401
        from app.config import get_settings

        settings = get_settings()
        engine = RewardEngine(settings)
        prev_obs = Observation(
            episode_id="test",
            task_id="task_001",
            step_number=0,
            extraction_progress=0.0,
        )
        new_obs = Observation(
            episode_id="test",
            task_id="task_001",
            step_number=1,
            extraction_progress=0.33,
            extracted_so_far=[
                ExtractedField(field_name="product_name", value="Test Product", confidence=0.9),
            ],
        )
        action = Action(action_type=ActionType.EXTRACT_FIELD, parameters={"field_name": "product_name"})
        reward, breakdown = engine.compute_reward(action, prev_obs, new_obs, max_steps=50)
        passed = isinstance(reward, float) and isinstance(breakdown, RewardBreakdown)
        details = {
            "reward": reward,
            "accuracy": breakdown.accuracy,
            "efficiency": breakdown.efficiency,
            "completeness": breakdown.completeness,
            "total": breakdown.total,
        }
        return passed, details

    async def _case_reward_ground_truth(self) -> tuple[bool, dict[str, Any]]:
        """MID: extracted values identical to the ground truth score accuracy 1.0."""
        from app.core.reward import RewardEngine
        from app.core.action import Action, ActionType
        from app.core.observation import Observation, ExtractedField
        from app.config import get_settings

        settings = get_settings()
        engine = RewardEngine(settings)
        engine.reset()
        ground_truth = {"product_name": "Test Product", "price": 99.99}
        prev_obs = Observation(episode_id="test", task_id="task_001", step_number=0, extraction_progress=0.0)
        new_obs = Observation(
            episode_id="test",
            task_id="task_001",
            step_number=1,
            extraction_progress=0.5,
            extracted_so_far=[
                ExtractedField(field_name="product_name", value="Test Product", confidence=0.95),
                ExtractedField(field_name="price", value=99.99, confidence=0.9),
            ],
        )
        action = Action(action_type=ActionType.EXTRACT_FIELD, parameters={"field_name": "price"})
        reward, breakdown = engine.compute_reward(action, prev_obs, new_obs, ground_truth=ground_truth, max_steps=50)
        perfect_match = breakdown.accuracy == 1.0
        details = {
            "reward": reward,
            "accuracy": breakdown.accuracy,
            "ground_truth_match": perfect_match,
            "progress_bonus": breakdown.progress_bonus,
        }
        return perfect_match, details

    async def _case_terminal_reward(self) -> tuple[bool, dict[str, Any]]:
        """HIGH: a successful, complete episode earns a positive terminal reward."""
        from app.core.reward import RewardEngine
        from app.core.observation import Observation, ExtractedField
        from app.config import get_settings

        settings = get_settings()
        engine = RewardEngine(settings)
        final_obs = Observation(
            episode_id="test",
            task_id="task_001",
            step_number=10,
            extraction_progress=1.0,
            extracted_so_far=[
                ExtractedField(field_name="product_name", value="Test Product", confidence=0.95),
                ExtractedField(field_name="price", value=99.99, confidence=0.9),
                ExtractedField(field_name="description", value="Great product", confidence=0.85),
            ],
        )
        ground_truth = {"product_name": "Test Product", "price": 99.99, "description": "Great product"}
        terminal_reward, terminal_breakdown = engine.compute_terminal_reward(
            final_obs, success=True, ground_truth=ground_truth
        )
        passed = terminal_reward > 0 and terminal_breakdown.completeness == 1.0
        details = {
            "terminal_reward": terminal_reward,
            "completeness": terminal_breakdown.completeness,
            "accuracy": terminal_breakdown.accuracy,
            "efficiency": terminal_breakdown.efficiency,
            "progress_bonus": terminal_breakdown.progress_bonus,
        }
        return passed, details

    # =========================================================================
    # PLUGINS TESTS
    # =========================================================================
    async def test_plugins(self):
        """Test plugin system at different complexity levels."""
        await self._run_case("List Plugins", TestComplexity.LOW, "Plugins",
                             self._case_list_plugins)
        await self._run_case("Install/Uninstall Plugin", TestComplexity.MID, "Plugins",
                             self._case_install_uninstall_plugin)
        await self._run_case("Plugin Categories & Core Plugins", TestComplexity.HIGH, "Plugins",
                             self._case_core_plugins)

    async def _case_list_plugins(self) -> tuple[bool, dict[str, Any]]:
        """LOW: the registry holds at least one category and one plugin."""
        from app.api.routes.plugins import PLUGIN_REGISTRY, _installed_plugins

        total_plugins = sum(len(plugins) for plugins in PLUGIN_REGISTRY.values())
        categories = list(PLUGIN_REGISTRY.keys())
        passed = total_plugins > 0 and len(categories) > 0
        details = {
            "total_plugins": total_plugins,
            "categories": categories,
            "installed_count": len(_installed_plugins),
        }
        return passed, details

    async def _case_install_uninstall_plugin(self) -> tuple[bool, dict[str, Any]]:
        """MID: add a plugin id to the installed set and remove it again."""
        from app.api.routes.plugins import _installed_plugins, PLUGIN_REGISTRY

        # First plugin that is not installed and is not a captcha plugin.
        test_plugin_id = None
        for plugins in PLUGIN_REGISTRY.values():
            for plugin in plugins:
                if plugin["id"] not in _installed_plugins and "captcha" not in plugin["id"]:
                    test_plugin_id = plugin["id"]
                    break
            if test_plugin_id:
                break

        if not test_plugin_id:
            return True, {"message": "No test plugin available (all installed)"}

        _installed_plugins.add(test_plugin_id)
        is_installed = test_plugin_id in _installed_plugins
        _installed_plugins.discard(test_plugin_id)
        is_uninstalled = test_plugin_id not in _installed_plugins
        details = {
            "test_plugin": test_plugin_id,
            "install_success": is_installed,
            "uninstall_success": is_uninstalled,
        }
        return is_installed and is_uninstalled, details

    async def _case_core_plugins(self) -> tuple[bool, dict[str, Any]]:
        """HIGH: enough core plugins and AI provider plugins are installed."""
        from app.api.routes.plugins import PLUGIN_REGISTRY, _installed_plugins

        categories_with_plugins = {cat: len(plugins) for cat, plugins in PLUGIN_REGISTRY.items()}
        core_plugins = {"mcp-browser", "mcp-search", "mcp-html", "skill-planner", "skill-navigator", "skill-extractor", "skill-verifier", "proc-json"}
        core_installed = core_plugins.intersection(_installed_plugins)
        ai_providers = {"google-api", "groq-api", "nvidia-api"}
        ai_installed = ai_providers.intersection(_installed_plugins)
        # Thresholds: at least 6 of the 8 core plugins and 2 of the 3 providers.
        passed = len(core_installed) >= 6 and len(ai_installed) >= 2
        details = {
            "categories": categories_with_plugins,
            "core_plugins_installed": list(core_installed),
            "ai_providers_installed": list(ai_installed),
            "total_installed": len(_installed_plugins),
        }
        return passed, details

    # =========================================================================
    # EMBEDDINGS TESTS (Gemini)
    # =========================================================================
    async def test_embeddings(self):
        """Test embeddings service with Gemini."""
        await self._run_case("Create Embeddings Service", TestComplexity.LOW, "Embeddings",
                             self._case_create_embeddings_service)
        await self._run_case("Generate Single Embedding", TestComplexity.MID, "Embeddings",
                             self._case_single_embedding)
        await self._run_case("Batch Embeddings & Similarity Search", TestComplexity.HIGH, "Embeddings",
                             self._case_batch_embeddings)

    @staticmethod
    def _create_google_embeddings_service() -> tuple[Any, str | None]:
        """Build the embeddings service from environment configuration.

        Returns:
            ``(service, api_key)``; ``api_key`` is None when neither
            GEMINI_API_KEY nor GOOGLE_API_KEY is set.
        """
        from app.core.embeddings import create_embeddings_service

        api_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
        model = os.getenv("GEMINI_MODEL_EMBEDDING", "models/gemini-embedding-2-preview")
        service = create_embeddings_service(
            provider="google",
            model=model,
            api_key=api_key,
        )
        return service, api_key

    async def _case_create_embeddings_service(self) -> tuple[bool, dict[str, Any]]:
        """LOW: the factory returns a service configured for the google provider."""
        # Unused here; kept so a missing export still fails this case.
        from app.core.embeddings import EmbeddingsService  # noqa: F401

        service, api_key = self._create_google_embeddings_service()
        passed = service is not None and service.provider == "google"
        details = {
            "provider": service.provider,
            "model": service.model,
            "has_api_key": api_key is not None,
        }
        return passed, details

    async def _case_single_embedding(self) -> tuple[bool, dict[str, Any]]:
        """MID: embedding one text yields a non-empty numpy array."""
        import numpy as np

        service, _ = self._create_google_embeddings_service()
        text = "This is a test document about web scraping and data extraction."
        embedding = await service.embed_text(text)
        passed = isinstance(embedding, np.ndarray) and len(embedding) > 0
        details = {
            "embedding_dim": len(embedding),
            "embedding_type": str(embedding.dtype),
            "text_length": len(text),
            "sample_values": embedding[:5].tolist() if len(embedding) > 5 else embedding.tolist(),
        }
        return passed, details

    async def _case_batch_embeddings(self) -> tuple[bool, dict[str, Any]]:
        """HIGH: batch-embed three texts and rank them against a query."""
        import numpy as np  # noqa: F401  (unused here; kept as an availability check)

        service, _ = self._create_google_embeddings_service()
        texts = [
            "Web scraping extracts data from websites",
            "Machine learning uses neural networks",
            "Data extraction from HTML pages",
        ]
        embeddings = await service.embed_batch(texts)
        query_embedding = await service.embed_query("scraping data from web")
        similar = service.find_most_similar(query_embedding, list(embeddings), top_k=2)
        passed = len(embeddings) == 3 and len(similar) == 2
        details = {
            "batch_size": len(texts),
            "embeddings_shape": embeddings.shape if hasattr(embeddings, 'shape') else len(embeddings),
            "top_match_index": similar[0][0] if similar else None,
            "top_match_score": similar[0][1] if similar else None,
            "similarity_ranking": [(idx, round(score, 4)) for idx, score in similar],
        }
        return passed, details

    # =========================================================================
    # VECTOR SEARCH / MEMORY TESTS
    # =========================================================================
    async def test_vector_search(self):
        """Test vector search and memory system."""
        await self._run_case("Initialize Memory Manager", TestComplexity.LOW, "Memory",
                             self._case_initialize_memory)
        await self._run_case("Store & Retrieve Memory", TestComplexity.MID, "Memory",
                             self._case_store_retrieve_memory)
        await self._run_case("Long-term Memory & Vector Search", TestComplexity.HIGH, "Memory",
                             self._case_long_term_memory)

    @staticmethod
    async def _create_memory_manager() -> Any:
        """Create a MemoryManager from the app settings and initialize it."""
        from app.memory.manager import MemoryManager
        from app.config import get_settings

        settings = get_settings()
        manager = MemoryManager(settings)
        await manager.initialize()
        return manager

    async def _case_initialize_memory(self) -> tuple[bool, dict[str, Any]]:
        """LOW: the manager reports itself initialized and exposes stats."""
        # Unused here; kept so a missing export still fails this case.
        from app.memory.manager import MemoryType  # noqa: F401

        manager = await self._create_memory_manager()
        passed = manager.is_initialized
        stats = await manager.get_stats()
        details = {
            "initialized": manager.is_initialized,
            "short_term_stats": stats.short_term,
            "working_stats": stats.working,
            "long_term_stats": stats.long_term,
        }
        return passed, details

    async def _case_store_retrieve_memory(self) -> tuple[bool, dict[str, Any]]:
        """MID: values stored in three memory types come back unchanged."""
        from app.memory.manager import MemoryType

        manager = await self._create_memory_manager()
        await manager.store("test_key", "test_value", MemoryType.SHORT_TERM)
        short_term_result = await manager.retrieve("test_key", MemoryType.SHORT_TERM)
        await manager.store("thought_1", "This is a test thought", MemoryType.WORKING, priority=0.5)
        working_result = await manager.retrieve("thought_1", MemoryType.WORKING)
        await manager.store("shared_key", {"data": "shared_value"}, MemoryType.SHARED)
        shared_result = await manager.retrieve("shared_key", MemoryType.SHARED)
        passed = (
            short_term_result == "test_value"
            and working_result == "This is a test thought"
            and shared_result == {"data": "shared_value"}
        )
        details = {
            "short_term": short_term_result,
            "working": working_result,
            "shared": shared_result,
        }
        # Cleanup; a failure here is reported as a failure of this case.
        await manager.clear()
        return passed, details

    async def _case_long_term_memory(self) -> tuple[bool, dict[str, Any]]:
        """HIGH: remembered documents can be recalled by a related query."""
        from app.memory.manager import MemoryType

        manager = await self._create_memory_manager()
        await manager.remember("Web scraping extracts data from websites using automated tools")
        await manager.remember("Machine learning models can predict outcomes based on data")
        await manager.remember("Data extraction from HTML pages requires parsing the DOM")
        results = await manager.recall("scraping data from web", top_k=2)
        # An empty recall is accepted when the long-term store is on its fallback.
        passed = len(results) >= 1 or manager.long_term._using_fallback
        details = {
            "documents_stored": 3,
            "search_results": len(results),
            "using_fallback": manager.long_term._using_fallback,
            "top_result_score": results[0].score if results else None,
        }
        # Cleanup; a failure here is reported as a failure of this case.
        await manager.clear(MemoryType.LONG_TERM)
        return passed, details

    # =========================================================================
    # AI PROVIDERS TESTS
    # =========================================================================
    async def test_ai_providers(self):
        """Test AI providers (NVIDIA, Groq)."""
        await self._run_case("AI Provider Initialization", TestComplexity.LOW, "AI Providers",
                             self._case_provider_initialization)
        await self._run_case("NVIDIA Completion", TestComplexity.MID, "AI Providers",
                             self._case_nvidia_completion)
        await self._run_case("Groq Code Generation", TestComplexity.HIGH, "AI Providers",
                             self._case_groq_code_generation)

    @staticmethod
    async def _create_router() -> tuple[Any, str | None, str | None]:
        """Build and initialize a SmartModelRouter from environment API keys.

        Returns:
            ``(router, nvidia_key, groq_key)``; a key is None when its
            environment variable is unset.
        """
        from app.models.router import SmartModelRouter

        nvidia_key = os.getenv("NVIDIA_API_KEY")
        groq_key = os.getenv("GROQ_API_KEY")
        google_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
        router = SmartModelRouter(
            nvidia_api_key=nvidia_key,
            groq_api_key=groq_key,
            google_api_key=google_key,
        )
        await router.initialize()
        return router, nvidia_key, groq_key

    async def _case_provider_initialization(self) -> tuple[bool, dict[str, Any]]:
        """LOW: the router exposes at least one of the nvidia/groq providers."""
        router, nvidia_key, groq_key = await self._create_router()
        providers = list(router.providers.keys())
        has_nvidia = "nvidia" in providers
        has_groq = "groq" in providers
        details = {
            "available_providers": providers,
            "has_nvidia": has_nvidia,
            "has_groq": has_groq,
            "nvidia_key_present": nvidia_key is not None,
            "groq_key_present": groq_key is not None,
        }
        return has_nvidia or has_groq, details

    async def _case_nvidia_completion(self) -> tuple[bool, dict[str, Any]]:
        """MID: a short completion with fallback disabled returns content."""
        from app.models.providers.base import TaskType

        router, _, _ = await self._create_router()
        messages = [{"role": "user", "content": "What is 2+2? Reply with just the number."}]
        response = await router.complete(
            messages=messages,
            task_type=TaskType.GENERAL,
            model="llama-3.3-70b",
            max_tokens=50,
            fallback=False,
        )
        passed = response is not None and response.content is not None
        details = {
            "model_used": response.model if response else None,
            "provider_used": response.provider if response else None,
            "content_preview": response.content[:100] if response and response.content else None,
            "total_tokens": response.usage.total_tokens if response and response.usage else None,
        }
        return passed, details

    async def _case_groq_code_generation(self) -> tuple[bool, dict[str, Any]]:
        """HIGH: a code request with fallback disabled returns text containing "def"."""
        from app.models.providers.base import TaskType

        router, _, _ = await self._create_router()
        messages = [{"role": "user", "content": "Write a Python function to calculate factorial. Be concise."}]
        response = await router.complete(
            messages=messages,
            task_type=TaskType.CODE,
            model="llama-3.3-70b-versatile",
            max_tokens=200,
            fallback=False,
        )
        passed = response is not None and response.content is not None and "def" in response.content.lower()
        details = {
            "model_used": response.model if response else None,
            "provider_used": response.provider if response else None,
            "content_preview": response.content[:200] if response and response.content else None,
            "has_code": "def" in response.content.lower() if response and response.content else False,
        }
        return passed, details

    # =========================================================================
    # API ENDPOINTS TESTS
    # =========================================================================
    async def test_api_endpoints(self):
        """Test API endpoints."""
        await self._run_case("List Tasks Endpoint", TestComplexity.LOW, "API",
                             self._case_list_tasks)
        await self._run_case("Plugins Endpoint", TestComplexity.MID, "API",
                             self._case_plugins_endpoint)
        await self._run_case("Episode Lifecycle", TestComplexity.HIGH, "API",
                             self._case_episode_lifecycle)

    async def _case_list_tasks(self) -> tuple[bool, dict[str, Any]]:
        """LOW: call the tasks route handler directly and expect tasks back."""
        # TASK_REPOSITORY is unused here; kept so a missing export still fails.
        from app.api.routes.tasks import TASK_REPOSITORY, list_tasks  # noqa: F401

        response = await list_tasks()
        passed = response.total > 0 and len(response.tasks) > 0
        details = {
            "total_tasks": response.total,
            "tasks_returned": len(response.tasks),
            "task_ids": [t.id for t in response.tasks],
        }
        return passed, details

    async def _case_plugins_endpoint(self) -> tuple[bool, dict[str, Any]]:
        """MID: call the plugin route handlers directly and check their payloads."""
        from app.api.routes.plugins import list_plugins, list_installed_plugins

        all_plugins = await list_plugins()
        installed = await list_installed_plugins()
        passed = "plugins" in all_plugins and installed["count"] > 0
        details = {
            "total_plugins": all_plugins["stats"]["total"],
            "installed": installed["count"],
            "categories": all_plugins["categories"],
        }
        return passed, details

    async def _case_episode_lifecycle(self) -> tuple[bool, dict[str, Any]]:
        """HIGH: create, reset, list, inspect and remove an environment."""
        # get_environment is unused here; kept so a missing export still fails.
        from app.api.deps import create_environment, get_environment, remove_environment, list_environments  # noqa: F401
        from app.config import get_settings

        settings = get_settings()
        episode_id = "api-test-001"
        env = create_environment(episode_id, settings)
        await env.reset(task_id="task_001")
        envs = list_environments()
        state = env.get_state()
        removed = remove_environment(episode_id)
        passed = bool(
            episode_id in envs
            and state["task_id"] == "task_001"
            and removed
        )
        details = {
            "episode_id": episode_id,
            "task_id": state.get("task_id"),
            "environments_listed": len(envs),
            "removed": removed,
        }
        return passed, details
async def main():
    """Run the full suite and translate the outcome into a process exit code.

    Returns:
        0 when every recorded result passed, 1 otherwise.
    """
    outcomes = await ScrapeRLTestSuite().run_all_tests()
    everything_passed = all(outcome.passed for outcome in outcomes)
    if everything_passed:
        return 0
    return 1
if __name__ == "__main__":
    # Script entry point: the suite's exit code becomes the process status.
    sys.exit(asyncio.run(main()))