DevodG committed on
Commit
b1a271a
·
1 Parent(s): 7a52dca

feat: MiroOrg v1.1 — AI Financial Intelligence System

Browse files

- 6-layer architecture: Core Platform, Agents, Domain Packs, Simulation, Learning, Sentinel
- Finance domain pack: stance detection, event analysis, ticker/entity resolution
- Finance intelligence API: /finance/ticker, /finance/news/analyze, /finance/headlines
- Learning layer: knowledge ingestion, skill distillation, prompt optimization, A/B testing
- Sentinel layer: health monitoring, LLM-based diagnosis, safe auto-patching, capability tracking
- Frontend: JANUS dark UI with Command/Intel Stream/Markets tabs
- Intel Stream: news search with Deep Research (full 5-agent pipeline) on any article
- Markets: ticker search with TradingView chart (NSE/BSE support), AI signal, Deep Research
- Top navigation bar replacing sidebar for full-width layout
- All sensitive data excluded via .gitignore (no .env, no runtime JSON, no API keys)

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitignore +46 -4
  2. .kiro/specs/ai-financial-intelligence-system/requirements.md +85 -0
  3. .kiro/specs/ai-financial-intelligence-system/tasks.md +104 -104
  4. ARCHITECTURE.md +567 -0
  5. README.md +282 -116
  6. backend/.env.example +10 -1
  7. backend/app/agents/planner.py +45 -2
  8. backend/app/agents/research.py +64 -2
  9. backend/app/agents/switchboard.py +26 -3
  10. backend/app/agents/synthesizer.py +69 -2
  11. backend/app/agents/verifier.py +75 -2
  12. backend/app/config.py +58 -5
  13. backend/app/data/sentinel/.gitkeep +8 -0
  14. backend/app/data/sentinel/pending_patches/.gitkeep +3 -0
  15. backend/app/graph.py +69 -11
  16. backend/app/main.py +104 -10
  17. backend/app/prompts/planner.txt +89 -17
  18. backend/app/prompts/research.txt +87 -19
  19. backend/app/prompts/synthesizer.txt +100 -12
  20. backend/app/prompts/verifier.txt +111 -16
  21. backend/app/routers/finance.py +297 -0
  22. backend/app/routers/learning.py +282 -0
  23. backend/app/routers/sentinel.py +330 -0
  24. backend/app/routers/simulation.py +86 -9
  25. backend/app/schemas.py +161 -15
  26. backend/app/services/api_discovery/__init__.py +13 -0
  27. backend/app/services/api_discovery/catalog_loader.py +85 -0
  28. backend/app/services/api_discovery/classifier.py +92 -0
  29. backend/app/services/api_discovery/metadata_store.py +60 -0
  30. backend/app/services/api_discovery/scorer.py +99 -0
  31. backend/app/services/case_store.py +16 -0
  32. backend/app/services/external_sources.py +37 -10
  33. backend/app/services/learning/__init__.py +29 -0
  34. backend/app/services/learning/freshness_manager.py +10 -0
  35. backend/app/services/learning/knowledge_ingestor.py +193 -0
  36. backend/app/services/learning/knowledge_store.py +225 -0
  37. backend/app/services/learning/learning_engine.py +439 -0
  38. backend/app/services/learning/prompt_optimizer.py +274 -0
  39. backend/app/services/learning/scheduler.py +220 -0
  40. backend/app/services/learning/skill_distiller.py +279 -0
  41. backend/app/services/learning/trust_manager.py +245 -0
  42. backend/app/services/mirofish_client.py +237 -53
  43. backend/app/services/sentinel/__init__.py +14 -0
  44. backend/app/services/sentinel/capability_tracker.py +414 -0
  45. backend/app/services/sentinel/diagnostician.py +219 -0
  46. backend/app/services/sentinel/patcher.py +359 -0
  47. backend/app/services/sentinel/scheduler.py +141 -0
  48. backend/app/services/sentinel/sentinel_engine.py +349 -0
  49. backend/app/services/sentinel/watcher.py +370 -0
  50. backend/app/services/simulation_store.py +51 -2
.gitignore CHANGED
@@ -1,10 +1,31 @@
 
1
  backend/.venv/
 
 
 
 
 
 
 
 
 
 
2
  backend/.env
 
 
 
 
 
 
 
 
 
3
  frontend/node_modules/
4
  frontend/.next/
5
- .DS_Store
6
- **/__pycache__/
7
- *.pyc
 
8
  backend/app/data/memory/*.json
9
  backend/app/data/simulations/*.json
10
  backend/app/data/logs/*
@@ -12,5 +33,26 @@ backend/app/data/knowledge/*.json
12
  backend/app/data/skills/*.json
13
  backend/app/data/prompt_versions/*.json
14
  backend/app/data/learning/*.json
15
- .venv/
 
 
 
 
 
 
 
 
 
 
 
 
16
  *.log
 
 
 
 
 
 
 
 
 
 
1
+ # ── Python ────────────────────────────────────────────────────
2
  backend/.venv/
3
+ .venv/
4
+ **/__pycache__/
5
+ *.pyc
6
+ *.pyo
7
+ *.pyd
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+
12
+ # ── Environment / Secrets ─────────────────────────────────────
13
  backend/.env
14
+ .env
15
+ .env.local
16
+ .env.*.local
17
+ *.key
18
+ *.pem
19
+ *.p12
20
+ *.pfx
21
+
22
+ # ── Frontend ──────────────────────────────────────────────────
23
  frontend/node_modules/
24
  frontend/.next/
25
+ frontend/.turbo/
26
+ frontend/out/
27
+
28
+ # ── Runtime data (never commit) ───────────────────────────────
29
  backend/app/data/memory/*.json
30
  backend/app/data/simulations/*.json
31
  backend/app/data/logs/*
 
33
  backend/app/data/skills/*.json
34
  backend/app/data/prompt_versions/*.json
35
  backend/app/data/learning/*.json
36
+
37
+ # Sentinel runtime data
38
+ backend/app/data/sentinel/*.json
39
+ backend/app/data/sentinel/pending_patches/*.json
40
+ !backend/app/data/sentinel/.gitkeep
41
+ !backend/app/data/sentinel/pending_patches/.gitkeep
42
+
43
+ # ── OS / Editor ───────────────────────────────────────────────
44
+ .DS_Store
45
+ .DS_Store?
46
+ Thumbs.db
47
+ .vscode/settings.json
48
+ .vscode/launch.json
49
  *.log
50
+
51
+ # ── Misc ──────────────────────────────────────────────────────
52
+ *.bak
53
+ *.tmp
54
+ *_new.tsx
55
+ *_old.tsx
56
+
57
+ # Stray nested directory
58
+ backend/backend/
.kiro/specs/ai-financial-intelligence-system/requirements.md CHANGED
@@ -471,3 +471,88 @@ This document specifies requirements for MiroOrg v1.1, a general intelligence op
471
  60. THE System SHALL provide GET /prompts/versions/{name} endpoint for prompt version history
472
  61. THE System SHALL provide POST /prompts/optimize/{name} endpoint for prompt optimization
473
  62. THE System SHALL provide POST /prompts/promote/{name}/{version} endpoint for promoting prompt versions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  60. THE System SHALL provide GET /prompts/versions/{name} endpoint for prompt version history
472
  61. THE System SHALL provide POST /prompts/optimize/{name} endpoint for prompt optimization
473
  62. THE System SHALL provide POST /prompts/promote/{name}/{version} endpoint for promoting prompt versions
474
+
475
+
476
+ ### Requirement 18: Sentinel Layer (Adaptive Maintenance)
477
+
478
+ **User Story:** As a user, I want the system to monitor its own health and propose safe corrections when issues are detected, so that the system can self-heal minor problems without breaking anything or requiring constant manual intervention.
479
+
480
+ #### Acceptance Criteria
481
+
482
+ 1. THE System SHALL implement a Sentinel Layer as Layer 6 in the architecture
483
+ 2. THE System SHALL observe system health metrics: provider success rates, agent output quality, error frequencies, response times, storage usage
484
+ 3. THE System SHALL diagnose common issues: provider failures, degraded prompts, stale knowledge, configuration drift
485
+ 4. THE System SHALL propose limited safe corrections: prompt adjustments, configuration tweaks, knowledge refresh
486
+ 5. THE System SHALL NEVER autonomously edit Python or TypeScript code
487
+ 6. THE System SHALL NEVER make risky changes without explicit human approval
488
+ 7. THE System SHALL maintain a conservative safety-first approach
489
+
490
+ #### Health Monitoring
491
+
492
+ 8. THE System SHALL track provider health: success rate, average latency, error types, fallback frequency
493
+ 9. THE System SHALL track agent performance: output quality, confidence scores, execution time, error rates
494
+ 10. THE System SHALL track storage health: disk usage, cache hit rates, eviction frequency
495
+ 11. THE System SHALL track external API health: rate limit status, error rates, response times
496
+ 12. THE System SHALL detect anomalies: sudden quality drops, repeated errors, resource exhaustion
497
+
498
+ #### Diagnostic Capabilities
499
+
500
+ 13. THE System SHALL diagnose provider issues: API key expiration, rate limiting, service outages
501
+ 14. THE System SHALL diagnose prompt degradation: low confidence scores, repeated failures, user corrections
502
+ 15. THE System SHALL diagnose knowledge staleness: outdated information, expired cache entries
503
+ 16. THE System SHALL diagnose configuration drift: missing environment variables, invalid settings
504
+ 17. THE System SHALL provide clear diagnostic reports with evidence and severity levels
505
+
506
+ #### Safe Correction Proposals
507
+
508
+ 18. THE System SHALL propose prompt improvements when quality degrades
509
+ 19. THE System SHALL propose knowledge refresh when staleness is detected
510
+ 20. THE System SHALL propose configuration updates when drift is detected
511
+ 21. THE System SHALL propose provider fallback adjustments when reliability changes
512
+ 22. EACH proposal SHALL include: issue description, proposed fix, risk level, rollback plan
513
+ 23. THE System SHALL require human approval for MEDIUM and HIGH risk changes
514
+ 24. THE System SHALL auto-apply only LOW risk changes with full audit logging
515
+
516
+ #### Safety Constraints
517
+
518
+ 25. THE System SHALL NEVER modify source code files (.py, .ts, .tsx, .js)
519
+ 26. THE System SHALL NEVER delete data directories or user content
520
+ 27. THE System SHALL NEVER change API keys or credentials
521
+ 28. THE System SHALL NEVER disable critical system components
522
+ 29. THE System SHALL maintain full audit logs of all proposed and applied changes
523
+ 30. THE System SHALL provide rollback capability for all applied changes
524
+
525
+ #### Capability Trend Tracking
526
+
527
+ 31. THE System SHALL track capability trends over time: answer quality, user satisfaction proxies, system reliability
528
+ 32. THE System SHALL detect capability regressions: quality drops, increased errors, slower responses
529
+ 33. THE System SHALL correlate regressions with recent changes: prompt updates, configuration changes, knowledge updates
530
+ 34. THE System SHALL alert when capabilities degrade beyond acceptable thresholds
531
+
532
+ #### Scheduler Integration
533
+
534
+ 35. THE System SHALL run health checks on a configurable schedule (default: hourly)
535
+ 36. THE System SHALL run diagnostics when anomalies are detected
536
+ 37. THE System SHALL use the same lightweight scheduler as the learning layer
537
+ 38. THE System SHALL respect system resource constraints (CPU, battery, idle time)
538
+ 39. THE System SHALL NOT interfere with user operations or learning tasks
539
+
540
+ #### API Endpoints
541
+
542
+ 40. THE System SHALL provide GET /sentinel/status endpoint for sentinel system status
543
+ 41. THE System SHALL provide GET /sentinel/health endpoint for comprehensive health report
544
+ 42. THE System SHALL provide GET /sentinel/diagnostics endpoint for recent diagnostic results
545
+ 43. THE System SHALL provide GET /sentinel/proposals endpoint for pending correction proposals
546
+ 44. THE System SHALL provide POST /sentinel/proposals/{id}/approve endpoint for approving proposals
547
+ 45. THE System SHALL provide POST /sentinel/proposals/{id}/reject endpoint for rejecting proposals
548
+ 46. THE System SHALL provide GET /sentinel/audit endpoint for audit log of applied changes
549
+ 47. THE System SHALL provide POST /sentinel/rollback/{change_id} endpoint for rolling back changes
550
+
551
+ #### Integration with Existing Layers
552
+
553
+ 48. THE System SHALL integrate with learning layer for quality metrics
554
+ 49. THE System SHALL integrate with provider abstraction for health monitoring
555
+ 50. THE System SHALL integrate with prompt management for prompt quality tracking
556
+ 51. THE System SHALL integrate with knowledge store for staleness detection
557
+ 52. THE System SHALL integrate with configuration system for drift detection
558
+
.kiro/specs/ai-financial-intelligence-system/tasks.md CHANGED
@@ -114,72 +114,72 @@ The implementation follows 9 phases, each building on the previous while maintai
114
 
115
  ### Phase 3: Agent Enhancement with Domain Intelligence
116
 
117
- - [ ] 7. Enhance Switchboard with domain detection
118
- - [ ] 7.1 Add domain pack dimension to routing
119
  - Modify `backend/app/agents/switchboard.py` to add domain_pack to routing decision
120
  - Implement domain detection using domain registry
121
  - Update RouteDecision schema in `backend/app/schemas.py`
122
  - _Requirements: 4.1, 5.6_
123
 
124
- - [ ] 7.2 Implement complexity-based routing logic
125
  - Ensure simple queries (≤5 words) route to solo mode
126
  - Ensure medium queries (≤25 words) route to standard mode
127
  - Ensure complex queries (>25 words) route to deep mode
128
  - _Requirements: 4.2, 4.3, 4.4_
129
 
130
- - [ ] 7.3 Implement simulation keyword detection
131
  - Add simulation trigger keyword detection
132
  - Load keywords from environment configuration
133
  - Set task_family="simulation" when keywords detected
134
  - _Requirements: 4.5, 4.6, 4.7_
135
 
136
- - [ ] 8. Enhance Research Agent with domain capabilities
137
- - [ ] 8.1 Integrate domain pack research enhancement
138
  - Modify `backend/app/agents/research.py` to detect domain
139
  - Call domain pack enhance_research() when domain detected
140
  - Add structured entity extraction output
141
  - _Requirements: 5.3, 7.1, 7.2, 7.3, 7.4_
142
 
143
- - [ ] 8.2 Update research agent prompt
144
  - Update `backend/app/prompts/research.txt` with domain intelligence instructions
145
  - Add instructions for entity and ticker extraction
146
  - Add instructions for structured output
147
  - _Requirements: 7.14_
148
 
149
- - [ ] 9. Enhance Verifier Agent with domain capabilities
150
- - [ ] 9.1 Integrate domain pack verification enhancement
151
  - Modify `backend/app/agents/verifier.py` to detect domain
152
  - Call domain pack enhance_verification() when domain detected
153
  - Add structured credibility scoring output
154
  - _Requirements: 5.3, 7.7, 7.8, 7.9_
155
 
156
- - [ ] 9.2 Update verifier agent prompt
157
  - Update `backend/app/prompts/verifier.txt` with domain intelligence instructions
158
  - Add instructions for rumor and scam detection
159
  - Add instructions for uncertainty surfacing
160
  - _Requirements: 7.14_
161
 
162
- - [ ] 10. Enhance Planner and Synthesizer Agents
163
- - [ ] 10.1 Add simulation mode suggestion to Planner
164
  - Modify `backend/app/agents/planner.py` to detect simulation opportunities
165
  - Add simulation_suggested field to output
166
  - Update `backend/app/prompts/planner.txt` with simulation guidance
167
  - _Requirements: 7.6, 7.14_
168
 
169
- - [ ] 10.2 Add uncertainty quantification to Synthesizer
170
  - Modify `backend/app/agents/synthesizer.py` to quantify uncertainty
171
  - Add simulation recommendation logic
172
  - Update `backend/app/prompts/synthesizer.txt` with uncertainty instructions
173
  - _Requirements: 7.11, 7.12, 7.13, 7.14_
174
 
175
- - [ ] 11. Update graph execution with domain context
176
- - [ ] 11.1 Pass domain pack context through pipeline
177
  - Modify `backend/app/graph.py` to detect domain early
178
  - Pass domain context to all agents
179
  - Ensure domain-enhanced execution flows correctly
180
  - _Requirements: 2.5, 5.3, 7.15_
181
 
182
- - [ ] 12. Checkpoint - Verify agent enhancements
183
  - Ensure Switchboard detects finance domain correctly
184
  - Verify Research agent extracts entities and tickers
185
  - Verify Verifier agent scores credibility
@@ -188,32 +188,32 @@ The implementation follows 9 phases, each building on the previous while maintai
188
 
189
  ### Phase 4: Simulation Integration Enhancement
190
 
191
- - [ ] 13. Enhance simulation workflow and case linking
192
- - [ ] 13.1 Add case linking to simulation router
193
  - Modify `backend/app/routers/simulation.py` to link case_id
194
  - Improve error messages for MiroFish failures
195
  - Add better status reporting
196
  - _Requirements: 8.1, 8.11, 8.12_
197
 
198
- - [ ] 13.2 Enhance simulation store with search and filtering
199
  - Modify `backend/app/services/simulation_store.py`
200
  - Add simulation search by title or prediction_goal
201
  - Add simulation filtering by status
202
  - _Requirements: 8.10_
203
 
204
- - [ ] 13.3 Update case storage for simulation linking
205
  - Modify `backend/app/services/case_store.py` to add simulation_id field
206
  - Add case-to-simulation lookup functionality
207
  - Update CaseRecord schema in `backend/app/schemas.py`
208
  - _Requirements: 10.8, 14.9_
209
 
210
- - [ ] 13.4 Add simulation workflow to graph execution
211
  - Modify `backend/app/graph.py` to add simulation handoff logic
212
  - Add simulation result synthesis
213
  - Ensure simulation results flow into final answer
214
  - _Requirements: 3.4, 8.13_
215
 
216
- - [ ] 14. Checkpoint - Verify simulation integration
217
  - Ensure simulation requests create linked cases
218
  - Verify cases with simulations show simulation_id
219
  - Verify simulation results are synthesized correctly
@@ -221,8 +221,8 @@ The implementation follows 9 phases, each building on the previous while maintai
221
 
222
  ### Phase 5: API Discovery Subsystem
223
 
224
- - [ ] 15. Create API discovery infrastructure
225
- - [ ] 15.1 Create API discovery structure
226
  - Create `backend/app/services/api_discovery/__init__.py`
227
  - Create `backend/app/services/api_discovery/catalog_loader.py`
228
  - Create `backend/app/services/api_discovery/classifier.py`
@@ -230,12 +230,12 @@ The implementation follows 9 phases, each building on the previous while maintai
230
  - Create `backend/app/services/api_discovery/metadata_store.py`
231
  - _Requirements: 9.3, 9.4_
232
 
233
- - [ ] 15.2 Implement catalog loader
234
  - Implement load_public_apis_catalog() to fetch from GitHub or local cache
235
  - Parse API entries with name, description, auth, HTTPS, CORS, category, link
236
  - _Requirements: 9.3, 9.6_
237
 
238
- - [ ] 15.3 Implement API classifier and scorer
239
  - Implement classify_api() to categorize APIs by domain
240
  - Implement score_api_usefulness() to prioritize APIs for integration
241
  - Consider auth simplicity, HTTPS, CORS, category relevance
@@ -247,7 +247,7 @@ The implementation follows 9 phases, each building on the previous while maintai
247
  - Add `GET /api-discovery/top-scored` endpoint
248
  - _Requirements: 9.4_
249
 
250
- - [ ] 16. Checkpoint - Verify API discovery
251
  - Ensure catalog loads successfully
252
  - Verify APIs are classified correctly
253
  - Verify scoring produces reasonable priorities
@@ -255,94 +255,94 @@ The implementation follows 9 phases, each building on the previous while maintai
255
 
256
  ### Phase 6: Frontend Enhancement
257
 
258
- - [ ] 17. Create layout and navigation infrastructure
259
- - [ ] 17.1 Create layout components
260
  - Create `frontend/src/components/layout/Header.tsx` with branding and navigation
261
  - Create `frontend/src/components/layout/Navigation.tsx` with tab navigation
262
  - Modify `frontend/src/app/layout.tsx` to use new layout components
263
  - _Requirements: 12.1, 12.2_
264
 
265
- - [ ] 17.2 Create common UI components
266
  - Create `frontend/src/components/common/Badge.tsx` for status indicators
267
  - Create `frontend/src/components/common/Card.tsx` for content containers
268
  - Create `frontend/src/components/common/LoadingSpinner.tsx` for loading states
269
  - Create `frontend/src/components/common/ErrorMessage.tsx` for error display
270
  - _Requirements: 12.10, 12.11, 12.15_
271
 
272
- - [ ] 17.3 Create API client and type definitions
273
  - Create `frontend/src/lib/api.ts` with MiroOrgClient class
274
  - Implement methods for all backend endpoints
275
  - Create `frontend/src/lib/types.ts` with TypeScript interfaces
276
  - _Requirements: 11.21_
277
 
278
- - [ ] 18. Create Main Dashboard page
279
- - [ ] 18.1 Implement dashboard with system overview
280
  - Modify `frontend/src/app/page.tsx` to show quick stats
281
  - Display recent cases summary
282
  - Display system health status
283
  - Add navigation to main features
284
  - _Requirements: 12.2_
285
 
286
- - [ ] 19. Create Analyze page and components
287
- - [ ] 19.1 Create Analyze page structure
288
  - Create `frontend/src/app/analyze/page.tsx` with analysis interface
289
  - Create `frontend/src/components/analyze/TaskInput.tsx` for user input
290
  - Create `frontend/src/components/analyze/ModeSelector.tsx` for mode selection
291
  - _Requirements: 12.3, 12.7, 12.8_
292
 
293
- - [ ] 19.2 Create result display components
294
  - Create `frontend/src/components/analyze/ResultViewer.tsx` for final answers
295
  - Create `frontend/src/components/analyze/AgentOutputPanel.tsx` for agent outputs
296
  - Display route/debug badges and confidence indicators
297
  - _Requirements: 12.9, 12.10, 12.11, 12.14_
298
 
299
- - [ ] 20. Create Cases page and components
300
- - [ ] 20.1 Create Cases history interface
301
  - Create `frontend/src/app/cases/page.tsx` with case list
302
  - Create `frontend/src/components/cases/CaseList.tsx` for listing cases
303
  - Create `frontend/src/components/cases/CaseCard.tsx` for case preview
304
  - _Requirements: 12.4, 12.17_
305
 
306
- - [ ] 20.2 Create Case detail view
307
  - Create `frontend/src/app/cases/[id]/page.tsx` for case details
308
  - Create `frontend/src/components/cases/CaseDetail.tsx` for full case display
309
  - Display case_id, routing decision, agent outputs, timestamps
310
  - _Requirements: 12.17_
311
 
312
- - [ ] 21. Create Simulation page and components
313
- - [ ] 21.1 Create Simulation submission interface
314
  - Create `frontend/src/app/simulation/page.tsx` with simulation form
315
  - Create `frontend/src/components/simulation/SimulationForm.tsx` for input
316
  - Create `frontend/src/components/simulation/SimulationStatus.tsx` for status display
317
  - _Requirements: 12.6, 12.13_
318
 
319
- - [ ] 21.2 Create Simulation detail and chat interface
320
  - Create `frontend/src/app/simulation/[id]/page.tsx` for simulation details
321
  - Create `frontend/src/components/simulation/SimulationReport.tsx` for report display
322
  - Create `frontend/src/components/simulation/SimulationChat.tsx` for post-simulation chat
323
  - _Requirements: 12.13_
324
 
325
- - [ ] 22. Create Prompt Lab and Config pages
326
- - [ ] 22.1 Create Prompt Lab interface
327
  - Create `frontend/src/app/prompts/page.tsx` with prompt management
328
  - Create `frontend/src/components/prompts/PromptList.tsx` for listing prompts
329
  - Create `frontend/src/components/prompts/PromptEditor.tsx` for editing
330
  - _Requirements: 12.5_
331
 
332
- - [ ] 22.2 Create Config page
333
  - Create `frontend/src/app/config/page.tsx` with system configuration view
334
  - Display provider status, feature flags, health checks
335
  - _Requirements: 12.1_
336
 
337
- - [ ] 23. Implement dark theme and styling
338
- - [ ] 23.1 Update global styles with dark theme
339
  - Modify `frontend/src/app/globals.css` with dark color palette
340
  - Implement card-based structure with subtle borders
341
  - Add animations and transitions
342
  - Use Inter font family
343
  - _Requirements: 12.15, 12.16_
344
 
345
- - [ ] 24. Checkpoint - Verify frontend functionality
346
  - Ensure all pages are accessible via navigation
347
  - Verify Analyze workflow works end-to-end
348
  - Verify Case history displays correctly
@@ -353,7 +353,7 @@ The implementation follows 9 phases, each building on the previous while maintai
353
 
354
  ### Phase 7: Testing and Documentation
355
 
356
- - [ ] 25. Write unit tests for core functionality
357
  - [ ]* 25.1 Write provider abstraction tests
358
  - Test OpenRouter, Ollama, OpenAI provider calls
359
  - Test provider fallback behavior
@@ -388,7 +388,7 @@ The implementation follows 9 phases, each building on the previous while maintai
388
  - Test error handling for disabled MiroFish
389
  - _Requirements: 8.1, 8.11, 8.12_
390
 
391
- - [ ] 26. Write property-based tests
392
  - [ ]* 26.1 Write Property 1: Configuration Environment Isolation
393
  - **Property 1: Configuration Environment Isolation**
394
  - **Validates: Requirements 1.8, 6.7**
@@ -479,7 +479,7 @@ The implementation follows 9 phases, each building on the previous while maintai
479
  - Test that new domain packs don't require agent changes
480
  - Create mock domain pack and verify integration
481
 
482
- - [ ] 27. Write integration tests
483
  - [ ]* 27.1 Write end-to-end case execution test
484
  - Test complete workflow from user input to final answer
485
  - Verify all agents execute correctly
@@ -502,8 +502,8 @@ The implementation follows 9 phases, each building on the previous while maintai
502
  - Verify finance pack capabilities are used
503
  - _Requirements: 5.3, 7.1, 7.7_
504
 
505
- - [ ] 28. Create comprehensive documentation
506
- - [ ] 28.1 Update main README
507
  - Update `README.md` with architecture overview
508
  - Document four-layer architecture
509
  - Document agent roles and responsibilities
@@ -511,26 +511,26 @@ The implementation follows 9 phases, each building on the previous while maintai
511
  - Add environment variable reference
512
  - _Requirements: 13.2, 13.3, 13.4, 13.10, 13.11, 13.12_
513
 
514
- - [ ] 28.2 Create architecture documentation
515
  - Create `ARCHITECTURE.md` with detailed architecture description
516
  - Document component interactions
517
  - Document data flow
518
  - _Requirements: 13.10_
519
 
520
- - [ ] 28.3 Create domain pack documentation
521
  - Create `DOMAIN_PACKS.md` with domain pack integration guide
522
  - Document how to create new domain packs
523
  - Document finance pack capabilities
524
  - _Requirements: 13.13_
525
 
526
- - [ ] 28.4 Create testing documentation
527
  - Create `TESTING.md` with testing strategy and guidelines
528
  - Document unit test patterns
529
  - Document property-based test patterns
530
  - Document integration test patterns
531
  - _Requirements: 14.1, 14.2, 14.3_
532
 
533
- - [ ] 28.5 Create deployment documentation
534
  - Create `DEPLOYMENT.md` with deployment instructions
535
  - Document environment setup
536
  - Document dependency installation
@@ -546,27 +546,27 @@ The implementation follows 9 phases, each building on the previous while maintai
546
 
547
  ### Phase 8: Cleanup and Optimization
548
 
549
- - [ ] 30. Remove dead code and optimize performance
550
- - [ ] 30.1 Clean up codebase
551
  - Remove unused imports across all files
552
  - Remove commented code
553
  - Remove duplicate implementations
554
  - _Requirements: 1.5_
555
 
556
- - [ ] 30.2 Optimize external API performance
557
  - Add caching for market quotes with 5 minute TTL
558
  - Verify connection pooling is implemented
559
  - Verify request timeouts are configured
560
  - Add rate limiting for external APIs
561
  - _Requirements: 15.3, 15.4, 15.5, 15.6_
562
 
563
- - [ ] 30.3 Polish error messages and logging
564
  - Review all error messages for clarity and consistency
565
  - Review log levels for appropriateness
566
  - Add missing log entries for key operations
567
  - _Requirements: 9.3, 9.8_
568
 
569
- - [ ] 30.4 Security review
570
  - Verify no API keys in source code
571
  - Verify error messages don't leak internals
572
  - Verify input validation is comprehensive
@@ -579,7 +579,7 @@ The implementation follows 9 phases, each building on the previous while maintai
579
  - Identify and address bottlenecks
580
  - _Requirements: 15.1, 15.2_
581
 
582
- - [ ] 31. Final checkpoint - System verification
583
  - Ensure no dead code remains
584
  - Verify performance meets requirements
585
  - Verify error messages are clear and consistent
@@ -589,15 +589,15 @@ The implementation follows 9 phases, each building on the previous while maintai
589
 
590
  ### Phase 9: Autonomous Knowledge Evolution Layer
591
 
592
- - [ ] 32. Create learning subsystem infrastructure
593
- - [ ] 32.1 Create learning service structure
594
  - Create `backend/app/services/learning/__init__.py`
595
  - Create `backend/app/services/learning/knowledge_ingestor.py`
596
  - Create `backend/app/services/learning/knowledge_store.py`
597
  - Create `backend/app/services/learning/learning_engine.py`
598
  - _Requirements: 17.1, 17.2, 17.3_
599
 
600
- - [ ] 32.2 Create additional learning services
601
  - Create `backend/app/services/learning/prompt_optimizer.py`
602
  - Create `backend/app/services/learning/skill_distiller.py`
603
  - Create `backend/app/services/learning/trust_manager.py`
@@ -605,22 +605,22 @@ The implementation follows 9 phases, each building on the previous while maintai
605
  - Create `backend/app/services/learning/scheduler.py`
606
  - _Requirements: 17.1, 17.17, 17.23, 17.28, 17.40_
607
 
608
- - [ ] 32.3 Create data directories
609
  - Create `backend/app/data/knowledge/` directory
610
  - Create `backend/app/data/skills/` directory
611
  - Create `backend/app/data/prompt_versions/` directory
612
  - Create `backend/app/data/learning/` directory
613
  - _Requirements: 17.33, 17.34, 17.35, 17.36_
614
 
615
- - [ ] 33. Implement knowledge ingestion and storage
616
- - [ ] 33.1 Implement knowledge ingestion
617
  - Implement ingest_from_search() using Tavily API
618
  - Implement ingest_from_url() using Jina Reader
619
  - Implement ingest_from_news() using NewsAPI
620
  - Implement compress_content() for summarization (2-4KB limit)
621
  - _Requirements: 17.8, 17.9, 17.10, 17.11_
622
 
623
- - [ ] 33.2 Implement knowledge store
624
  - Implement save_knowledge() with JSON storage
625
  - Implement get_knowledge() and search_knowledge()
626
  - Implement delete_expired_knowledge() with auto-cleanup
@@ -628,32 +628,32 @@ The implementation follows 9 phases, each building on the previous while maintai
628
  - Implement LRU eviction when limit reached
629
  - _Requirements: 17.4, 17.5, 17.33, 17.38_
630
 
631
- - [ ] 33.3 Add knowledge schemas
632
  - Add KnowledgeItem schema to `backend/app/schemas.py`
633
  - Add validation for summary length (2-4KB)
634
  - Add trust_score and freshness_score fields
635
  - _Requirements: 17.9_
636
 
637
- - [ ] 34. Implement experience learning
638
- - [ ] 34.1 Implement case learning
639
  - Implement learn_from_case() to extract metadata
640
  - Implement detect_patterns() for repeated patterns
641
  - Implement get_route_effectiveness() for routing insights
642
  - Implement get_prompt_performance() for prompt insights
643
  - _Requirements: 17.13, 17.14, 17.15, 17.16_
644
 
645
- - [ ] 34.2 Add case learning schemas
646
  - Add CaseLearning schema to `backend/app/schemas.py`
647
  - Add fields for route_effectiveness, prompt_performance, provider_reliability
648
  - _Requirements: 17.13_
649
 
650
- - [ ] 34.3 Hook learning into case save flow
651
  - Modify `backend/app/services/case_store.py` to call learn_from_case()
652
  - Store case learning metadata separately
653
  - _Requirements: 17.44_
654
 
655
- - [ ] 35. Implement prompt evolution
656
- - [ ] 35.1 Implement prompt versioning
657
  - Implement create_prompt_variant() using provider API
658
  - Implement test_prompt_variant() with quality metrics
659
  - Implement compare_prompts() for A/B testing
@@ -661,66 +661,66 @@ The implementation follows 9 phases, each building on the previous while maintai
661
  - Implement archive_prompt() for old versions
662
  - _Requirements: 17.17, 17.18, 17.19, 17.20, 17.21, 17.22_
663
 
664
- - [ ] 35.2 Add prompt version schemas
665
  - Add PromptVersion schema to `backend/app/schemas.py`
666
  - Add fields for version, status, win_rate, test_count
667
  - _Requirements: 17.17_
668
 
669
- - [ ] 35.3 Integrate with prompt management
670
  - Hook prompt versions into prompt loading
671
  - Store prompt history in prompt_versions directory
672
  - _Requirements: 17.47_
673
 
674
- - [ ] 36. Implement skill distillation
675
- - [ ] 36.1 Implement skill detection and creation
676
  - Implement detect_skill_candidates() from patterns
677
  - Implement distill_skill() to create skill records
678
  - Implement test_skill() for validation
679
  - Implement apply_skill() for skill usage
680
  - _Requirements: 17.23, 17.24, 17.25, 17.26, 17.27_
681
 
682
- - [ ] 36.2 Add skill schemas
683
  - Add Skill schema to `backend/app/schemas.py`
684
  - Add fields for trigger_patterns, recommended_agents, preferred_sources
685
  - _Requirements: 17.24_
686
 
687
- - [ ] 36.3 Integrate skills with agents
688
  - Hook skill application into agent execution
689
  - Store skills in skills directory
690
  - _Requirements: 17.45_
691
 
692
- - [ ] 37. Implement trust and freshness management
693
- - [ ] 37.1 Implement trust management
694
  - Implement get_trust_score() and update_trust()
695
  - Implement list_trusted_sources() and list_untrusted_sources()
696
  - Track verification outcomes
697
  - _Requirements: 17.28, 17.29, 17.30, 17.31, 17.32_
698
 
699
- - [ ] 37.2 Implement freshness management
700
  - Implement calculate_freshness() with domain-specific rules
701
  - Implement update_freshness() and get_stale_items()
702
  - Implement recommend_refresh() for stale items
703
  - _Requirements: 17.28, 17.29, 17.30, 17.31_
704
 
705
- - [ ] 37.3 Add trust and freshness schemas
706
  - Add SourceTrust schema to `backend/app/schemas.py`
707
  - Add FreshnessScore schema to `backend/app/schemas.py`
708
  - _Requirements: 17.28_
709
 
710
- - [ ] 37.4 Integrate with source selection
711
  - Hook trust scores into research agent source selection
712
  - Hook freshness scores into knowledge retrieval
713
  - _Requirements: 17.46_
714
 
715
- - [ ] 38. Implement learning scheduler
716
- - [ ] 38.1 Implement scheduler with safeguards
717
  - Implement schedule_task() with interval configuration
718
  - Implement is_system_idle() to check CPU usage
719
  - Implement is_battery_ok() to check battery level
720
  - Implement run_once() for manual triggers
721
  - _Requirements: 17.40, 17.41, 17.42, 17.43_
722
 
723
- - [ ] 38.2 Add scheduled tasks
724
  - Schedule knowledge ingestion (every 6 hours)
725
  - Schedule expired knowledge cleanup (daily)
726
  - Schedule pattern detection (daily)
@@ -728,7 +728,7 @@ The implementation follows 9 phases, each building on the previous while maintai
728
  - Schedule prompt optimization (weekly)
729
  - _Requirements: 17.6, 17.7, 17.40_
730
 
731
- - [ ] 38.3 Add scheduler configuration
732
  - Add LEARNING_ENABLED flag to config
733
  - Add KNOWLEDGE_MAX_SIZE_MB (default 200)
734
  - Add LEARNING_SCHEDULE_INTERVAL
@@ -736,53 +736,53 @@ The implementation follows 9 phases, each building on the previous while maintai
736
  - Add domain-specific expiration rules
737
  - _Requirements: 17.4, 17.5, 17.6, 17.7, 17.12_
738
 
739
- - [ ] 39. Add learning API endpoints
740
- - [ ] 39.1 Add learning status endpoints
741
  - Add GET /learning/status endpoint
742
  - Add POST /learning/run-once endpoint
743
  - Add GET /learning/insights endpoint
744
  - _Requirements: 17.49, 17.50, 17.51_
745
 
746
- - [ ] 39.2 Add knowledge endpoints
747
  - Add GET /knowledge endpoint for listing
748
  - Add GET /knowledge/{item_id} endpoint for details
749
  - Add GET /knowledge/search endpoint with query parameter
750
  - _Requirements: 17.52, 17.53, 17.54_
751
 
752
- - [ ] 39.3 Add skill endpoints
753
  - Add GET /skills endpoint for listing
754
  - Add GET /skills/{skill_name} endpoint for details
755
  - Add POST /skills/distill endpoint for manual distillation
756
  - _Requirements: 17.55, 17.56, 17.57_
757
 
758
- - [ ] 39.4 Add trust and freshness endpoints
759
  - Add GET /sources/trust endpoint
760
  - Add GET /sources/freshness endpoint
761
  - _Requirements: 17.58, 17.59_
762
 
763
- - [ ] 39.5 Add prompt evolution endpoints
764
  - Add GET /prompts/versions/{name} endpoint
765
  - Add POST /prompts/optimize/{name} endpoint
766
  - Add POST /prompts/promote/{name}/{version} endpoint
767
  - _Requirements: 17.60, 17.61, 17.62_
768
 
769
- - [ ] 40. Integrate learning layer with existing system
770
- - [ ] 40.1 Integrate with case execution
771
  - Hook learn_from_case() into case save flow
772
  - Store case learning metadata
773
  - _Requirements: 17.44_
774
 
775
- - [ ] 40.2 Integrate with research agent
776
  - Hook knowledge search into research agent
777
  - Use trust scores for source selection
778
  - _Requirements: 17.45, 17.46_
779
 
780
- - [ ] 40.3 Integrate with simulation
781
  - Learn from simulation outcomes
782
  - Store simulation insights
783
  - _Requirements: 17.46_
784
 
785
- - [ ] 40.4 Integrate with prompt management
786
  - Hook prompt versions into prompt loading
787
  - Track prompt performance
788
  - _Requirements: 17.47_
@@ -820,7 +820,7 @@ The implementation follows 9 phases, each building on the previous while maintai
820
  - Test scheduler respects rate limits
821
  - _Requirements: 17.6, 17.7, 17.40, 17.41, 17.42_
822
 
823
- - [ ] 42. Final checkpoint - Learning layer verification
824
  - Ensure the learning subsystem runs without stressing the laptop
825
  - Verify knowledge cache stays under 200MB
826
  - Verify scheduler respects battery and CPU constraints
 
114
 
115
  ### Phase 3: Agent Enhancement with Domain Intelligence
116
 
117
+ - [x] 7. Enhance Switchboard with domain detection
118
+ - [x] 7.1 Add domain pack dimension to routing
119
  - Modify `backend/app/agents/switchboard.py` to add domain_pack to routing decision
120
  - Implement domain detection using domain registry
121
  - Update RouteDecision schema in `backend/app/schemas.py`
122
  - _Requirements: 4.1, 5.6_
123
 
124
+ - [x] 7.2 Implement complexity-based routing logic
125
  - Ensure simple queries (≤5 words) route to solo mode
126
  - Ensure medium queries (≤25 words) route to standard mode
127
  - Ensure complex queries (>25 words) route to deep mode
128
  - _Requirements: 4.2, 4.3, 4.4_
129
 
130
+ - [x] 7.3 Implement simulation keyword detection
131
  - Add simulation trigger keyword detection
132
  - Load keywords from environment configuration
133
  - Set task_family="simulation" when keywords detected
134
  - _Requirements: 4.5, 4.6, 4.7_
135
 
136
+ - [x] 8. Enhance Research Agent with domain capabilities
137
+ - [x] 8.1 Integrate domain pack research enhancement
138
  - Modify `backend/app/agents/research.py` to detect domain
139
  - Call domain pack enhance_research() when domain detected
140
  - Add structured entity extraction output
141
  - _Requirements: 5.3, 7.1, 7.2, 7.3, 7.4_
142
 
143
+ - [x] 8.2 Update research agent prompt
144
  - Update `backend/app/prompts/research.txt` with domain intelligence instructions
145
  - Add instructions for entity and ticker extraction
146
  - Add instructions for structured output
147
  - _Requirements: 7.14_
148
 
149
+ - [x] 9. Enhance Verifier Agent with domain capabilities
150
+ - [x] 9.1 Integrate domain pack verification enhancement
151
  - Modify `backend/app/agents/verifier.py` to detect domain
152
  - Call domain pack enhance_verification() when domain detected
153
  - Add structured credibility scoring output
154
  - _Requirements: 5.3, 7.7, 7.8, 7.9_
155
 
156
+ - [x] 9.2 Update verifier agent prompt
157
  - Update `backend/app/prompts/verifier.txt` with domain intelligence instructions
158
  - Add instructions for rumor and scam detection
159
  - Add instructions for uncertainty surfacing
160
  - _Requirements: 7.14_
161
 
162
+ - [x] 10. Enhance Planner and Synthesizer Agents
163
+ - [x] 10.1 Add simulation mode suggestion to Planner
164
  - Modify `backend/app/agents/planner.py` to detect simulation opportunities
165
  - Add simulation_suggested field to output
166
  - Update `backend/app/prompts/planner.txt` with simulation guidance
167
  - _Requirements: 7.6, 7.14_
168
 
169
+ - [x] 10.2 Add uncertainty quantification to Synthesizer
170
  - Modify `backend/app/agents/synthesizer.py` to quantify uncertainty
171
  - Add simulation recommendation logic
172
  - Update `backend/app/prompts/synthesizer.txt` with uncertainty instructions
173
  - _Requirements: 7.11, 7.12, 7.13, 7.14_
174
 
175
+ - [x] 11. Update graph execution with domain context
176
+ - [x] 11.1 Pass domain pack context through pipeline
177
  - Modify `backend/app/graph.py` to detect domain early
178
  - Pass domain context to all agents
179
  - Ensure domain-enhanced execution flows correctly
180
  - _Requirements: 2.5, 5.3, 7.15_
181
 
182
+ - [x] 12. Checkpoint - Verify agent enhancements
183
  - Ensure Switchboard detects finance domain correctly
184
  - Verify Research agent extracts entities and tickers
185
  - Verify Verifier agent scores credibility
 
188
 
189
  ### Phase 4: Simulation Integration Enhancement
190
 
191
+ - [x] 13. Enhance simulation workflow and case linking
192
+ - [x] 13.1 Add case linking to simulation router
193
  - Modify `backend/app/routers/simulation.py` to link case_id
194
  - Improve error messages for MiroFish failures
195
  - Add better status reporting
196
  - _Requirements: 8.1, 8.11, 8.12_
197
 
198
+ - [x] 13.2 Enhance simulation store with search and filtering
199
  - Modify `backend/app/services/simulation_store.py`
200
  - Add simulation search by title or prediction_goal
201
  - Add simulation filtering by status
202
  - _Requirements: 8.10_
203
 
204
+ - [x] 13.3 Update case storage for simulation linking
205
  - Modify `backend/app/services/case_store.py` to add simulation_id field
206
  - Add case-to-simulation lookup functionality
207
  - Update CaseRecord schema in `backend/app/schemas.py`
208
  - _Requirements: 10.8, 14.9_
209
 
210
+ - [x] 13.4 Add simulation workflow to graph execution
211
  - Modify `backend/app/graph.py` to add simulation handoff logic
212
  - Add simulation result synthesis
213
  - Ensure simulation results flow into final answer
214
  - _Requirements: 3.4, 8.13_
215
 
216
+ - [x] 14. Checkpoint - Verify simulation integration
217
  - Ensure simulation requests create linked cases
218
  - Verify cases with simulations show simulation_id
219
  - Verify simulation results are synthesized correctly
 
221
 
222
  ### Phase 5: API Discovery Subsystem
223
 
224
+ - [x] 15. Create API discovery infrastructure
225
+ - [x] 15.1 Create API discovery structure
226
  - Create `backend/app/services/api_discovery/__init__.py`
227
  - Create `backend/app/services/api_discovery/catalog_loader.py`
228
  - Create `backend/app/services/api_discovery/classifier.py`
 
230
  - Create `backend/app/services/api_discovery/metadata_store.py`
231
  - _Requirements: 9.3, 9.4_
232
 
233
+ - [x] 15.2 Implement catalog loader
234
  - Implement load_public_apis_catalog() to fetch from GitHub or local cache
235
  - Parse API entries with name, description, auth, HTTPS, CORS, category, link
236
  - _Requirements: 9.3, 9.6_
237
 
238
+ - [x] 15.3 Implement API classifier and scorer
239
  - Implement classify_api() to categorize APIs by domain
240
  - Implement score_api_usefulness() to prioritize APIs for integration
241
  - Consider auth simplicity, HTTPS, CORS, category relevance
 
247
  - Add `GET /api-discovery/top-scored` endpoint
248
  - _Requirements: 9.4_
249
 
250
+ - [x] 16. Checkpoint - Verify API discovery
251
  - Ensure catalog loads successfully
252
  - Verify APIs are classified correctly
253
  - Verify scoring produces reasonable priorities
 
255
 
256
  ### Phase 6: Frontend Enhancement
257
 
258
+ - [x] 17. Create layout and navigation infrastructure
259
+ - [x] 17.1 Create layout components
260
  - Create `frontend/src/components/layout/Header.tsx` with branding and navigation
261
  - Create `frontend/src/components/layout/Navigation.tsx` with tab navigation
262
  - Modify `frontend/src/app/layout.tsx` to use new layout components
263
  - _Requirements: 12.1, 12.2_
264
 
265
+ - [x] 17.2 Create common UI components
266
  - Create `frontend/src/components/common/Badge.tsx` for status indicators
267
  - Create `frontend/src/components/common/Card.tsx` for content containers
268
  - Create `frontend/src/components/common/LoadingSpinner.tsx` for loading states
269
  - Create `frontend/src/components/common/ErrorMessage.tsx` for error display
270
  - _Requirements: 12.10, 12.11, 12.15_
271
 
272
+ - [x] 17.3 Create API client and type definitions
273
  - Create `frontend/src/lib/api.ts` with MiroOrgClient class
274
  - Implement methods for all backend endpoints
275
  - Create `frontend/src/lib/types.ts` with TypeScript interfaces
276
  - _Requirements: 11.21_
277
 
278
+ - [x] 18. Create Main Dashboard page
279
+ - [x] 18.1 Implement dashboard with system overview
280
  - Modify `frontend/src/app/page.tsx` to show quick stats
281
  - Display recent cases summary
282
  - Display system health status
283
  - Add navigation to main features
284
  - _Requirements: 12.2_
285
 
286
+ - [x] 19. Create Analyze page and components
287
+ - [x] 19.1 Create Analyze page structure
288
  - Create `frontend/src/app/analyze/page.tsx` with analysis interface
289
  - Create `frontend/src/components/analyze/TaskInput.tsx` for user input
290
  - Create `frontend/src/components/analyze/ModeSelector.tsx` for mode selection
291
  - _Requirements: 12.3, 12.7, 12.8_
292
 
293
+ - [x] 19.2 Create result display components
294
  - Create `frontend/src/components/analyze/ResultViewer.tsx` for final answers
295
  - Create `frontend/src/components/analyze/AgentOutputPanel.tsx` for agent outputs
296
  - Display route/debug badges and confidence indicators
297
  - _Requirements: 12.9, 12.10, 12.11, 12.14_
298
 
299
+ - [x] 20. Create Cases page and components
300
+ - [x] 20.1 Create Cases history interface
301
  - Create `frontend/src/app/cases/page.tsx` with case list
302
  - Create `frontend/src/components/cases/CaseList.tsx` for listing cases
303
  - Create `frontend/src/components/cases/CaseCard.tsx` for case preview
304
  - _Requirements: 12.4, 12.17_
305
 
306
+ - [x] 20.2 Create Case detail view
307
  - Create `frontend/src/app/cases/[id]/page.tsx` for case details
308
  - Create `frontend/src/components/cases/CaseDetail.tsx` for full case display
309
  - Display case_id, routing decision, agent outputs, timestamps
310
  - _Requirements: 12.17_
311
 
312
+ - [x] 21. Create Simulation page and components
313
+ - [x] 21.1 Create Simulation submission interface
314
  - Create `frontend/src/app/simulation/page.tsx` with simulation form
315
  - Create `frontend/src/components/simulation/SimulationForm.tsx` for input
316
  - Create `frontend/src/components/simulation/SimulationStatus.tsx` for status display
317
  - _Requirements: 12.6, 12.13_
318
 
319
+ - [x] 21.2 Create Simulation detail and chat interface
320
  - Create `frontend/src/app/simulation/[id]/page.tsx` for simulation details
321
  - Create `frontend/src/components/simulation/SimulationReport.tsx` for report display
322
  - Create `frontend/src/components/simulation/SimulationChat.tsx` for post-simulation chat
323
  - _Requirements: 12.13_
324
 
325
+ - [x] 22. Create Prompt Lab and Config pages
326
+ - [x] 22.1 Create Prompt Lab interface
327
  - Create `frontend/src/app/prompts/page.tsx` with prompt management
328
  - Create `frontend/src/components/prompts/PromptList.tsx` for listing prompts
329
  - Create `frontend/src/components/prompts/PromptEditor.tsx` for editing
330
  - _Requirements: 12.5_
331
 
332
+ - [x] 22.2 Create Config page
333
  - Create `frontend/src/app/config/page.tsx` with system configuration view
334
  - Display provider status, feature flags, health checks
335
  - _Requirements: 12.1_
336
 
337
+ - [x] 23. Implement dark theme and styling
338
+ - [x] 23.1 Update global styles with dark theme
339
  - Modify `frontend/src/app/globals.css` with dark color palette
340
  - Implement card-based structure with subtle borders
341
  - Add animations and transitions
342
  - Use Inter font family
343
  - _Requirements: 12.15, 12.16_
344
 
345
+ - [x] 24. Checkpoint - Verify frontend functionality
346
  - Ensure all pages are accessible via navigation
347
  - Verify Analyze workflow works end-to-end
348
  - Verify Case history displays correctly
 
353
 
354
  ### Phase 7: Testing and Documentation
355
 
356
+ - [~] 25. Write unit tests for core functionality
357
  - [ ]* 25.1 Write provider abstraction tests
358
  - Test OpenRouter, Ollama, OpenAI provider calls
359
  - Test provider fallback behavior
 
388
  - Test error handling for disabled MiroFish
389
  - _Requirements: 8.1, 8.11, 8.12_
390
 
391
+ - [~] 26. Write property-based tests
392
  - [ ]* 26.1 Write Property 1: Configuration Environment Isolation
393
  - **Property 1: Configuration Environment Isolation**
394
  - **Validates: Requirements 1.8, 6.7**
 
479
  - Test that new domain packs don't require agent changes
480
  - Create mock domain pack and verify integration
481
 
482
+ - [~] 27. Write integration tests
483
  - [ ]* 27.1 Write end-to-end case execution test
484
  - Test complete workflow from user input to final answer
485
  - Verify all agents execute correctly
 
502
  - Verify finance pack capabilities are used
503
  - _Requirements: 5.3, 7.1, 7.7_
504
 
505
+ - [x] 28. Create comprehensive documentation
506
+ - [x] 28.1 Update main README
507
  - Update `README.md` with architecture overview
508
  - Document four-layer architecture
509
  - Document agent roles and responsibilities
 
511
  - Add environment variable reference
512
  - _Requirements: 13.2, 13.3, 13.4, 13.10, 13.11, 13.12_
513
 
514
+ - [x] 28.2 Create architecture documentation
515
  - Create `ARCHITECTURE.md` with detailed architecture description
516
  - Document component interactions
517
  - Document data flow
518
  - _Requirements: 13.10_
519
 
520
+ - [x] 28.3 Create domain pack documentation
521
  - Create `DOMAIN_PACKS.md` with domain pack integration guide
522
  - Document how to create new domain packs
523
  - Document finance pack capabilities
524
  - _Requirements: 13.13_
525
 
526
+ - [x] 28.4 Create testing documentation
527
  - Create `TESTING.md` with testing strategy and guidelines
528
  - Document unit test patterns
529
  - Document property-based test patterns
530
  - Document integration test patterns
531
  - _Requirements: 14.1, 14.2, 14.3_
532
 
533
+ - [x] 28.5 Create deployment documentation
534
  - Create `DEPLOYMENT.md` with deployment instructions
535
  - Document environment setup
536
  - Document dependency installation
 
546
 
547
  ### Phase 8: Cleanup and Optimization
548
 
549
+ - [x] 30. Remove dead code and optimize performance
550
+ - [x] 30.1 Clean up codebase
551
  - Remove unused imports across all files
552
  - Remove commented code
553
  - Remove duplicate implementations
554
  - _Requirements: 1.5_
555
 
556
+ - [x] 30.2 Optimize external API performance
557
  - Add caching for market quotes with 5 minute TTL
558
  - Verify connection pooling is implemented
559
  - Verify request timeouts are configured
560
  - Add rate limiting for external APIs
561
  - _Requirements: 15.3, 15.4, 15.5, 15.6_
562
 
563
+ - [x] 30.3 Polish error messages and logging
564
  - Review all error messages for clarity and consistency
565
  - Review log levels for appropriateness
566
  - Add missing log entries for key operations
567
  - _Requirements: 9.3, 9.8_
568
 
569
+ - [x] 30.4 Security review
570
  - Verify no API keys in source code
571
  - Verify error messages don't leak internals
572
  - Verify input validation is comprehensive
 
579
  - Identify and address bottlenecks
580
  - _Requirements: 15.1, 15.2_
581
 
582
+ - [x] 31. Final checkpoint - System verification
583
  - Ensure no dead code remains
584
  - Verify performance meets requirements
585
  - Verify error messages are clear and consistent
 
589
 
590
  ### Phase 9: Autonomous Knowledge Evolution Layer
591
 
592
+ - [x] 32. Create learning subsystem infrastructure
593
+ - [x] 32.1 Create learning service structure
594
  - Create `backend/app/services/learning/__init__.py`
595
  - Create `backend/app/services/learning/knowledge_ingestor.py`
596
  - Create `backend/app/services/learning/knowledge_store.py`
597
  - Create `backend/app/services/learning/learning_engine.py`
598
  - _Requirements: 17.1, 17.2, 17.3_
599
 
600
+ - [x] 32.2 Create additional learning services
601
  - Create `backend/app/services/learning/prompt_optimizer.py`
602
  - Create `backend/app/services/learning/skill_distiller.py`
603
  - Create `backend/app/services/learning/trust_manager.py`
 
605
  - Create `backend/app/services/learning/scheduler.py`
606
  - _Requirements: 17.1, 17.17, 17.23, 17.28, 17.40_
607
 
608
+ - [x] 32.3 Create data directories
609
  - Create `backend/app/data/knowledge/` directory
610
  - Create `backend/app/data/skills/` directory
611
  - Create `backend/app/data/prompt_versions/` directory
612
  - Create `backend/app/data/learning/` directory
613
  - _Requirements: 17.33, 17.34, 17.35, 17.36_
614
 
615
+ - [x] 33. Implement knowledge ingestion and storage
616
+ - [x] 33.1 Implement knowledge ingestion
617
  - Implement ingest_from_search() using Tavily API
618
  - Implement ingest_from_url() using Jina Reader
619
  - Implement ingest_from_news() using NewsAPI
620
  - Implement compress_content() for summarization (2-4KB limit)
621
  - _Requirements: 17.8, 17.9, 17.10, 17.11_
622
 
623
+ - [x] 33.2 Implement knowledge store
624
  - Implement save_knowledge() with JSON storage
625
  - Implement get_knowledge() and search_knowledge()
626
  - Implement delete_expired_knowledge() with auto-cleanup
 
628
  - Implement LRU eviction when limit reached
629
  - _Requirements: 17.4, 17.5, 17.33, 17.38_
630
 
631
+ - [x] 33.3 Add knowledge schemas
632
  - Add KnowledgeItem schema to `backend/app/schemas.py`
633
  - Add validation for summary length (2-4KB)
634
  - Add trust_score and freshness_score fields
635
  - _Requirements: 17.9_
636
 
637
+ - [x] 34. Implement experience learning
638
+ - [x] 34.1 Implement case learning
639
  - Implement learn_from_case() to extract metadata
640
  - Implement detect_patterns() for repeated patterns
641
  - Implement get_route_effectiveness() for routing insights
642
  - Implement get_prompt_performance() for prompt insights
643
  - _Requirements: 17.13, 17.14, 17.15, 17.16_
644
 
645
+ - [x] 34.2 Add case learning schemas
646
  - Add CaseLearning schema to `backend/app/schemas.py`
647
  - Add fields for route_effectiveness, prompt_performance, provider_reliability
648
  - _Requirements: 17.13_
649
 
650
+ - [x] 34.3 Hook learning into case save flow
651
  - Modify `backend/app/services/case_store.py` to call learn_from_case()
652
  - Store case learning metadata separately
653
  - _Requirements: 17.44_
654
 
655
+ - [x] 35. Implement prompt evolution
656
+ - [x] 35.1 Implement prompt versioning
657
  - Implement create_prompt_variant() using provider API
658
  - Implement test_prompt_variant() with quality metrics
659
  - Implement compare_prompts() for A/B testing
 
661
  - Implement archive_prompt() for old versions
662
  - _Requirements: 17.17, 17.18, 17.19, 17.20, 17.21, 17.22_
663
 
664
+ - [x] 35.2 Add prompt version schemas
665
  - Add PromptVersion schema to `backend/app/schemas.py`
666
  - Add fields for version, status, win_rate, test_count
667
  - _Requirements: 17.17_
668
 
669
+ - [x] 35.3 Integrate with prompt management
670
  - Hook prompt versions into prompt loading
671
  - Store prompt history in prompt_versions directory
672
  - _Requirements: 17.47_
673
 
674
+ - [x] 36. Implement skill distillation
675
+ - [x] 36.1 Implement skill detection and creation
676
  - Implement detect_skill_candidates() from patterns
677
  - Implement distill_skill() to create skill records
678
  - Implement test_skill() for validation
679
  - Implement apply_skill() for skill usage
680
  - _Requirements: 17.23, 17.24, 17.25, 17.26, 17.27_
681
 
682
+ - [x] 36.2 Add skill schemas
683
  - Add Skill schema to `backend/app/schemas.py`
684
  - Add fields for trigger_patterns, recommended_agents, preferred_sources
685
  - _Requirements: 17.24_
686
 
687
+ - [x] 36.3 Integrate skills with agents
688
  - Hook skill application into agent execution
689
  - Store skills in skills directory
690
  - _Requirements: 17.45_
691
 
692
+ - [x] 37. Implement trust and freshness management
693
+ - [x] 37.1 Implement trust management
694
  - Implement get_trust_score() and update_trust()
695
  - Implement list_trusted_sources() and list_untrusted_sources()
696
  - Track verification outcomes
697
  - _Requirements: 17.28, 17.29, 17.30, 17.31, 17.32_
698
 
699
+ - [x] 37.2 Implement freshness management
700
  - Implement calculate_freshness() with domain-specific rules
701
  - Implement update_freshness() and get_stale_items()
702
  - Implement recommend_refresh() for stale items
703
  - _Requirements: 17.28, 17.29, 17.30, 17.31_
704
 
705
+ - [x] 37.3 Add trust and freshness schemas
706
  - Add SourceTrust schema to `backend/app/schemas.py`
707
  - Add FreshnessScore schema to `backend/app/schemas.py`
708
  - _Requirements: 17.28_
709
 
710
+ - [x] 37.4 Integrate with source selection
711
  - Hook trust scores into research agent source selection
712
  - Hook freshness scores into knowledge retrieval
713
  - _Requirements: 17.46_
714
 
715
+ - [x] 38. Implement learning scheduler
716
+ - [x] 38.1 Implement scheduler with safeguards
717
  - Implement schedule_task() with interval configuration
718
  - Implement is_system_idle() to check CPU usage
719
  - Implement is_battery_ok() to check battery level
720
  - Implement run_once() for manual triggers
721
  - _Requirements: 17.40, 17.41, 17.42, 17.43_
722
 
723
+ - [x] 38.2 Add scheduled tasks
724
  - Schedule knowledge ingestion (every 6 hours)
725
  - Schedule expired knowledge cleanup (daily)
726
  - Schedule pattern detection (daily)
 
728
  - Schedule prompt optimization (weekly)
729
  - _Requirements: 17.6, 17.7, 17.40_
730
 
731
+ - [x] 38.3 Add scheduler configuration
732
  - Add LEARNING_ENABLED flag to config
733
  - Add KNOWLEDGE_MAX_SIZE_MB (default 200)
734
  - Add LEARNING_SCHEDULE_INTERVAL
 
736
  - Add domain-specific expiration rules
737
  - _Requirements: 17.4, 17.5, 17.6, 17.7, 17.12_
738
 
739
+ - [x] 39. Add learning API endpoints
740
+ - [x] 39.1 Add learning status endpoints
741
  - Add GET /learning/status endpoint
742
  - Add POST /learning/run-once endpoint
743
  - Add GET /learning/insights endpoint
744
  - _Requirements: 17.49, 17.50, 17.51_
745
 
746
+ - [x] 39.2 Add knowledge endpoints
747
  - Add GET /knowledge endpoint for listing
748
  - Add GET /knowledge/{item_id} endpoint for details
749
  - Add GET /knowledge/search endpoint with query parameter
750
  - _Requirements: 17.52, 17.53, 17.54_
751
 
752
+ - [x] 39.3 Add skill endpoints
753
  - Add GET /skills endpoint for listing
754
  - Add GET /skills/{skill_name} endpoint for details
755
  - Add POST /skills/distill endpoint for manual distillation
756
  - _Requirements: 17.55, 17.56, 17.57_
757
 
758
+ - [x] 39.4 Add trust and freshness endpoints
759
  - Add GET /sources/trust endpoint
760
  - Add GET /sources/freshness endpoint
761
  - _Requirements: 17.58, 17.59_
762
 
763
+ - [x] 39.5 Add prompt evolution endpoints
764
  - Add GET /prompts/versions/{name} endpoint
765
  - Add POST /prompts/optimize/{name} endpoint
766
  - Add POST /prompts/promote/{name}/{version} endpoint
767
  - _Requirements: 17.60, 17.61, 17.62_
768
 
769
+ - [x] 40. Integrate learning layer with existing system
770
+ - [x] 40.1 Integrate with case execution
771
  - Hook learn_from_case() into case save flow
772
  - Store case learning metadata
773
  - _Requirements: 17.44_
774
 
775
+ - [x] 40.2 Integrate with research agent
776
  - Hook knowledge search into research agent
777
  - Use trust scores for source selection
778
  - _Requirements: 17.45, 17.46_
779
 
780
+ - [x] 40.3 Integrate with simulation
781
  - Learn from simulation outcomes
782
  - Store simulation insights
783
  - _Requirements: 17.46_
784
 
785
+ - [x] 40.4 Integrate with prompt management
786
  - Hook prompt versions into prompt loading
787
  - Track prompt performance
788
  - _Requirements: 17.47_
 
820
  - Test scheduler respects rate limits
821
  - _Requirements: 17.6, 17.7, 17.40, 17.41, 17.42_
822
 
823
+ - [x] 42. Final checkpoint - Learning layer verification
824
  - Ensure the learning subsystem runs without stressing the laptop
825
  - Verify knowledge cache stays under 200MB
826
  - Verify scheduler respects battery and CPU constraints
ARCHITECTURE.md ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MiroOrg v1.1 Architecture
2
+
3
+ ## System Overview
4
+
5
+ MiroOrg v1.1 is a 5-layer AI intelligence system designed for single-user local deployment, with autonomous learning capabilities built in.
6
+
7
+ ## Five-Layer Architecture
8
+
9
+ ```
10
+ ┌─────────────────────────────────────────────────────────────┐
11
+ │ Layer 5: Autonomous Knowledge Evolution │
12
+ │ - Knowledge ingestion from web/news │
13
+ │ - Experience learning from cases │
14
+ │ - Prompt evolution via A/B testing │
15
+ │ - Skill distillation from patterns │
16
+ │ - Trust & freshness management │
17
+ └─────────────────────────────────────────────────────────────┘
18
+
19
+ ┌─────────────────────────────────────────────────────────────┐
20
+ │ Layer 4: Simulation Lab Integration │
21
+ │ - MiroFish adapter for scenario modeling │
22
+ │ - Case-simulation linking │
23
+ │ - Simulation result synthesis │
24
+ └─────────────────────────────────────────────────────────────┘
25
+
26
+ ┌─────────────────────────────────────────────────────────────┐
27
+ │ Layer 3: Agent Orchestration │
28
+ │ - Switchboard (4D routing) │
29
+ │ - Research (domain-enhanced) │
30
+ │ - Verifier (credibility scoring) │
31
+ │ - Planner (simulation-aware) │
32
+ │ - Synthesizer (uncertainty quantification) │
33
+ └─────────────────────────────────────────────────────────────┘
34
+
35
+ ┌─────────────────────────────────────────────────────────────┐
36
+ │ Layer 2: Domain Intelligence │
37
+ │ - Pluggable domain packs │
38
+ │ - Finance pack (market data, news, analysis) │
39
+ │ - Domain registry & detection │
40
+ └─────────────────────────────────────────────────────────────┘
41
+
42
+ ┌─────────────────────────────────────────────────────────────┐
43
+ │ Layer 1: Core Platform │
44
+ │ - Multi-provider LLM (OpenRouter/Ollama/OpenAI) │
45
+ │ - Automatic fallback │
46
+ │ - Configuration management │
47
+ │ - Data persistence │
48
+ └─────────────────────────────────────────────────────────────┘
49
+ ```
50
+
51
+ ## Layer 1: Core Platform
52
+
53
+ ### Multi-Provider LLM Abstraction
54
+
55
+ The system supports three LLM providers with automatic fallback:
56
+
57
+ 1. **OpenRouter** - Recommended for production
58
+ - Access to multiple models
59
+ - Pay-per-use pricing
60
+ - High availability
61
+
62
+ 2. **Ollama** - Local deployment
63
+ - Privacy-focused
64
+ - No API costs
65
+ - Requires local GPU/CPU
66
+
67
+ 3. **OpenAI** - High quality
68
+ - GPT-4 and GPT-3.5
69
+ - Reliable performance
70
+ - Higher cost
71
+
72
+ **Fallback Chain:**
73
+ ```
74
+ Primary Provider → Fallback Provider → Error
75
+ ```
76
+
77
+ ### Configuration Management
78
+
79
+ All configuration via environment variables:
80
+ - Provider selection
81
+ - API keys
82
+ - Feature flags
83
+ - Domain pack enablement
84
+ - Learning layer settings
85
+
86
+ ### Data Persistence
87
+
88
+ JSON-based storage for single-user deployment:
89
+ - Cases: `backend/app/data/memory/`
90
+ - Simulations: `backend/app/data/simulations/`
91
+ - Knowledge: `backend/app/data/knowledge/`
92
+ - Skills: `backend/app/data/skills/`
93
+ - Prompt versions: `backend/app/data/prompt_versions/`
94
+
95
+ ## Layer 2: Domain Intelligence
96
+
97
+ ### Domain Pack System
98
+
99
+ Domain packs are pluggable modules that enhance agent capabilities:
100
+
101
+ ```python
102
+ class DomainPack(ABC):
103
+ @property
104
+ @abstractmethod
105
+ def name(self) -> str:
106
+ """Domain pack name (e.g., 'finance')"""
107
+ pass
108
+
109
+ @property
110
+ @abstractmethod
111
+ def keywords(self) -> List[str]:
112
+ """Keywords for domain detection"""
113
+ pass
114
+
115
+ @abstractmethod
116
+ async def enhance_research(self, query: str, context: Dict) -> Dict:
117
+ """Enhance research with domain-specific capabilities"""
118
+ pass
119
+
120
+ @abstractmethod
121
+ async def enhance_verification(self, claims: List[str], context: Dict) -> Dict:
122
+ """Enhance verification with domain-specific checks"""
123
+ pass
124
+
125
+ @abstractmethod
126
+ def get_capabilities(self) -> List[str]:
127
+ """List domain-specific capabilities"""
128
+ pass
129
+ ```
130
+
131
+ ### Finance Domain Pack
132
+
133
+ Included capabilities:
134
+ - **Market Data**: Real-time quotes via Alpha Vantage
135
+ - **News**: Financial news via NewsAPI
136
+ - **Entity Resolution**: Extract and normalize company names
137
+ - **Ticker Resolution**: Map companies to stock tickers
138
+ - **Source Checking**: Credibility scoring for financial sources
139
+ - **Rumor Detection**: Identify unverified claims
140
+ - **Scam Detection**: Flag potential scams
141
+ - **Stance Detection**: Analyze sentiment and stance
142
+ - **Event Analysis**: Analyze market-moving events
143
+ - **Prediction**: Generate market predictions
144
+
145
+ ### Domain Detection
146
+
147
+ Automatic domain detection based on keywords:
148
+ ```python
149
+ # Finance keywords
150
+ ["stock", "market", "trading", "investment", "portfolio",
151
+ "earnings", "dividend", "IPO", "merger", "acquisition"]
152
+ ```
153
+
154
+ ## Layer 3: Agent Orchestration
155
+
156
+ ### Agent Pipeline
157
+
158
+ ```
159
+ User Input
160
+
161
+ Switchboard (4D Routing)
162
+
163
+ Research (Domain-Enhanced)
164
+
165
+ Planner (Simulation-Aware)
166
+
167
+ Verifier (Credibility Scoring)
168
+
169
+ Synthesizer (Final Answer)
170
+
171
+ Case Storage
172
+ ```
173
+
174
+ ### Switchboard Agent
175
+
176
+ **Four-Dimensional Classification:**
177
+
178
+ 1. **Task Family**
179
+ - `normal`: Standard analysis
180
+ - `simulation`: Scenario modeling
181
+
182
+ 2. **Domain Pack**
183
+ - `finance`: Financial domain
184
+ - `general`: General knowledge
185
+ - `policy`: Policy analysis
186
+ - `custom`: Custom domains
187
+
188
+ 3. **Complexity**
189
+ - `simple`: ≤5 words
190
+ - `medium`: 6-25 words
191
+ - `complex`: >25 words
192
+
193
+ 4. **Execution Mode**
194
+ - `solo`: Synthesizer only (simple queries)
195
+ - `standard`: Research → Synthesizer (medium queries)
196
+ - `deep`: Full pipeline (complex queries)
197
+
198
+ ### Research Agent
199
+
200
+ **Capabilities:**
201
+ - Web search via Tavily
202
+ - News search via NewsAPI
203
+ - Domain-specific data sources
204
+ - Entity extraction
205
+ - Source credibility assessment
206
+
207
+ **Domain Enhancement:**
208
+ ```python
209
+ if domain == "finance":
210
+ # Extract tickers and entities
211
+ # Fetch market data
212
+ # Get financial news
213
+ # Score source credibility
214
+ ```
215
+
216
+ ### Verifier Agent
217
+
218
+ **Verification Process:**
219
+ 1. Extract claims from research
220
+ 2. Check source reliability
221
+ 3. Detect rumors and scams
222
+ 4. Quantify uncertainty
223
+ 5. Flag high-risk claims
224
+
225
+ **Domain Enhancement:**
226
+ ```python
227
+ if domain == "finance":
228
+ # Check financial source credibility
229
+ # Detect market manipulation
230
+ # Verify regulatory compliance
231
+ ```
232
+
233
+ ### Planner Agent
234
+
235
+ **Planning Process:**
236
+ 1. Analyze research findings
237
+ 2. Identify action items
238
+ 3. Detect simulation opportunities
239
+ 4. Create structured plan
240
+
241
+ **Simulation Detection:**
242
+ - Keywords: "predict", "what if", "scenario", "impact"
243
+ - Uncertainty level: High
244
+ - Recommendation: Suggest simulation mode
245
+
246
+ ### Synthesizer Agent
247
+
248
+ **Synthesis Process:**
249
+ 1. Combine all agent outputs
250
+ 2. Quantify uncertainty
251
+ 3. Generate final answer
252
+ 4. Add metadata (confidence, sources, etc.)
253
+
254
+ **Output Structure:**
255
+ ```json
256
+ {
257
+ "summary": "Final answer",
258
+ "confidence": 0.85,
259
+ "uncertainty_factors": [...],
260
+ "sources": [...],
261
+ "simulation_recommended": false
262
+ }
263
+ ```
264
+
265
+ ## Layer 4: Simulation Lab Integration
266
+
267
+ ### MiroFish Adapter
268
+
269
+ Adapter pattern for simulation integration:
270
+
271
+ ```python
272
+ class MiroFishClient:
273
+ async def submit_simulation(self, title, seed_text, prediction_goal):
274
+ """Submit simulation to MiroFish"""
275
+
276
+ async def get_status(self, simulation_id):
277
+ """Get simulation status"""
278
+
279
+ async def get_report(self, simulation_id):
280
+ """Get simulation report"""
281
+
282
+ async def chat(self, simulation_id, message):
283
+ """Chat with simulation"""
284
+ ```
285
+
286
+ ### Case-Simulation Linking
287
+
288
+ Cases and simulations are linked:
289
+ ```json
290
+ {
291
+ "case_id": "uuid",
292
+ "simulation_id": "uuid",
293
+ "user_input": "...",
294
+ "route": {...},
295
+ "outputs": {...},
296
+ "final_answer": "..."
297
+ }
298
+ ```
299
+
300
+ ### Simulation Workflow
301
+
302
+ ```
303
+ User Input with Simulation Keywords
304
+
305
+ Switchboard (task_family="simulation")
306
+
307
+ Research (gather context)
308
+
309
+ Submit to MiroFish
310
+
311
+ Poll for completion
312
+
313
+ Synthesize results
314
+
315
+ Return to user
316
+ ```
317
+
318
+ ## Layer 5: Autonomous Knowledge Evolution
319
+
320
+ ### Knowledge Ingestion
321
+
322
+ **Sources:**
323
+ - Web search (Tavily)
324
+ - News (NewsAPI)
325
+ - URLs (Jina Reader)
326
+
327
+ **Process:**
328
+ 1. Fetch content from source
329
+ 2. Compress to 2-4KB summary using LLM
330
+ 3. Store with metadata
331
+ 4. Enforce 200MB storage limit
332
+ 5. LRU eviction when limit reached
333
+
334
+ **Scheduling:**
335
+ - Every 6 hours (configurable)
336
+ - Only when system idle (CPU <50%)
337
+ - Only when battery OK (>30%)
338
+
339
+ ### Experience Learning
340
+
341
+ **Learning from Cases:**
342
+ 1. Extract metadata from case execution
343
+ 2. Detect patterns (domain, sources, agents)
344
+ 3. Track route effectiveness
345
+ 4. Track prompt performance
346
+ 5. Track provider reliability
347
+
348
+ **Pattern Detection:**
349
+ - Domain expertise patterns
350
+ - Preferred source patterns
351
+ - Agent workflow patterns
352
+ - Minimum frequency: 3 occurrences
353
+
354
+ ### Prompt Evolution
355
+
356
+ **A/B Testing Process:**
357
+ 1. Create prompt variant using LLM
358
+ 2. Test variant with sample inputs
359
+ 3. Measure quality metrics
360
+ 4. Compare win rates
361
+ 5. Promote if criteria met (>70% win rate, >10 tests)
362
+
363
+ **Versioning:**
364
+ ```json
365
+ {
366
+ "id": "uuid",
367
+ "prompt_name": "research",
368
+ "version": 2,
369
+ "status": "testing",
370
+ "test_count": 15,
371
+ "win_count": 12,
372
+ "win_rate": 0.80
373
+ }
374
+ ```
375
+
376
+ ### Skill Distillation
377
+
378
+ **Skill Creation:**
379
+ 1. Detect patterns in successful cases
380
+ 2. Distill into reusable skills
381
+ 3. Test skills with validation cases
382
+ 4. Track usage and success rate
383
+
384
+ **Skill Structure:**
385
+ ```json
386
+ {
387
+ "id": "domain_expertise_finance",
388
+ "type": "domain_expertise",
389
+ "trigger_patterns": ["stock", "market"],
390
+ "recommended_agents": ["research", "verifier"],
391
+ "preferred_sources": ["bloomberg", "reuters"],
392
+ "success_rate": 0.85
393
+ }
394
+ ```
395
+
396
+ ### Trust Management
397
+
398
+ **Source Trust Scoring:**
399
+ - Initial score: 0.5 (neutral)
400
+ - Updated based on verification outcomes
401
+ - Exponential moving average
402
+ - Minimum verifications: 3
403
+
404
+ **Trust Categories:**
405
+ - Trusted: score ≥0.7, verifications ≥3
406
+ - Untrusted: score ≤0.3, verifications ≥3
407
+ - Unknown: verifications <3
408
+
409
+ ### Freshness Management
410
+
411
+ **Domain-Specific Expiration:**
412
+ - Finance: 7-day half-life
413
+ - General: 30-day half-life
414
+ - Exponential decay: `freshness = 2^(-age_days / half_life_days)`
415
+
416
+ **Refresh Recommendations:**
417
+ - Freshness <0.3: Stale
418
+ - Prioritize by staleness
419
+ - Automatic refresh during scheduled ingestion
420
+
421
+ ### Learning Scheduler
422
+
423
+ **Safeguards:**
424
+ - CPU usage check: <50%
425
+ - Battery level check: >30%
426
+ - System idle detection
427
+ - Error handling with backoff
428
+
429
+ **Scheduled Tasks:**
430
+ - Knowledge ingestion: Every 6 hours
431
+ - Expired cleanup: Daily
432
+ - Pattern detection: Daily
433
+ - Skill distillation: Weekly
434
+ - Prompt optimization: Weekly
435
+
436
+ ## Data Flow
437
+
438
+ ### Normal Request Flow
439
+
440
+ ```
441
+ 1. User submits request
442
+ 2. Switchboard classifies (4D)
443
+ 3. Domain pack detected
444
+ 4. Research gathers info (domain-enhanced)
445
+ 5. Planner creates plan
446
+ 6. Verifier validates (domain-enhanced)
447
+ 7. Synthesizer produces answer
448
+ 8. Case saved
449
+ 9. Learning extracts metadata
450
+ ```
451
+
452
+ ### Simulation Request Flow
453
+
454
+ ```
455
+ 1. User submits request with simulation keywords
456
+ 2. Switchboard detects simulation
457
+ 3. Research gathers context
458
+ 4. Submit to MiroFish
459
+ 5. Poll for completion
460
+ 6. Synthesize results
461
+ 7. Case and simulation saved
462
+ 8. Learning extracts metadata
463
+ ```
464
+
465
+ ### Learning Flow
466
+
467
+ ```
468
+ 1. Scheduler checks system conditions
469
+ 2. If idle and battery OK:
470
+ a. Ingest knowledge from sources
471
+ b. Compress to 2-4KB summaries
472
+ c. Store with metadata
473
+ d. Enforce storage limits
474
+ 3. Detect patterns in recent cases
475
+ 4. Distill skills from patterns
476
+ 5. Optimize prompts via A/B testing
477
+ 6. Update trust scores
478
+ 7. Calculate freshness scores
479
+ ```
480
+
481
+ ## Deployment Considerations
482
+
483
+ ### Single-User Local (Recommended)
484
+
485
+ **Optimizations:**
486
+ - JSON storage (no database)
487
+ - Learning scheduler with safeguards
488
+ - 200MB knowledge cache limit
489
+ - CPU/battery awareness
490
+ - Automatic LRU eviction
491
+
492
+ **Hardware Requirements:**
493
+ - 8GB RAM minimum
494
+ - 256GB storage minimum
495
+ - M2 Air or equivalent
496
+
497
+ ### Multi-User Production
498
+
499
+ **Required Changes:**
500
+ - PostgreSQL for storage
501
+ - Redis for caching
502
+ - Authentication/authorization
503
+ - Rate limiting
504
+ - Request queuing
505
+ - Monitoring/alerting
506
+ - Horizontal scaling
507
+
508
+ ## Security
509
+
510
+ ### API Key Management
511
+
512
+ - All keys in environment variables
513
+ - Never committed to source control
514
+ - Validated on startup
515
+
516
+ ### Error Handling
517
+
518
+ - Sanitized error messages
519
+ - No internal details leaked
520
+ - Comprehensive logging
521
+
522
+ ### Input Validation
523
+
524
+ - Pydantic schemas for all inputs
525
+ - Type checking
526
+ - Length limits
527
+ - Sanitization
528
+
529
+ ## Performance
530
+
531
+ ### Response Times
532
+
533
+ - Simple queries: <5s
534
+ - Medium queries: <15s
535
+ - Complex queries: <30s
536
+ - Simulations: 60-120s
537
+
538
+ ### Optimization Strategies
539
+
540
+ - Connection pooling for external APIs
541
+ - Request timeouts (30s)
542
+ - Caching for market quotes (5min TTL)
543
+ - Rate limiting for external APIs
544
+ - Async/await throughout
545
+
546
+ ## Monitoring
547
+
548
+ ### Health Checks
549
+
550
+ - `/health` - Basic health
551
+ - `/health/deep` - Detailed health with provider status
552
+
553
+ ### Metrics
554
+
555
+ - Case execution time
556
+ - Provider success/failure rates
557
+ - Domain pack usage
558
+ - Learning task completion
559
+ - Storage usage
560
+
561
+ ### Logging
562
+
563
+ - Structured logging
564
+ - Log levels: DEBUG, INFO, WARNING, ERROR
565
+ - Rotation and retention
566
+ - Provider fallback events
567
+ - Learning task events
README.md CHANGED
@@ -1,182 +1,348 @@
1
- # MiroOrg Basic v2
2
 
3
- A multi-agent AI organization system built with FastAPI, LangGraph, and Google Gemini.
4
 
5
  ## Overview
6
 
7
- MiroOrg Basic v2 is an intelligent system that processes user requests through a pipeline of specialized AI agents:
8
- - **Switchboard**: Routes and categorizes incoming requests
9
- - **Research**: Gathers relevant information and context
10
- - **Planner**: Creates actionable plans based on research
11
- - **Verifier**: Validates plans and identifies potential issues
12
- - **Synthesizer**: Produces final, comprehensive responses
 
13
 
14
  ## Architecture
15
 
16
- - **Backend**: FastAPI application with REST endpoints
17
- - **Orchestration**: LangGraph for agent workflow management
18
- - **AI Model**: Google Gemini for agent responses
19
- - **Memory**: JSON-based case persistence
20
- - **Configuration**: Environment variables for API keys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  ## Quick Start
23
 
24
  ### Prerequisites
25
 
26
- - Python 3.14+
27
- - Google Gemini API key
 
 
 
 
28
 
29
- ### Installation
30
 
31
- 1. Clone the repository:
32
  ```bash
33
- git clone <repository-url>
34
- cd miroorg-basic-v2
 
 
35
  ```
36
 
37
- 2. Set up virtual environment:
38
  ```bash
39
- cd backend
40
- python3 -m venv venv
41
- source venv/bin/activate # On Windows: venv\Scripts\activate
42
  ```
43
 
44
- 3. Install dependencies:
45
  ```bash
46
- pip install -r requirements.txt
47
  ```
48
 
49
- 4. Configure environment:
 
 
 
 
50
  ```bash
51
- cp .env.example .env
52
- # Edit .env and add your GEMINI_API_KEY
53
  ```
54
 
55
- 5. Run the application:
56
  ```bash
57
- uvicorn app.main:app --reload
 
58
  ```
59
 
60
- The API will be available at `http://localhost:8000`
 
 
 
61
 
62
- ## API Endpoints
 
 
63
 
64
- ### GET /health
65
- Health check endpoint.
66
 
67
- **Response:**
68
- ```json
69
- {
70
- "status": "healthy"
71
- }
 
 
 
72
  ```
73
 
74
- ### POST /run
75
- Process a user request through the agent pipeline.
76
 
77
- **Request:**
78
- ```json
79
- {
80
- "user_input": "Your request here"
81
- }
82
- ```
83
 
84
- **Response:**
85
- ```json
86
- {
87
- "case_id": "uuid-string",
88
- "route": {...},
89
- "research": {...},
90
- "planner": {...},
91
- "verifier": {...},
92
- "final": {...}
93
- }
94
- ```
95
 
96
- ## Configuration
 
97
 
98
- Create a `.env` file in the backend directory:
 
 
99
 
100
- ```env
101
- GEMINI_API_KEY=your_api_key_here
102
- MODEL_NAME=gemini-1.5-flash
 
103
  ```
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  ## Project Structure
106
 
107
  ```
108
  backend/
109
  ├── app/
110
- │ ├── __init__.py
111
- │ ├── config.py # Configuration and paths
112
- │ ├── main.py # FastAPI application
113
- │ ├── schemas.py # Pydantic models
114
- │ ├── memory.py # Case persistence
115
- │ ├── graph.py # LangGraph orchestration
116
- ── agents/ # Agent implementations
117
- ├── __init__.py
118
- │ │ ├── _model.py # Gemini API wrapper
119
- │ │ ├── switchboard.py
120
- │ │ ── research.py
121
- ├── planner.py
122
- │ │ ├── verifier.py
123
- │ │ ── synthesizer.py
124
- │ ├── prompts/ # Agent instruction files
125
- │ │ ├── research.txt
126
- │ │ ├── planner.txt
127
- │ │ ├── verifier.txt
128
- │ │ ── synthesizer.txt
129
- │ └── data/
130
- ├── logs/ # Application logs
131
- ── memory/ # Saved cases
132
- ├── .env # Environment variables
133
- ── requirements.txt # Python dependencies
134
- ── README.md
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  ```
136
 
137
  ## Development
138
 
139
- ### Running Tests
140
 
141
- ```bash
142
- pytest
143
- ```
 
 
144
 
145
- ### Adding New Agents
146
 
147
- 1. Create agent file in `app/agents/`
148
- 2. Add prompt file in `app/prompts/`
149
- 3. Update `graph.py` to include new node
150
- 4. Add to `schemas.py` if needed
151
 
152
- ### Logging
153
 
154
- Logs are written to `app/data/logs/` with rotation. Check logs for debugging agent execution and API errors.
 
 
 
 
155
 
156
- ## Deployment
 
 
 
 
 
157
 
158
- ### Docker
 
 
 
 
159
 
160
- ```dockerfile
161
- FROM python:3.14-slim
162
 
163
- WORKDIR /app
164
- COPY requirements.txt .
165
- RUN pip install -r requirements.txt
166
 
167
- COPY . .
168
- EXPOSE 8000
169
 
170
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
171
- ```
 
 
172
 
173
- ### Production Considerations
174
 
175
- - Set `GEMINI_API_KEY` securely (environment variables, secrets management)
176
- - Configure logging for your deployment environment
177
- - Add rate limiting and authentication as needed
178
- - Monitor agent performance and API usage
 
 
 
 
179
 
180
  ## License
181
 
182
- [Add your license here]
 
 
 
 
 
1
+ # MiroOrg v1.1 - AI Financial Intelligence System
2
 
3
+ A general intelligence operating system that orchestrates multiple specialist agents, runs simulations, and supports pluggable domain packs with autonomous knowledge evolution.
4
 
5
  ## Overview
6
 
7
+ MiroOrg v1.1 is a 6-layer AI system that processes user requests through specialized agents, integrates domain-specific intelligence, runs simulations, continuously improves itself over time, and monitors its own health:
8
+
9
+ - **Layer 1: Core Platform** - Multi-provider LLM abstraction (OpenRouter, Ollama, OpenAI) with automatic fallback
10
+ - **Layer 2: Domain Intelligence** - Pluggable domain packs (starting with Finance) that enhance agent capabilities
11
+ - **Layer 3: Agent Orchestration** - Five specialized agents working in concert
12
+ - **Layer 4: Simulation Lab** - Integration with MiroFish for scenario modeling
13
+ - **Layer 5: Autonomous Learning** - Self-improvement through knowledge ingestion, prompt evolution, and skill distillation
+ - **Layer 6: Sentinel** - Health monitoring, LLM-based diagnosis, and safe auto-patching
14
 
15
  ## Architecture
16
 
17
+ ### Five Specialized Agents
18
+
19
+ 1. **Switchboard** - Routes requests using 4-dimensional classification:
20
+ - Task family (normal/simulation)
21
+ - Domain pack (finance/general/policy/custom)
22
+ - Complexity (simple/medium/complex)
23
+ - Execution mode (solo/standard/deep)
24
+
25
+ 2. **Research** - Gathers information with domain-enhanced capabilities:
26
+ - Web search via Tavily
27
+ - News via NewsAPI
28
+ - Financial data via Alpha Vantage
29
+ - Entity and ticker extraction for finance domain
30
+
31
+ 3. **Verifier** - Validates information with credibility scoring:
32
+ - Source reliability checking
33
+ - Rumor and scam detection
34
+ - Uncertainty quantification
35
+
36
+ 4. **Planner** - Creates actionable plans with simulation awareness:
37
+ - Detects opportunities for scenario modeling
38
+ - Suggests simulation mode when appropriate
39
+
40
+ 5. **Synthesizer** - Produces final answers with confidence metrics:
41
+ - Uncertainty quantification
42
+ - Simulation recommendations
43
+ - Structured output with metadata
44
+
45
+ ### Domain Pack System
46
+
47
+ Domain packs are pluggable modules that enhance agent capabilities for specific domains:
48
+
49
+ - **Finance Domain Pack** (included):
50
+ - Market data integration
51
+ - News analysis
52
+ - Entity/ticker resolution
53
+ - Credibility scoring
54
+ - Stance detection
55
+ - Event analysis
56
+ - Prediction capabilities
57
+
58
+ - **Custom Domain Packs** (extensible):
59
+ - Implement `DomainPack` base class
60
+ - Register in domain registry
61
+ - Agents automatically detect and use capabilities
62
+
63
+ ### Autonomous Learning Layer
64
+
65
+ The system improves itself over time without local model training:
66
+
67
+ - **Knowledge Ingestion**: Automatically ingests knowledge from web search, news, and URLs
68
+ - **Experience Learning**: Learns from case execution patterns
69
+ - **Prompt Evolution**: A/B tests and evolves agent prompts
70
+ - **Skill Distillation**: Extracts reusable skills from repeated patterns
71
+ - **Trust Management**: Tracks source reliability over time
72
+ - **Freshness Management**: Manages knowledge expiration with domain-specific rules
73
+ - **Scheduler**: Runs learning tasks with CPU/battery safeguards for laptop deployment
74
+
75
+ Storage limits: 200MB max for knowledge cache, 2-4KB per summary.
76
 
77
  ## Quick Start
78
 
79
  ### Prerequisites
80
 
81
+ - Python 3.10+
82
+ - Node.js 18+ (for frontend)
83
+ - At least one LLM provider:
84
+ - OpenRouter API key (recommended), or
85
+ - Ollama running locally, or
86
+ - OpenAI API key
87
 
88
+ ### Backend Setup
89
 
90
+ 1. Clone and navigate to backend:
91
  ```bash
92
+ cd backend
93
+ python3 -m venv .venv
94
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
95
+ pip install -r requirements.txt
96
  ```
97
 
98
+ 2. Configure environment:
99
  ```bash
100
+ cp .env.example .env
101
+ # Edit .env and add your API keys
 
102
  ```
103
 
104
+ 3. Run backend:
105
  ```bash
106
+ python -m uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
107
  ```
108
 
109
+ Backend available at `http://localhost:8000`
110
+
111
+ ### Frontend Setup
112
+
113
+ 1. Navigate to frontend:
114
  ```bash
115
+ cd frontend
116
+ npm install
117
  ```
118
 
119
+ 2. Configure environment:
120
  ```bash
121
+ cp .env.local.example .env.local
122
+ # Edit if needed (defaults to localhost:8000)
123
  ```
124
 
125
+ 3. Run frontend:
126
+ ```bash
127
+ npm run dev
128
+ ```
129
 
130
+ Frontend available at `http://localhost:3000`
131
+
132
+ ## Environment Variables
133
 
134
+ ### Required
 
135
 
136
+ ```env
137
+ # Primary LLM Provider (choose one)
138
+ PRIMARY_PROVIDER=openrouter # or ollama, openai
139
+ OPENROUTER_API_KEY=your_key_here
140
+ # OR
141
+ OLLAMA_ENABLED=true
142
+ # OR
143
+ OPENAI_API_KEY=your_key_here
144
  ```
145
 
146
+ ### Optional Services
 
147
 
148
+ ```env
149
+ # Web Search
150
+ TAVILY_API_KEY=your_key_here
 
 
 
151
 
152
+ # News
153
+ NEWSAPI_KEY=your_key_here
 
 
 
 
 
 
 
 
 
154
 
155
+ # Financial Data
156
+ ALPHAVANTAGE_API_KEY=your_key_here
157
 
158
+ # Simulation Lab
159
+ MIROFISH_ENABLED=true
160
+ MIROFISH_API_BASE=http://127.0.0.1:5001
161
 
162
+ # Learning Layer
163
+ LEARNING_ENABLED=true
164
+ KNOWLEDGE_MAX_SIZE_MB=200
165
+ LEARNING_TOPICS=finance,markets,technology,policy
166
  ```
167
 
168
+ See `.env.example` for complete configuration options.
169
+
170
+ ## API Endpoints
171
+
172
+ ### Core Endpoints
173
+
174
+ - `GET /health` - Health check
175
+ - `GET /health/deep` - Detailed health with provider status
176
+ - `GET /config/status` - System configuration
177
+ - `POST /run` - Process user request through agent pipeline
178
+ - `GET /cases` - List case history
179
+ - `GET /cases/{id}` - Get case details
180
+
181
+ ### Agent Endpoints
182
+
183
+ - `GET /agents` - List all agents
184
+ - `GET /agents/{name}` - Get agent details
185
+ - `POST /run/agent` - Run single agent
186
+
187
+ ### Prompt Management
188
+
189
+ - `GET /prompts` - List all prompts
190
+ - `GET /prompts/{name}` - Get prompt content
191
+ - `PUT /prompts/{name}` - Update prompt
192
+
193
+ ### Simulation Endpoints
194
+
195
+ - `POST /simulation/run` - Submit simulation request
196
+ - `GET /simulation/{id}` - Get simulation status
197
+ - `GET /simulation/{id}/report` - Get simulation report
198
+ - `POST /simulation/{id}/chat` - Chat with simulation
199
+
200
+ ### Learning Endpoints
201
+
202
+ - `GET /learning/status` - Learning engine status
203
+ - `POST /learning/run-once` - Manually trigger learning task
204
+ - `GET /learning/insights` - Learning insights
205
+ - `GET /learning/knowledge` - List knowledge items
206
+ - `GET /learning/knowledge/search` - Search knowledge
207
+ - `POST /learning/knowledge/ingest` - Ingest knowledge
208
+ - `GET /learning/skills` - List distilled skills
209
+ - `POST /learning/skills/distill` - Distill new skills
210
+ - `GET /learning/sources/trust` - Get trusted sources
211
+ - `GET /learning/sources/freshness` - Get stale items
212
+ - `GET /learning/prompts/versions/{name}` - Get prompt versions
213
+ - `POST /learning/prompts/optimize/{name}` - Optimize prompt
214
+ - `POST /learning/prompts/promote/{name}/{version}` - Promote prompt
215
+
216
  ## Project Structure
217
 
218
  ```
219
  backend/
220
  ├── app/
221
+ │ ├── agents/ # Five specialized agents
222
+ │ │ ├── _model.py # Multi-provider LLM abstraction
223
+ │ │ ├── switchboard.py # 4D routing
224
+ │ │ ├── research.py # Domain-enhanced research
225
+ │ │ ├── verifier.py # Credibility scoring
226
+ │ │ ├── planner.py # Simulation-aware planning
227
+ │ │ └── synthesizer.py # Final answer generation
228
+ │ ├── domain_packs/ # Pluggable domain intelligence
229
+ │ │ ├── base.py # DomainPack base class
230
+ │ │ ├── registry.py # Domain pack registry
231
+ │ │ └── finance/ # Finance domain pack
232
+ │ ├── services/ # Core services
233
+ │ │ ├── learning/ # Autonomous learning layer
234
+ │ │ │ ├── knowledge_ingestor.py
235
+ │ │ │ ├── knowledge_store.py
236
+ │ │ │ ├── learning_engine.py
237
+ │ │ │ ├── prompt_optimizer.py
238
+ │ │ │ ├── skill_distiller.py
239
+ │ │ │ ├── trust_manager.py
240
+ │ │ │ └── scheduler.py
241
+ │ │ ├── api_discovery/ # API discovery subsystem
242
+ │ │ ├── case_store.py # Case persistence
243
+ │ │ ├── simulation_store.py
244
+ │ │ └── external_sources.py
245
+ │ ├── routers/ # API routers
246
+ │ │ ├── simulation.py
247
+ │ │ └── learning.py
248
+ │ ├── prompts/ # Agent prompts
249
+ │ ├── data/ # Data storage
250
+ │ │ ├── memory/ # Case records
251
+ │ │ ├── simulations/ # Simulation records
252
+ │ │ ├── knowledge/ # Knowledge cache
253
+ │ │ ├── skills/ # Distilled skills
254
+ │ │ ├── prompt_versions/ # Prompt versions
255
+ │ │ └── learning/ # Learning metadata
256
+ │ ├── config.py # Configuration
257
+ │ ├── main.py # FastAPI app
258
+ │ ├── schemas.py # Pydantic models
259
+ │ └── graph.py # Agent orchestration
260
+
261
+ frontend/
262
+ ├── src/
263
+ │ ├── app/ # Next.js pages
264
+ │ │ ├── page.tsx # Dashboard
265
+ │ │ ├── analyze/ # Analysis interface
266
+ │ │ ├── cases/ # Case history
267
+ │ │ ├── simulation/ # Simulation interface
268
+ │ │ ├── prompts/ # Prompt lab
269
+ │ │ └── config/ # System config
270
+ │ ├── components/ # React components
271
+ │ │ ├── layout/
272
+ │ │ ├── common/
273
+ │ │ ├── analyze/
274
+ │ │ ├── cases/
275
+ │ │ ├── simulation/
276
+ │ │ └── prompts/
277
+ │ └── lib/
278
+ │ ├── api.ts # API client
279
+ │ └── types.ts # TypeScript types
280
  ```
281
 
282
  ## Development
283
 
284
+ ### Adding a New Domain Pack
285
 
286
+ 1. Create domain pack directory: `backend/app/domain_packs/your_domain/`
287
+ 2. Implement `DomainPack` base class in `pack.py`
288
+ 3. Add domain-specific modules (data sources, analyzers, etc.)
289
+ 4. Register in `backend/app/domain_packs/init_packs.py`
290
+ 5. Agents will automatically detect and use capabilities
291
 
292
+ ### Customizing Agent Prompts
293
 
294
+ 1. Navigate to `backend/app/prompts/`
295
+ 2. Edit prompt files (research.txt, planner.txt, etc.)
296
+ 3. Changes take effect immediately (no restart needed)
297
+ 4. Use Prompt Lab UI for live editing
298
 
299
+ ### Running with Different Providers
300
 
301
+ **OpenRouter (recommended for production):**
302
+ ```env
303
+ PRIMARY_PROVIDER=openrouter
304
+ OPENROUTER_API_KEY=your_key
305
+ ```
306
 
307
+ **Ollama (local, privacy-focused):**
308
+ ```env
309
+ PRIMARY_PROVIDER=ollama
310
+ OLLAMA_ENABLED=true
311
+ OLLAMA_BASE_URL=http://127.0.0.1:11434/api
312
+ ```
313
 
314
+ **OpenAI (high quality):**
315
+ ```env
316
+ PRIMARY_PROVIDER=openai
317
+ OPENAI_API_KEY=your_key
318
+ ```
319
 
320
+ ## Deployment
 
321
 
322
+ ### Single-User Local Deployment (Recommended)
 
 
323
 
324
+ This system is optimized for single-user local deployment on laptops (tested on 8GB/256GB M2 Air):
 
325
 
326
+ - Learning scheduler respects CPU usage (<50%) and battery level (>30%)
327
+ - Knowledge cache limited to 200MB with LRU eviction
328
+ - All data stored locally in `backend/app/data/`
329
+ - No cloud dependencies required
330
 
331
+ ### Production Deployment
332
 
333
+ For multi-user production deployment:
334
+
335
+ 1. Add authentication and authorization
336
+ 2. Use PostgreSQL instead of JSON storage
337
+ 3. Add rate limiting and request queuing
338
+ 4. Deploy with Docker/Kubernetes
339
+ 5. Use managed LLM services
340
+ 6. Add monitoring and alerting
341
 
342
  ## License
343
 
344
+ [Add your license here]
345
+
346
+ ## Contributing
347
+
348
+ [Add contribution guidelines here]
backend/.env.example CHANGED
@@ -66,4 +66,13 @@ SIMULATION_TRIGGER_KEYWORDS=simulate,predict,what if,reaction,scenario,public op
66
 
67
  # ---------- Domain Packs ----------
68
  # Enable/disable domain packs (future feature)
69
- FINANCE_DOMAIN_PACK_ENABLED=true
 
 
 
 
 
 
 
 
 
 
66
 
67
  # ---------- Domain Packs ----------
68
  # Enable/disable domain packs (future feature)
69
+ FINANCE_DOMAIN_PACK_ENABLED=true
70
+
71
+
72
+ # ---------- Sentinel Layer ----------
73
+ # Sentinel provides adaptive maintenance and self-healing
74
+ SENTINEL_ENABLED=true
75
+ SENTINEL_CYCLE_INTERVAL_MINUTES=60
76
+ SENTINEL_MAX_DIAGNOSES_PER_CYCLE=5
77
+ SENTINEL_CPU_THRESHOLD=80.0
78
+ SENTINEL_BATTERY_THRESHOLD=20.0
backend/app/agents/planner.py CHANGED
@@ -1,7 +1,45 @@
 
1
  from app.agents._model import call_model, LLMProviderError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  def run_planner(user_input: str, research_output: str, prompt_template: str) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  prompt = (
6
  f"{prompt_template}\n\n"
7
  f"User Request:\n{user_input}\n\n"
@@ -10,11 +48,16 @@ def run_planner(user_input: str, research_output: str, prompt_template: str) ->
10
 
11
  try:
12
  text = call_model(prompt, mode="chat")
 
 
13
  return {
14
  "agent": "planner",
15
  "summary": text,
16
- "details": {"model_mode": "chat"},
17
- "confidence": 0.75,
 
 
 
18
  }
19
  except LLMProviderError as e:
20
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
+ from app.config import SIMULATION_TRIGGER_KEYWORDS
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
9
+
10
+
11
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
12
+ """Extract confidence score from structured LLM output."""
13
+ match = _CONFIDENCE_PATTERN.search(text)
14
+ if match:
15
+ try:
16
+ score = float(match.group(1))
17
+ return max(0.0, min(1.0, score))
18
+ except ValueError:
19
+ pass
20
+ return default
21
 
22
 
23
  def run_planner(user_input: str, research_output: str, prompt_template: str) -> dict:
24
+ # Detect if simulation mode would be appropriate
25
+ user_lower = user_input.lower()
26
+ simulation_suggested = any(keyword in user_lower for keyword in SIMULATION_TRIGGER_KEYWORDS)
27
+
28
+ # Check for scenario/prediction patterns in research
29
+ research_lower = research_output.lower()
30
+ scenario_patterns = ["scenario", "what if", "predict", "forecast", "impact", "reaction",
31
+ "what would", "how would", "could affect", "might happen"]
32
+ has_scenario_context = any(pattern in research_lower for pattern in scenario_patterns)
33
+
34
+ # Also check user input for scenario patterns
35
+ user_scenario_patterns = ["what would", "what if", "how would", "what happens",
36
+ "what could", "imagine", "suppose", "hypothetical"]
37
+ has_user_scenario = any(pattern in user_lower for pattern in user_scenario_patterns)
38
+
39
+ if (has_scenario_context or has_user_scenario) and not simulation_suggested:
40
+ simulation_suggested = True
41
+ logger.info("Planner detected scenario analysis opportunity - suggesting simulation mode")
42
+
43
  prompt = (
44
  f"{prompt_template}\n\n"
45
  f"User Request:\n{user_input}\n\n"
 
48
 
49
  try:
50
  text = call_model(prompt, mode="chat")
51
+ confidence = _extract_confidence(text, default=0.70)
52
+
53
  return {
54
  "agent": "planner",
55
  "summary": text,
56
+ "details": {
57
+ "model_mode": "chat",
58
+ "simulation_suggested": simulation_suggested
59
+ },
60
+ "confidence": confidence,
61
  }
62
  except LLMProviderError as e:
63
  return {
backend/app/agents/research.py CHANGED
@@ -1,23 +1,85 @@
 
1
  from app.agents._model import call_model, LLMProviderError
2
  from app.services.external_sources import build_external_context
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def run_research(user_input: str, prompt_template: str) -> dict:
6
  external_context = build_external_context(user_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  prompt = (
9
  f"{prompt_template}\n\n"
10
  f"User Request:\n{user_input}\n\n"
11
  f"External Context:\n{external_context}"
 
12
  )
13
 
14
  try:
15
  text = call_model(prompt, mode="chat")
 
 
 
 
 
 
 
 
16
  return {
17
  "agent": "research",
18
  "summary": text,
19
- "details": {"external_context_used": external_context != "No external API context available."},
20
- "confidence": 0.72,
 
 
 
 
 
 
21
  }
22
  except LLMProviderError as e:
23
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
  from app.services.external_sources import build_external_context
4
+ from app.domain_packs.registry import get_registry
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
10
+
11
+
12
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
13
+ """Extract confidence score from structured LLM output."""
14
+ match = _CONFIDENCE_PATTERN.search(text)
15
+ if match:
16
+ try:
17
+ score = float(match.group(1))
18
+ return max(0.0, min(1.0, score))
19
+ except ValueError:
20
+ pass
21
+ return default
22
 
23
 
24
  def run_research(user_input: str, prompt_template: str) -> dict:
25
  external_context = build_external_context(user_input)
26
+
27
+ # Detect domain and enhance research with domain pack capabilities
28
+ registry = get_registry()
29
+ detected_domain = registry.detect_domain(user_input)
30
+
31
+ domain_enhanced_context = {}
32
+ if detected_domain:
33
+ logger.info(f"Enhancing research with domain pack: {detected_domain}")
34
+ pack = registry.get_pack(detected_domain)
35
+ if pack:
36
+ try:
37
+ base_context = {
38
+ "user_input": user_input,
39
+ "external_context": external_context
40
+ }
41
+ domain_enhanced_context = pack.enhance_research(user_input, base_context)
42
+ logger.info(f"Domain enhancement successful: {detected_domain}")
43
+ except Exception as e:
44
+ logger.warning(f"Domain enhancement failed for {detected_domain}: {e}")
45
+ domain_enhanced_context = {}
46
+
47
+ # Build enhanced prompt with domain context
48
+ domain_context_str = ""
49
+ if domain_enhanced_context:
50
+ domain_context_str = "\n\nDomain-Specific Context:\n"
51
+ for key, value in domain_enhanced_context.items():
52
+ if value:
53
+ domain_context_str += f"{key}: {value}\n"
54
 
55
  prompt = (
56
  f"{prompt_template}\n\n"
57
  f"User Request:\n{user_input}\n\n"
58
  f"External Context:\n{external_context}"
59
+ f"{domain_context_str}"
60
  )
61
 
62
  try:
63
  text = call_model(prompt, mode="chat")
64
+
65
+ # Extract structured entities from domain enhancement
66
+ entities = domain_enhanced_context.get("entities", []) if domain_enhanced_context else []
67
+ tickers = domain_enhanced_context.get("tickers", []) if domain_enhanced_context else []
68
+
69
+ # Extract confidence from LLM output (our prompt asks for it)
70
+ confidence = _extract_confidence(text, default=0.65)
71
+
72
  return {
73
  "agent": "research",
74
  "summary": text,
75
+ "details": {
76
+ "external_context_used": external_context != "No external API context available.",
77
+ "domain_pack": detected_domain or "general",
78
+ "entities": entities,
79
+ "tickers": tickers,
80
+ "domain_enhanced": bool(domain_enhanced_context)
81
+ },
82
+ "confidence": confidence,
83
  }
84
  except LLMProviderError as e:
85
  return {
backend/app/agents/switchboard.py CHANGED
@@ -23,16 +23,28 @@ def decide_route(user_input: str) -> dict:
23
  words = len(text.split())
24
 
25
  # Dimension 1: Task family (simulation detection)
 
26
  task_family = "simulation" if any(k in lower for k in SIMULATION_TRIGGER_KEYWORDS) else "normal"
27
 
 
 
 
 
 
 
 
 
28
  # Dimension 2: Domain pack detection
29
  registry = get_registry()
30
  detected_domain = registry.detect_domain(user_input)
31
  domain_pack = detected_domain if detected_domain else "general"
32
 
33
- # Dimension 3: Complexity based on word count
34
  if task_family == "simulation":
35
  complexity = "complex"
 
 
 
36
  elif words <= 5:
37
  complexity = "simple"
38
  elif words <= 25:
@@ -40,9 +52,12 @@ def decide_route(user_input: str) -> dict:
40
  else:
41
  complexity = "complex"
42
 
43
- # Dimension 4: Execution mode based on complexity
44
  if task_family == "simulation":
45
  execution_mode = "deep"
 
 
 
46
  elif complexity == "simple":
47
  execution_mode = "solo"
48
  elif complexity == "medium":
@@ -50,10 +65,18 @@ def decide_route(user_input: str) -> dict:
50
  else:
51
  execution_mode = "deep"
52
 
 
 
 
 
 
 
 
 
53
  return {
54
  "task_family": task_family,
55
  "domain_pack": domain_pack,
56
  "complexity": complexity,
57
  "execution_mode": execution_mode,
58
- "risk_level": "medium" if execution_mode == "deep" else "low",
59
  }
 
23
  words = len(text.split())
24
 
25
  # Dimension 1: Task family (simulation detection)
26
+ # Check configured keywords
27
  task_family = "simulation" if any(k in lower for k in SIMULATION_TRIGGER_KEYWORDS) else "normal"
28
 
29
+ # Additional scenario patterns that should also trigger deep analysis
30
+ scenario_patterns = [
31
+ "what would", "what if", "how would", "what happens if",
32
+ "what could", "imagine if", "suppose", "hypothetical",
33
+ "could affect", "might impact", "would react",
34
+ ]
35
+ is_speculative = any(p in lower for p in scenario_patterns)
36
+
37
  # Dimension 2: Domain pack detection
38
  registry = get_registry()
39
  detected_domain = registry.detect_domain(user_input)
40
  domain_pack = detected_domain if detected_domain else "general"
41
 
42
+ # Dimension 3: Complexity based on word count and nature
43
  if task_family == "simulation":
44
  complexity = "complex"
45
+ elif is_speculative:
46
+ # Speculative questions always get at least medium complexity
47
+ complexity = "complex" if words > 15 else "medium"
48
  elif words <= 5:
49
  complexity = "simple"
50
  elif words <= 25:
 
52
  else:
53
  complexity = "complex"
54
 
55
+ # Dimension 4: Execution mode based on complexity and nature
56
  if task_family == "simulation":
57
  execution_mode = "deep"
58
+ elif is_speculative:
59
+ # Speculative questions always get deep mode (verifier should check uncertainty)
60
+ execution_mode = "deep"
61
  elif complexity == "simple":
62
  execution_mode = "solo"
63
  elif complexity == "medium":
 
65
  else:
66
  execution_mode = "deep"
67
 
68
+ # Risk level
69
+ if execution_mode == "deep":
70
+ risk_level = "medium"
71
+ elif is_speculative:
72
+ risk_level = "medium"
73
+ else:
74
+ risk_level = "low"
75
+
76
  return {
77
  "task_family": task_family,
78
  "domain_pack": domain_pack,
79
  "complexity": complexity,
80
  "execution_mode": execution_mode,
81
+ "risk_level": risk_level,
82
  }
backend/app/agents/synthesizer.py CHANGED
@@ -1,4 +1,43 @@
 
1
  from app.agents._model import call_model, LLMProviderError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  def run_synthesizer(
@@ -8,6 +47,18 @@ def run_synthesizer(
8
  verifier_output: str,
9
  prompt_template: str
10
  ) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
11
  prompt = (
12
  f"{prompt_template}\n\n"
13
  f"User Request:\n{user_input}\n\n"
@@ -18,11 +69,27 @@ def run_synthesizer(
18
 
19
  try:
20
  text = call_model(prompt, mode="chat")
 
 
 
 
 
 
 
 
 
 
 
 
21
  return {
22
  "agent": "synthesizer",
23
  "summary": text,
24
- "details": {"model_mode": "chat"},
25
- "confidence": 0.82,
 
 
 
 
26
  }
27
  except LLMProviderError as e:
28
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
8
+ _UNCERTAINTY_PATTERN = re.compile(r'Uncertainty\s*Level:\s*(HIGH|MEDIUM|LOW)', re.IGNORECASE)
9
+
10
+
11
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
12
+ """Extract confidence score from structured LLM output."""
13
+ match = _CONFIDENCE_PATTERN.search(text)
14
+ if match:
15
+ try:
16
+ score = float(match.group(1))
17
+ return max(0.0, min(1.0, score))
18
+ except ValueError:
19
+ pass
20
+ return default
21
+
22
+
23
+ def _extract_uncertainty(text: str) -> str:
24
+ """Extract uncertainty level from structured LLM output."""
25
+ match = _UNCERTAINTY_PATTERN.search(text)
26
+ if match:
27
+ return match.group(1).upper()
28
+
29
+ # Fallback heuristic
30
+ text_lower = text.lower()
31
+ uncertainty_indicators = ["uncertain", "unclear", "missing", "unverified",
32
+ "assumption", "unknown", "speculative", "conflicting",
33
+ "limited evidence", "cannot confirm"]
34
+ count = sum(1 for indicator in uncertainty_indicators if indicator in text_lower)
35
+
36
+ if count >= 4:
37
+ return "HIGH"
38
+ elif count >= 2:
39
+ return "MEDIUM"
40
+ return "LOW"
41
 
42
 
43
  def run_synthesizer(
 
47
  verifier_output: str,
48
  prompt_template: str
49
  ) -> dict:
50
+ # Extract uncertainty level from verifier output (or synthesizer will self-assess)
51
+ uncertainty_level = _extract_uncertainty(verifier_output)
52
+
53
+ # Check if simulation was recommended by planner or verifier
54
+ planner_lower = planner_output.lower()
55
+ simulation_recommended = (
56
+ ("simulation recommended: yes" in planner_lower) or
57
+ ("simulation" in planner_lower and "recommend" in planner_lower)
58
+ )
59
+
60
+ logger.info(f"Synthesizer: uncertainty_level={uncertainty_level}, simulation_recommended={simulation_recommended}")
61
+
62
  prompt = (
63
  f"{prompt_template}\n\n"
64
  f"User Request:\n{user_input}\n\n"
 
69
 
70
  try:
71
  text = call_model(prompt, mode="chat")
72
+ confidence = _extract_confidence(text, default=0.60)
73
+
74
+ # Also try to extract uncertainty from synthesizer's own output
75
+ synth_uncertainty = _extract_uncertainty(text)
76
+ # Use the higher uncertainty between verifier and synthesizer
77
+ if synth_uncertainty == "HIGH" or uncertainty_level == "HIGH":
78
+ final_uncertainty = "HIGH"
79
+ elif synth_uncertainty == "MEDIUM" or uncertainty_level == "MEDIUM":
80
+ final_uncertainty = "MEDIUM"
81
+ else:
82
+ final_uncertainty = "LOW"
83
+
84
  return {
85
  "agent": "synthesizer",
86
  "summary": text,
87
+ "details": {
88
+ "model_mode": "chat",
89
+ "uncertainty_level": final_uncertainty,
90
+ "simulation_recommended": simulation_recommended
91
+ },
92
+ "confidence": confidence,
93
  }
94
  except LLMProviderError as e:
95
  return {
backend/app/agents/verifier.py CHANGED
@@ -1,21 +1,94 @@
 
1
  from app.agents._model import call_model, LLMProviderError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  def run_verifier(user_input: str, research_output: str, planner_output: str, prompt_template: str) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  prompt = (
6
  f"{prompt_template}\n\n"
7
  f"User Request:\n{user_input}\n\n"
8
  f"Research Packet:\n{research_output}\n\n"
9
  f"Planner Output:\n{planner_output}"
 
10
  )
11
 
12
  try:
13
  text = call_model(prompt, mode="reasoner")
 
 
 
 
 
 
 
 
 
14
  return {
15
  "agent": "verifier",
16
  "summary": text,
17
- "details": {"model_mode": "reasoner"},
18
- "confidence": 0.79,
 
 
 
 
 
 
 
19
  }
20
  except LLMProviderError as e:
21
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
+ from app.domain_packs.registry import get_registry
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
9
+
10
+
11
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
12
+ """Extract confidence score from structured LLM output."""
13
+ match = _CONFIDENCE_PATTERN.search(text)
14
+ if match:
15
+ try:
16
+ score = float(match.group(1))
17
+ return max(0.0, min(1.0, score))
18
+ except ValueError:
19
+ pass
20
+ return default
21
 
22
 
23
  def run_verifier(user_input: str, research_output: str, planner_output: str, prompt_template: str) -> dict:
24
+ # Detect domain and enhance verification with domain pack capabilities
25
+ registry = get_registry()
26
+ detected_domain = registry.detect_domain(user_input)
27
+
28
+ domain_verification = {}
29
+ if detected_domain:
30
+ logger.info(f"Enhancing verification with domain pack: {detected_domain}")
31
+ pack = registry.get_pack(detected_domain)
32
+ if pack:
33
+ try:
34
+ # Extract claims from research and planner outputs for verification
35
+ claims = []
36
+ for line in (research_output + "\n" + planner_output).split("\n"):
37
+ stripped = line.strip()
38
+ if stripped and len(stripped) > 20 and not stripped.startswith(("Facts:", "Assumptions:", "Open Questions:", "Key Facts:", "Plan:", "Objective:")):
39
+ claims.append(stripped)
40
+
41
+ context = {
42
+ "user_input": user_input,
43
+ "research_output": research_output,
44
+ "planner_output": planner_output,
45
+ "claims": claims[:30] # Limit claims to avoid token overflow
46
+ }
47
+ domain_verification = pack.enhance_verification(claims[:30], context)
48
+ logger.info(f"Domain verification successful: {detected_domain}")
49
+ except Exception as e:
50
+ logger.warning(f"Domain verification failed for {detected_domain}: {e}")
51
+ domain_verification = {}
52
+
53
+ # Build enhanced prompt with domain verification
54
+ domain_verification_str = ""
55
+ if domain_verification:
56
+ domain_verification_str = "\n\nDomain-Specific Verification:\n"
57
+ for key, value in domain_verification.items():
58
+ if value:
59
+ domain_verification_str += f"{key}: {value}\n"
60
+
61
  prompt = (
62
  f"{prompt_template}\n\n"
63
  f"User Request:\n{user_input}\n\n"
64
  f"Research Packet:\n{research_output}\n\n"
65
  f"Planner Output:\n{planner_output}"
66
+ f"{domain_verification_str}"
67
  )
68
 
69
  try:
70
  text = call_model(prompt, mode="reasoner")
71
+
72
+ # Extract confidence from LLM output
73
+ confidence = _extract_confidence(text, default=0.70)
74
+
75
+ # Extract structured verification results
76
+ credibility_score = domain_verification.get("credibility_score", 0.5) if domain_verification else 0.5
77
+ rumors_detected = domain_verification.get("rumors_detected", []) if domain_verification else []
78
+ scams_detected = domain_verification.get("scams_detected", []) if domain_verification else []
79
+
80
  return {
81
  "agent": "verifier",
82
  "summary": text,
83
+ "details": {
84
+ "model_mode": "reasoner",
85
+ "domain_pack": detected_domain or "general",
86
+ "credibility_score": credibility_score,
87
+ "rumors_detected": rumors_detected,
88
+ "scams_detected": scams_detected,
89
+ "domain_verified": bool(domain_verification)
90
+ },
91
+ "confidence": confidence,
92
  }
93
  except LLMProviderError as e:
94
  return {
backend/app/config.py CHANGED
@@ -45,11 +45,6 @@ JINA_READER_BASE = os.getenv("JINA_READER_BASE", "https://r.jina.ai/http://")
45
  MIROFISH_ENABLED = os.getenv("MIROFISH_ENABLED", "false").lower() == "true"
46
  MIROFISH_API_BASE = os.getenv("MIROFISH_API_BASE", "http://127.0.0.1:5001")
47
  MIROFISH_TIMEOUT_SECONDS = int(os.getenv("MIROFISH_TIMEOUT_SECONDS", "120"))
48
- MIROFISH_HEALTH_PATH = os.getenv("MIROFISH_HEALTH_PATH", "/health")
49
- MIROFISH_RUN_PATH = os.getenv("MIROFISH_RUN_PATH", "/simulation/run")
50
- MIROFISH_STATUS_PATH = os.getenv("MIROFISH_STATUS_PATH", "/simulation/{id}")
51
- MIROFISH_REPORT_PATH = os.getenv("MIROFISH_REPORT_PATH", "/simulation/{id}/report")
52
- MIROFISH_CHAT_PATH = os.getenv("MIROFISH_CHAT_PATH", "/simulation/{id}/chat")
53
 
54
  SIMULATION_TRIGGER_KEYWORDS = [
55
  item.strip().lower()
@@ -143,3 +138,61 @@ def validate_config():
143
 
144
  # Run validation on import (startup)
145
  validate_config()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  MIROFISH_ENABLED = os.getenv("MIROFISH_ENABLED", "false").lower() == "true"
46
  MIROFISH_API_BASE = os.getenv("MIROFISH_API_BASE", "http://127.0.0.1:5001")
47
  MIROFISH_TIMEOUT_SECONDS = int(os.getenv("MIROFISH_TIMEOUT_SECONDS", "120"))
 
 
 
 
 
48
 
49
  SIMULATION_TRIGGER_KEYWORDS = [
50
  item.strip().lower()
 
138
 
139
  # Run validation on import (startup)
140
  validate_config()
141
+
142
+
143
+ # Learning layer configuration
144
+ LEARNING_ENABLED = os.getenv("LEARNING_ENABLED", "true").lower() == "true"
145
+ KNOWLEDGE_MAX_SIZE_MB = int(os.getenv("KNOWLEDGE_MAX_SIZE_MB", "200"))
146
+ LEARNING_SCHEDULE_INTERVAL = int(os.getenv("LEARNING_SCHEDULE_INTERVAL", "6")) # hours
147
+ LEARNING_BATCH_SIZE = int(os.getenv("LEARNING_BATCH_SIZE", "10"))
148
+ LEARNING_TOPICS = [
149
+ item.strip()
150
+ for item in os.getenv(
151
+ "LEARNING_TOPICS",
152
+ "finance,markets,technology,policy",
153
+ ).split(",")
154
+ if item.strip()
155
+ ]
156
+
157
+
158
def get_config():
    """Return a configuration snapshot object for dependency injection.

    Values are read from the module-level settings at call time, so each
    call reflects the current configuration. Uses ``types.SimpleNamespace``
    instead of declaring a throwaway class on every invocation (the
    original re-created a class object per call); callers still access
    the same attribute names.
    """
    from types import SimpleNamespace  # stdlib; local import keeps module import light

    return SimpleNamespace(
        app_version=APP_VERSION,
        primary_provider=PRIMARY_PROVIDER,
        fallback_provider=FALLBACK_PROVIDER,
        openrouter_api_key=OPENROUTER_API_KEY,
        openrouter_base_url=OPENROUTER_BASE_URL,
        openrouter_chat_model=OPENROUTER_CHAT_MODEL,
        openrouter_reasoner_model=OPENROUTER_REASONER_MODEL,
        ollama_enabled=OLLAMA_ENABLED,
        ollama_base_url=OLLAMA_BASE_URL,
        ollama_chat_model=OLLAMA_CHAT_MODEL,
        ollama_reasoner_model=OLLAMA_REASONER_MODEL,
        openai_api_key=OPENAI_API_KEY,
        openai_base_url=OPENAI_BASE_URL,
        openai_chat_model=OPENAI_CHAT_MODEL,
        openai_reasoner_model=OPENAI_REASONER_MODEL,
        tavily_api_key=TAVILY_API_KEY,
        newsapi_key=NEWSAPI_KEY,
        alphavantage_api_key=ALPHAVANTAGE_API_KEY,
        mirofish_enabled=MIROFISH_ENABLED,
        mirofish_api_base=MIROFISH_API_BASE,
        data_dir=str(DATA_DIR),
        memory_dir=str(MEMORY_DIR),
        simulation_dir=str(SIMULATION_DIR),
        prompts_dir=str(PROMPTS_DIR),
        learning_enabled=LEARNING_ENABLED,
        knowledge_max_size_mb=KNOWLEDGE_MAX_SIZE_MB,
        learning_schedule_interval=LEARNING_SCHEDULE_INTERVAL,
        learning_batch_size=LEARNING_BATCH_SIZE,
        learning_topics=LEARNING_TOPICS,
    )
backend/app/data/sentinel/.gitkeep ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Sentinel data directory
2
+ # This directory stores sentinel runtime data including:
3
+ # - alert_history.json
4
+ # - patch_history.json
5
+ # - cycle_history.json
6
+ # - capability_history.json
7
+ # - sentinel_config.json
8
+ # - pending_patches/
backend/app/data/sentinel/pending_patches/.gitkeep ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Pending patches directory
2
+ # This directory stores patches awaiting human review
3
+ # Format: {patch_id}.patch.json
backend/app/graph.py CHANGED
@@ -1,4 +1,5 @@
1
  import uuid
 
2
  import logging
3
  from typing import TypedDict, Dict, Any
4
 
@@ -14,9 +15,35 @@ from app.agents.synthesizer import run_synthesizer
14
  logger = logging.getLogger(__name__)
15
 
16
 
 
 
 
 
 
17
  def load_prompt(filename: str) -> str:
18
- path = PROMPTS_DIR / filename
19
- return path.read_text(encoding="utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  RESEARCH_PROMPT = load_prompt("research.txt")
@@ -44,51 +71,80 @@ def empty_output(agent_name: str) -> Dict[str, Any]:
44
  }
45
 
46
 
 
 
47
  def switchboard_node(state: OrgState):
48
- return {"route": decide_route(state["user_input"])}
 
 
 
 
49
 
50
 
51
  def research_node(state: OrgState):
52
  if state["route"].get("execution_mode") == "solo":
53
  return {"research": empty_output("research")}
54
- return {"research": run_research(state["user_input"], RESEARCH_PROMPT)}
 
 
 
 
 
 
55
 
56
 
57
  def planner_node(state: OrgState):
58
  if state["route"].get("execution_mode") == "solo":
59
  return {"planner": empty_output("planner")}
60
- return {
 
 
 
61
  "planner": run_planner(
62
  state["user_input"],
63
  state["research"]["summary"],
64
- PLANNER_PROMPT,
65
  )
66
  }
 
 
 
67
 
68
 
69
  def verifier_node(state: OrgState):
70
  if state["route"].get("execution_mode") != "deep":
71
  return {"verifier": empty_output("verifier")}
72
- return {
 
 
 
73
  "verifier": run_verifier(
74
  state["user_input"],
75
  state["research"]["summary"],
76
  state["planner"]["summary"],
77
- VERIFIER_PROMPT,
78
  )
79
  }
 
 
 
80
 
81
 
82
  def synthesizer_node(state: OrgState):
83
- return {
 
 
84
  "final": run_synthesizer(
85
  state["user_input"],
86
  state["research"]["summary"],
87
  state["planner"]["summary"],
88
  state["verifier"]["summary"],
89
- SYNTHESIZER_PROMPT,
90
  )
91
  }
 
 
 
92
 
93
 
94
  graph = StateGraph(OrgState)
@@ -110,6 +166,7 @@ compiled_graph = graph.compile()
110
 
111
  def run_case(user_input: str):
112
  case_id = str(uuid.uuid4())
 
113
  logger.info("Starting case %s", case_id)
114
 
115
  result = compiled_graph.invoke(
@@ -124,5 +181,6 @@ def run_case(user_input: str):
124
  }
125
  )
126
 
127
- logger.info("Case %s completed", case_id)
 
128
  return result
 
1
  import uuid
2
+ import time
3
  import logging
4
  from typing import TypedDict, Dict, Any
5
 
 
15
  logger = logging.getLogger(__name__)
16
 
17
 
18
+ # ── Prompt Loading with Production Version Support ────────────────────────────
19
+
20
+ _prompt_cache: Dict[str, str] = {}
21
+
22
+
23
  def load_prompt(filename: str) -> str:
24
+ """Load prompt from file, with caching."""
25
+ if filename not in _prompt_cache:
26
+ path = PROMPTS_DIR / filename
27
+ _prompt_cache[filename] = path.read_text(encoding="utf-8")
28
+ return _prompt_cache[filename]
29
+
30
+
31
+ def get_active_prompt(prompt_name: str, filename: str) -> str:
32
+ """
33
+ Get the active prompt, preferring a promoted production version.
34
+ Falls back to the file-based prompt if none is promoted.
35
+ """
36
+ try:
37
+ from app.routers.learning import learning_engine
38
+ if learning_engine:
39
+ production = learning_engine.get_active_prompt(prompt_name)
40
+ if production:
41
+ logger.debug(f"Using production prompt version for {prompt_name}")
42
+ return production
43
+ except Exception:
44
+ pass
45
+
46
+ return load_prompt(filename)
47
 
48
 
49
  RESEARCH_PROMPT = load_prompt("research.txt")
 
71
  }
72
 
73
 
74
+ # ── Node Functions with Timing ───────────────────────────────────────────────
75
+
76
def switchboard_node(state: OrgState):
    """Route the incoming request and log how long routing took."""
    started = time.perf_counter()
    route = decide_route(state["user_input"])
    elapsed = time.perf_counter() - started
    logger.info(f"[{state['case_id'][:8]}] switchboard: {elapsed:.2f}s — mode={route.get('execution_mode')}")
    return {"route": route}
82
 
83
 
84
def research_node(state: OrgState):
    """Run the research agent unless the case is routed solo."""
    if state["route"].get("execution_mode") == "solo":
        return {"research": empty_output("research")}

    started = time.perf_counter()
    research = run_research(state["user_input"], get_active_prompt("research", "research.txt"))
    logger.info(f"[{state['case_id'][:8]}] research: {time.perf_counter() - started:.2f}s")
    return {"research": research}
94
 
95
 
96
def planner_node(state: OrgState):
    """Run the planner agent on the research summary (skipped in solo mode)."""
    if state["route"].get("execution_mode") == "solo":
        return {"planner": empty_output("planner")}

    started = time.perf_counter()
    planner = run_planner(
        state["user_input"],
        state["research"]["summary"],
        get_active_prompt("planner", "planner.txt"),
    )
    logger.info(f"[{state['case_id'][:8]}] planner: {time.perf_counter() - started:.2f}s")
    return {"planner": planner}
112
 
113
 
114
def verifier_node(state: OrgState):
    """Run the verifier agent; only deep-mode cases are verified."""
    if state["route"].get("execution_mode") != "deep":
        return {"verifier": empty_output("verifier")}

    started = time.perf_counter()
    verifier = run_verifier(
        state["user_input"],
        state["research"]["summary"],
        state["planner"]["summary"],
        get_active_prompt("verifier", "verifier.txt"),
    )
    logger.info(f"[{state['case_id'][:8]}] verifier: {time.perf_counter() - started:.2f}s")
    return {"verifier": verifier}
131
 
132
 
133
def synthesizer_node(state: OrgState):
    """Produce the final answer from all prior agent outputs."""
    started = time.perf_counter()
    final = run_synthesizer(
        state["user_input"],
        state["research"]["summary"],
        state["planner"]["summary"],
        state["verifier"]["summary"],
        get_active_prompt("synthesizer", "synthesizer.txt"),
    )
    logger.info(f"[{state['case_id'][:8]}] synthesizer: {time.perf_counter() - started:.2f}s")
    return {"final": final}
148
 
149
 
150
  graph = StateGraph(OrgState)
 
166
 
167
  def run_case(user_input: str):
168
  case_id = str(uuid.uuid4())
169
+ t0 = time.perf_counter()
170
  logger.info("Starting case %s", case_id)
171
 
172
  result = compiled_graph.invoke(
 
181
  }
182
  )
183
 
184
+ elapsed = time.perf_counter() - t0
185
+ logger.info("Case %s completed in %.2fs", case_id, elapsed)
186
  return result
backend/app/main.py CHANGED
@@ -1,7 +1,11 @@
 
 
1
  import logging
 
2
 
3
- from fastapi import FastAPI, HTTPException, Query
4
  from fastapi.middleware.cors import CORSMiddleware
 
5
 
6
  from app.schemas import UserTask, AgentRunRequest, PromptUpdateRequest
7
  from app.graph import run_case
@@ -18,22 +22,35 @@ from app.config import (
18
  MIROFISH_ENABLED,
19
  MEMORY_DIR,
20
  PROMPTS_DIR,
 
21
  )
22
  from app.services.case_store import list_cases, get_case, delete_case, memory_stats
23
  from app.services.prompt_store import list_prompts, get_prompt, update_prompt
24
  from app.services.health_service import deep_health
25
  from app.services.agent_registry import list_agents, get_agent, run_single_agent
26
  from app.routers.simulation import router as simulation_router
 
 
 
27
 
28
  logging.basicConfig(level=logging.INFO)
29
  logger = logging.getLogger(__name__)
30
 
31
- app = FastAPI(title="MiroOrg Basic", version=APP_VERSION)
32
 
33
- # Initialize domain packs on startup
34
  from app.domain_packs.init_packs import init_domain_packs
35
  init_domain_packs()
36
 
 
 
 
 
 
 
 
 
 
37
  app.add_middleware(
38
  CORSMiddleware,
39
  allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"],
@@ -43,8 +60,64 @@ app.add_middleware(
43
  )
44
 
45
  app.include_router(simulation_router)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
 
 
48
  @app.get("/health")
49
  def health():
50
  return {"status": "ok", "version": APP_VERSION}
@@ -72,6 +145,8 @@ def config_status():
72
  }
73
 
74
 
 
 
75
  @app.get("/agents")
76
  def agents():
77
  return list_agents()
@@ -85,12 +160,23 @@ def agent_detail(agent_name: str):
85
  return agent
86
 
87
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  @app.post("/run")
89
  def run_org(task: UserTask):
90
  try:
91
  logger.info("Processing /run: %s", task.user_input[:100])
92
  result = run_case(task.user_input)
93
-
94
  payload = {
95
  "case_id": result["case_id"],
96
  "user_input": result["user_input"],
@@ -103,24 +189,29 @@ def run_org(task: UserTask):
103
  ],
104
  "final_answer": result["final"]["summary"],
105
  }
106
-
107
  save_case(result["case_id"], payload)
 
 
 
 
108
  return payload
109
  except Exception as e:
110
  logger.exception("Error in /run")
111
- raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
112
 
113
 
114
  @app.post("/run/debug")
115
  def run_org_debug(task: UserTask):
116
  try:
117
- logger.info("Processing /run/debug: %s", task.user_input[:100])
118
  result = run_case(task.user_input)
119
  save_case(result["case_id"], result)
 
 
 
120
  return result
121
  except Exception as e:
122
  logger.exception("Error in /run/debug")
123
- raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
124
 
125
 
126
  @app.post("/run/agent")
@@ -137,9 +228,11 @@ def run_one_agent(request: AgentRunRequest):
137
  raise HTTPException(status_code=400, detail=str(e))
138
  except Exception as e:
139
  logger.exception("Error in /run/agent")
140
- raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
141
 
142
 
 
 
143
  @app.get("/cases")
144
  def cases(limit: int | None = Query(default=None, ge=1, le=200)):
145
  return list_cases(limit=limit)
@@ -174,6 +267,8 @@ def memory_stats_endpoint():
174
  return memory_stats()
175
 
176
 
 
 
177
  @app.get("/prompts")
178
  def prompts():
179
  return list_prompts()
@@ -185,7 +280,6 @@ def prompt_detail(name: str):
185
  prompt = get_prompt(name)
186
  except ValueError as e:
187
  raise HTTPException(status_code=400, detail=str(e))
188
-
189
  if not prompt:
190
  raise HTTPException(status_code=404, detail="Prompt not found")
191
  return prompt
 
1
+ import asyncio
2
+ import time
3
  import logging
4
+ import os
5
 
6
+ from fastapi import FastAPI, HTTPException, Query, Request
7
  from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.responses import JSONResponse
9
 
10
  from app.schemas import UserTask, AgentRunRequest, PromptUpdateRequest
11
  from app.graph import run_case
 
22
  MIROFISH_ENABLED,
23
  MEMORY_DIR,
24
  PROMPTS_DIR,
25
+ get_config,
26
  )
27
  from app.services.case_store import list_cases, get_case, delete_case, memory_stats
28
  from app.services.prompt_store import list_prompts, get_prompt, update_prompt
29
  from app.services.health_service import deep_health
30
  from app.services.agent_registry import list_agents, get_agent, run_single_agent
31
  from app.routers.simulation import router as simulation_router
32
+ from app.routers.learning import router as learning_router, init_learning_services, start_scheduler_background
33
+ from app.routers.sentinel import router as sentinel_router
34
+ from app.routers.finance import router as finance_router
35
 
36
  logging.basicConfig(level=logging.INFO)
37
  logger = logging.getLogger(__name__)
38
 
39
+ app = FastAPI(title="MiroOrg v1.1", version=APP_VERSION)
40
 
41
+ # Initialize domain packs
42
  from app.domain_packs.init_packs import init_domain_packs
43
  init_domain_packs()
44
 
45
+ # Initialize learning layer
46
+ config = get_config()
47
+ if config.learning_enabled:
48
+ try:
49
+ init_learning_services(config)
50
+ logger.info("Learning layer initialized")
51
+ except Exception as e:
52
+ logger.error(f"Failed to initialize learning layer: {e}")
53
+
54
  app.add_middleware(
55
  CORSMiddleware,
56
  allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"],
 
60
  )
61
 
62
  app.include_router(simulation_router)
63
+ app.include_router(learning_router)
64
+ app.include_router(sentinel_router)
65
+ app.include_router(finance_router)
66
+
67
+
68
+ # ── Request Timing Middleware ─────────────────────────────────────────────────
69
+
70
+ @app.middleware("http")
71
+ async def timing_middleware(request: Request, call_next):
72
+ """Add X-Process-Time header and log slow requests."""
73
+ start = time.perf_counter()
74
+ response = await call_next(request)
75
+ elapsed = time.perf_counter() - start
76
+ response.headers["X-Process-Time"] = f"{elapsed:.3f}"
77
+
78
+ if elapsed > 10.0:
79
+ logger.warning(f"Slow request: {request.method} {request.url.path} took {elapsed:.1f}s")
80
+
81
+ return response
82
+
83
+
84
+ # ── Error Handler ─────────────────────────────────────────────────────────────
85
+
86
+ @app.exception_handler(Exception)
87
+ async def global_exception_handler(request: Request, exc: Exception):
88
+ """Sanitize error responses — don't leak internal stack traces."""
89
+ logger.exception(f"Unhandled error on {request.method} {request.url.path}")
90
+ return JSONResponse(
91
+ status_code=500,
92
+ content={"detail": "An internal error occurred. Please try again."},
93
+ )
94
+
95
+
96
+ # ── Startup Event ─────────────────────────────────────────────────────────────
97
+
98
+ @app.on_event("startup")
99
+ async def on_startup():
100
+ """Start background tasks on app startup."""
101
+ if config.learning_enabled:
102
+ try:
103
+ start_scheduler_background()
104
+ logger.info("Background learning scheduler started")
105
+ except Exception as e:
106
+ logger.error(f"Failed to start learning scheduler: {e}")
107
+
108
+ # Start sentinel scheduler
109
+ sentinel_enabled = os.getenv("SENTINEL_ENABLED", "true").lower() == "true"
110
+ if sentinel_enabled:
111
+ try:
112
+ from app.services.sentinel.scheduler import start_sentinel_scheduler
113
+ start_sentinel_scheduler()
114
+ logger.info("Sentinel scheduler started")
115
+ except Exception as e:
116
+ logger.error(f"Failed to start sentinel scheduler: {e}")
117
 
118
 
119
+ # ── Health ────────────────────────────────────────────────────────────────────
120
+
121
  @app.get("/health")
122
  def health():
123
  return {"status": "ok", "version": APP_VERSION}
 
145
  }
146
 
147
 
148
+ # ── Agents ────────────────────────────────────────────────────────────────────
149
+
150
  @app.get("/agents")
151
  def agents():
152
  return list_agents()
 
160
  return agent
161
 
162
 
163
+ # ── Case Execution ────────────────────────────────────────────────────────────
164
+
165
+ def _fire_and_forget_learning(payload: dict):
166
+ """Fire-and-forget learning from a completed case."""
167
+ from app.routers.learning import learning_engine as _le
168
+ if _le:
169
+ try:
170
+ _le.learn_from_case(payload)
171
+ except Exception as e:
172
+ logger.error(f"Learning from case failed (non-blocking): {e}")
173
+
174
+
175
  @app.post("/run")
176
  def run_org(task: UserTask):
177
  try:
178
  logger.info("Processing /run: %s", task.user_input[:100])
179
  result = run_case(task.user_input)
 
180
  payload = {
181
  "case_id": result["case_id"],
182
  "user_input": result["user_input"],
 
189
  ],
190
  "final_answer": result["final"]["summary"],
191
  }
 
192
  save_case(result["case_id"], payload)
193
+
194
+ # Fire-and-forget: learn from this case
195
+ _fire_and_forget_learning(payload)
196
+
197
  return payload
198
  except Exception as e:
199
  logger.exception("Error in /run")
200
+ raise HTTPException(status_code=500, detail="Failed to process request. Please try again.")
201
 
202
 
203
  @app.post("/run/debug")
204
  def run_org_debug(task: UserTask):
205
  try:
 
206
  result = run_case(task.user_input)
207
  save_case(result["case_id"], result)
208
+
209
+ _fire_and_forget_learning(result)
210
+
211
  return result
212
  except Exception as e:
213
  logger.exception("Error in /run/debug")
214
+ raise HTTPException(status_code=500, detail="Failed to process request. Please try again.")
215
 
216
 
217
  @app.post("/run/agent")
 
228
  raise HTTPException(status_code=400, detail=str(e))
229
  except Exception as e:
230
  logger.exception("Error in /run/agent")
231
+ raise HTTPException(status_code=500, detail="Failed to run agent. Please try again.")
232
 
233
 
234
+ # ── Cases ─────────────────────────────────────────────────────────────────────
235
+
236
  @app.get("/cases")
237
  def cases(limit: int | None = Query(default=None, ge=1, le=200)):
238
  return list_cases(limit=limit)
 
267
  return memory_stats()
268
 
269
 
270
+ # ── Prompts ───────────────────────────────────────────────────────────────────
271
+
272
  @app.get("/prompts")
273
  def prompts():
274
  return list_prompts()
 
280
  prompt = get_prompt(name)
281
  except ValueError as e:
282
  raise HTTPException(status_code=400, detail=str(e))
 
283
  if not prompt:
284
  raise HTTPException(status_code=404, detail="Prompt not found")
285
  return prompt
backend/app/prompts/planner.txt CHANGED
@@ -1,18 +1,90 @@
1
- You are the Planner Agent in an AI organization.
2
-
3
- Your job:
4
- - take the research packet
5
- - produce a clear answer or step-by-step plan
6
- - be practical and structured
7
- - do not invent evidence
8
- - make the output actionable
9
-
10
- Output format:
11
- Plan:
12
- 1. ...
13
- 2. ...
14
- Risks:
15
- - ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  Dependencies:
17
- - ...
18
- Confidence: <0 to 1>
 
 
 
 
 
 
 
 
 
1
+ You are the Planner Agent in MiroOrg — a 5-layer AI intelligence organization.
2
+
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You receive the Research Agent's research packet and transform it into an actionable strategy. You are the strategic thinker — you decide WHAT should be done, in WHAT order, and with what resources. You also serve as the gateway to MiroFish simulation mode.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ CAPABILITIES YOU HAVE ACCESS TO
10
+ ═══════════════════════════════════════════════════════════════
11
+ • Full research packet with facts, entities, market data, domain insights
12
+ • Simulation mode via MiroFish — a digital twin engine that can model multi-agent scenarios, stakeholder reactions, market impacts, and policy cascades
13
+ • Knowledge from past cases — the system learns from previous analyses and can apply distilled skills
14
+
15
+ ═══════════════════════════════════════════════════════════════
16
+ INSTRUCTIONS
17
+ ═══════════════════════════════════════════════════════════════
18
+
19
+ 1. ANALYZE THE RESEARCH PACKET
20
+ - Identify the user's core objective (what do they actually need?)
21
+ - Distinguish between informational needs and decision-making needs
22
+ - Note the complexity level: Is this a simple lookup, a multi-step analysis, or a strategic decision?
23
+
24
+ 2. CREATE A STRUCTURED PLAN
25
+ - Break the response into clear, numbered steps
26
+ - Each step should be actionable and specific
27
+ - Include conditional branches where outcomes are uncertain ("If X, then Y; otherwise Z")
28
+ - Prioritize steps by impact and urgency
29
+
30
+ 3. RISK ASSESSMENT
31
+ - What could go wrong with this plan?
32
+ - What are the key assumptions that could invalidate the strategy?
33
+ - What external factors could change the situation?
34
+ - Rate each risk: HIGH / MEDIUM / LOW impact and probability
35
+
36
+ 4. RESOURCE & DEPENDENCY MAPPING
37
+ - What information is needed that we don't have?
38
+ - What actions depend on other actions completing first?
39
+ - What external factors or decisions are blocking?
40
+
41
+ 5. TIMELINE ESTIMATION
42
+ - If the plan involves actions: suggest reasonable timeframes
43
+ - If the plan involves monitoring: suggest check-in intervals
44
+ - Note time-sensitive elements that could expire
45
+
46
+ 6. SIMULATION MODE DETECTION (CRITICAL)
47
+ The system has MiroFish — a powerful simulation engine that creates digital twins of scenarios and models multi-agent interactions. You MUST recommend simulation mode when ANY of these apply:
48
+
49
+ ALWAYS recommend simulation when the user:
50
+ → Asks "what if" or "what would happen if"
51
+ → Wants to predict outcomes or forecast trends
52
+ → Needs to model stakeholder reactions (market, public, government, competitors)
53
+ → Is evaluating policy impact or regulatory changes
54
+ → Needs scenario comparison (option A vs option B)
55
+ → Is dealing with complex multi-party dynamics
56
+ → Asks about public opinion, sentiment shifts, or social reactions
57
+ → Wants to stress-test a strategy or business decision
58
+
59
+ When recommending simulation, explain:
60
+ → WHY simulation adds value over static analysis
61
+ → WHAT kind of simulation would be most useful
62
+ → WHAT stakeholders/agents should be modeled
63
+
64
+ ═══════════════════════════════════════════════════════════════
65
+ OUTPUT FORMAT (follow strictly)
66
+ ═══════════════════════════════════════════════════════════════
67
+
68
+ Objective:
69
+ <What the user actually needs, in one clear sentence>
70
+
71
+ Strategic Plan:
72
+ 1. <Step> — <Why this step matters>
73
+ 2. <Step> — <Why this step matters>
74
+ 3. <Step> — <Why this step matters>
75
+ → Contingency: <If X fails, then...>
76
+
77
+ Risk Assessment:
78
+ - [HIGH/MEDIUM/LOW] <risk> — <probability> — <mitigation>
79
+
80
  Dependencies:
81
+ - <What depends on what>
82
+
83
+ Timeline:
84
+ - <Step X>: <estimated timeframe or urgency>
85
+
86
+ Simulation Recommended: YES / NO
87
+ Simulation Rationale: <If YES: explain what kind of simulation, which stakeholders to model, and what insights it would generate. If NO: explain why static analysis is sufficient.>
88
+
89
+ Confidence: <0.0 to 1.0>
90
+ Reasoning: <one sentence explaining your confidence level>
backend/app/prompts/research.txt CHANGED
@@ -1,19 +1,87 @@
1
- You are the Research Agent in an AI organization.
2
-
3
- Your job:
4
- - extract the key facts from the user request
5
- - identify assumptions
6
- - identify missing information
7
- - use external API context when provided
8
- - return a concise research packet
9
-
10
- Output format:
11
- Facts:
12
- - ...
13
- Assumptions:
14
- - ...
15
- Open Questions:
16
- - ...
17
- Useful Signals:
18
- - ...
19
- Confidence: <0 to 1>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are the Research Agent in MiroOrg — a 5-layer AI intelligence organization.
2
+
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You are the first analyst in the pipeline. Your research packet becomes the foundation for every downstream agent (Planner, Verifier, Synthesizer). Thoroughness and structure here directly determine the quality of the final answer.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ CAPABILITIES YOU HAVE ACCESS TO
10
+ ═══════════════════════════════════════════════════════════════
11
+ The system automatically provides you with:
12
+ • Web search results (Tavily) — real-time web intelligence
13
+ • News articles (NewsAPI) — recent headlines and reporting
14
+ • Market data (Alpha Vantage) — live stock quotes for detected tickers
15
+ • URL content (Jina Reader) — full-text extraction from any links the user provides
16
+ • Domain pack insights — when a specialized domain is detected (e.g., finance), you receive entity resolution, ticker mapping, sentiment signals, event analysis, and credibility scores
17
+ • Knowledge base — the learning layer may supply distilled knowledge from past research
18
+
19
+ Use ALL of this context. Never ignore provided data. If external context is minimal, state what is missing and why it limits your analysis.
20
+
21
+ ═══════════════════════════════════════════════════════════════
22
+ INSTRUCTIONS
23
+ ═══════════════════════════════════════════════════════════════
24
+
25
+ 1. EXTRACT & STRUCTURE KEY FACTS
26
+ - Separate hard facts (verified, sourced) from soft signals (opinions, projections)
27
+ - Attribute each fact to its source when possible
28
+ - Note temporal relevance (how recent is each data point?)
29
+
30
+ 2. ENTITY RESOLUTION
31
+ - Identify all entities: companies, people, organizations, countries, products, concepts
32
+ - For companies: map to official names and stock tickers (e.g., "Apple" → Apple Inc. ($AAPL))
33
+ - For people: note their role and relevance
34
+ - For events: note dates and impact scope
35
+
36
+ 3. DOMAIN-SPECIFIC DEEP DIVE
37
+ When domain context is detected (finance, policy, technology, etc.):
38
+ - Use market data to ground quantitative claims (price, volume, market cap)
39
+ - Use news context to identify trending narratives and sentiment shifts
40
+ - Note which sources are high-credibility vs. low-credibility
41
+ - Extract stance signals: bullish/bearish, supportive/opposing, optimistic/pessimistic
42
+ - Identify event catalysts: earnings, regulatory actions, mergers, policy changes
43
+
44
+ 4. GAP ANALYSIS
45
+ - What critical information is NOT available?
46
+ - What assumptions must the user be aware of?
47
+ - What would change the analysis if verified differently?
48
+
49
+ 5. SIGNAL DETECTION
50
+ - Contradictions between sources
51
+ - Unusual patterns or anomalies
52
+ - Emerging narratives that haven't been confirmed
53
+ - Risks or red flags that need the Verifier's attention
54
+
55
+ ═══════════════════════════════════════════════════════════════
56
+ OUTPUT FORMAT (follow strictly)
57
+ ═══════════════════════════════════════════════════════════════
58
+
59
+ Key Facts:
60
+ - [FACT] <fact> (Source: <source name>)
61
+ - [FACT] <fact> (Source: <source name>)
62
+
63
+ Entities Detected:
64
+ - <entity name> — <type> — <relevance to query>
65
+ → Ticker: $XXX (if applicable)
66
+
67
+ Market & Quantitative Data:
68
+ - <data point with attribution>
69
+
70
+ Domain Insights:
71
+ - <domain-specific finding or signal>
72
+
73
+ Sentiment & Stance:
74
+ - <source/entity>: <bullish/bearish/neutral/mixed> — <brief reasoning>
75
+
76
+ Source Assessment:
77
+ - <source name>: <high/medium/low credibility> — <why>
78
+
79
+ Gaps & Assumptions:
80
+ - [GAP] <what's missing and why it matters>
81
+ - [ASSUMPTION] <assumption being made>
82
+
83
+ Red Flags for Verifier:
84
+ - <anything that needs skeptical examination>
85
+
86
+ Confidence: <0.0 to 1.0>
87
+ Reasoning: <one sentence explaining your confidence level>
backend/app/prompts/synthesizer.txt CHANGED
@@ -1,15 +1,103 @@
1
- You are the Synthesizer Agent in an AI organization.
2
 
3
- Your job:
4
- - combine the research, plan, and verification review
5
- - produce one final answer
6
- - be clear, grounded, and honest about uncertainty
7
- - if the request sounds like a simulation/prediction task, suggest using simulation mode too
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- Output format:
10
  Final Answer:
11
- ...
12
- Uncertainty:
13
- ...
14
- Next Best Step:
15
- ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are the Synthesizer Agent in MiroOrg — a 5-layer AI intelligence organization.
2
 
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You are the FINAL voice. You receive everything — the Research packet, the Planner's strategy, and the Verifier's assessment — and you produce the definitive answer that the user sees. Your output IS the product. It must be clear, honest, actionable, and carry appropriate certainty levels.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ SYSTEM CAPABILITIES TO REFERENCE
10
+ ═══════════════════════════════════════════════════════════════
11
+ You can recommend these capabilities to the user:
12
+ • Simulation Mode (MiroFish) — for scenario modeling, "what if" analysis, stakeholder reaction modeling, multi-agent simulations, and digital twin creation
13
+ • Domain Intelligence — specialized analysis packs (finance: market data, entity resolution, event analysis, predictions)
14
+ • Learning Layer — the system continuously improves from past cases, tracks source trust, and evolves its prompts
15
+
16
+ ═══════════════════════════════════════════════════════════════
17
+ INSTRUCTIONS
18
+ ═══════════════════════════════════════════════════════════════
19
+
20
+ 1. INTEGRATE ALL INPUTS
21
+ - Research provides the RAW FACTS and SIGNALS
22
+ - Planner provides the STRATEGIC FRAMEWORK
23
+ - Verifier provides the QUALITY ASSESSMENT and UNCERTAINTY MAP
24
+
25
+ Resolve conflicts between them:
26
+ → If Research says X but Verifier flagged X as unverified → mention X with caveat
27
+ → If Planner recommended action A but Verifier found it risky → recommend A with risk mitigation
28
+ → If sources disagree → present the disagreement honestly, don't pick sides without evidence
29
+
30
+ 2. WRITE THE FINAL ANSWER
31
+ The final answer must be:
32
+ - DIRECT: Lead with the answer, not the process. What does the user need to know FIRST?
33
+ - GROUNDED: Every claim should reference the evidence that supports it
34
+ - HONEST: State what you know, what you don't, and how confident you are
35
+ - ACTIONABLE: End with what the user should DO next
36
+ - READABLE: Use clear paragraphs, not walls of text. Use structure where helpful.
37
+
38
+ 3. UNCERTAINTY COMMUNICATION (CRITICAL)
39
+ Never hide uncertainty. The user trusts this system because it's honest about what it doesn't know.
40
+
41
+ Use these guidelines:
42
+ - HIGH uncertainty: Lead with a prominent caveat. "Based on limited/conflicting information..."
43
+ - MEDIUM uncertainty: Weave caveats naturally. "While X suggests..., there is uncertainty around..."
44
+ - LOW uncertainty: State with confidence but note the basis. "Based on multiple verified sources..."
45
+
46
+ Always specify:
47
+ → What would change your answer if new information emerged
48
+ → What the user should validate independently
49
+
50
+ 4. SIMULATION RECOMMENDATION (WHEN APPROPRIATE)
51
+ If the Planner recommended simulation mode, OR if you detect the user would benefit from it, actively recommend the MiroFish Simulation Lab.
52
+
53
+ Frame it as:
54
+ "💡 This question would benefit from simulation mode. MiroFish can create a digital twin of this scenario and model [specific stakeholders/dynamics]. To run a simulation, use the Simulation Lab with your scenario details."
55
+
56
+ Recommend simulation when:
57
+ → The answer involves too many unknowns to give a confident static analysis
58
+ → Multiple stakeholders would react differently to the same event
59
+ → The user is making a decision that could go multiple ways
60
+ → Temporal dynamics matter (how things evolve over time)
61
+
62
+ 5. CONFIDENCE CALIBRATION
63
+ Your confidence score must be CALIBRATED — don't default to generic values.
64
+
65
+ 0.9–1.0: Multiple verified sources agree, well-established facts
66
+ 0.7–0.89: Strong evidence with minor gaps, reliable sources
67
+ 0.5–0.69: Mixed evidence, some uncertainty, qualified conclusions
68
+ 0.3–0.49: Significant uncertainty, limited evidence, speculative elements
69
+ 0.0–0.29: Very little evidence, highly speculative, contradictory sources
70
+
71
+ ═══════════════════════════════════════════════════════════════
72
+ OUTPUT FORMAT (follow strictly)
73
+ ═══════════════════════════════════════════════════════════════
74
 
 
75
  Final Answer:
76
+ <Your comprehensive, direct answer. Lead with the most important insight. Use paragraphs for readability. Ground claims in evidence. Be honest about limitations.>
77
+
78
+ Key Findings:
79
+ - <Most important finding with evidence basis>
80
+ - <Second most important finding>
81
+ - <Third most important finding>
82
+
83
+ Uncertainty Level: HIGH / MEDIUM / LOW
84
+ Uncertainty Details:
85
+ - <What we're uncertain about and why>
86
+ - <What could change this answer>
87
+
88
+ Caveats:
89
+ - <Important limitations the user should be aware of>
90
+
91
+ Next Actions:
92
+ 1. <Most important thing the user should do>
93
+ 2. <Second priority action>
94
+ 3. <Optional: additional recommended steps>
95
+
96
+ Simulation Recommended: YES / NO
97
+ Simulation Details: <If YES: what scenario to simulate, what stakeholders to model, what insights to expect. If NO: why static analysis is sufficient.>
98
+
99
+ Sources Used:
100
+ - <Key sources that informed this answer>
101
+
102
+ Confidence: <0.0 to 1.0>
103
+ Reasoning: <one sentence explaining exactly why this confidence level>
backend/app/prompts/verifier.txt CHANGED
@@ -1,16 +1,111 @@
1
- You are the Verifier Agent in an AI organization.
2
-
3
- Your job:
4
- - challenge the planner
5
- - find unsupported claims
6
- - identify missing evidence or weak logic
7
- - suggest corrections
8
- - be skeptical but constructive
9
-
10
- Output format:
11
- Issues Found:
12
- - ...
13
- Corrections:
14
- - ...
15
- Approved: yes/no
16
- Confidence: <0 to 1>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are the Verifier Agent in MiroOrg — a 5-layer AI intelligence organization.
2
+
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You are the system's critical thinker and quality gatekeeper. You receive the Research packet AND the Planner's strategy, and your job is to STRESS-TEST everything before it reaches the Synthesizer. You are skeptical, thorough, and constructive. Nothing gets past you unchecked.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ CAPABILITIES YOU HAVE ACCESS TO
10
+ ═══════════════════════════════════════════════════════════════
11
+ • Domain-specific verification tools — when a specialized domain is detected (e.g., finance), the system provides:
12
+ - Source credibility scores for each information source
13
+ - Rumor detection results (flagged unverified claims)
14
+ - Scam detection results (flagged fraudulent patterns)
15
+ - Stance analysis (who is saying what, and why)
16
+ - Event impact assessment
17
+ • Cross-reference capabilities from external APIs
18
+ • Trust scores from the learning layer — historical source reliability data
19
+
20
+ ═══════════════════════════════════════════════════════════════
21
+ INSTRUCTIONS
22
+ ═══════════════════════════════════════════════════════════════
23
+
24
+ 1. CLAIM EXTRACTION & VERIFICATION
25
+ - Extract every factual claim from both the Research packet and the Planner output
26
+ - For each claim, assess:
27
+ → Is this sourced? From where?
28
+ → Is the source reliable? (check credibility scores if provided)
29
+ → Is this a fact, an opinion, or a projection?
30
+ → Are there contradicting sources?
31
+ → How current is this information?
32
+
33
+ 2. LOGIC & REASONING CHECK
34
+ - Does the Planner's strategy logically follow from the Research?
35
+ - Are there logical fallacies or unsupported leaps?
36
+ - Are conditional statements properly structured?
37
+ - Are cause-effect relationships validated?
38
+
39
+ 3. BIAS & MANIPULATION DETECTION
40
+ - Check for confirmation bias (only supporting evidence cited)
41
+ - Check for selection bias (cherry-picked data)
42
+ - Check for framing effects (how information is presented)
43
+ - Check for astroturfing or coordinated narrative campaigns
44
+ - If financial domain: check for pump-and-dump patterns, misleading projections
45
+
46
+ 4. RUMOR & SCAM DETECTION (USE DOMAIN TOOLS)
47
+ When domain verification data is provided:
48
+ - Review all flagged rumors and rate their risk
49
+ - Review all flagged scams and rate their severity
50
+ - Note any sources that appear in known unreliable source lists
51
+ - Identify patterns consistent with market manipulation or misinformation
52
+
53
+ 5. UNCERTAINTY QUANTIFICATION
54
+ This is your MOST IMPORTANT output. The Synthesizer depends on your uncertainty assessment.
55
+
56
+ Rate uncertainty on THREE dimensions:
57
+ a) DATA COMPLETENESS: How much of the needed information do we actually have?
58
+ → Complete / Mostly Complete / Partial / Sparse / Missing Critical Data
59
+ b) SOURCE RELIABILITY: How trustworthy are the sources collectively?
60
+ → Highly Reliable / Generally Reliable / Mixed / Questionable / Unreliable
61
+ c) TEMPORAL VALIDITY: How current and still-relevant is the information?
62
+ → Current / Mostly Current / Aging / Stale / Outdated
63
+
64
+ 6. CORRECTION & IMPROVEMENT
65
+ - Don't just criticize — suggest specific corrections
66
+ - If a claim is wrong, state what the correct information is (if known)
67
+ - If a plan step is risky, suggest a safer alternative
68
+ - If information is missing, specify exactly what's needed
69
+
70
+ ═══════════════════════════════════════════════════════════════
71
+ OUTPUT FORMAT (follow strictly)
72
+ ═══════════════════════════════════════════════════════════════
73
+
74
+ Claims Verified:
75
+ - ✅ <claim> — Verified (Source: <source>, Credibility: <high/medium/low>)
76
+ - ⚠️ <claim> — Partially Verified (Reason: <why>)
77
+ - ❌ <claim> — Unverified/False (Reason: <why>)
78
+
79
+ Logic Assessment:
80
+ - <Assessment of the Planner's reasoning quality>
81
+ - Logical gaps found: <list or "none">
82
+
83
+ Bias & Manipulation Flags:
84
+ - <Any detected bias, framing, or manipulation patterns>
85
+
86
+ Rumors Detected:
87
+ - [RISK: HIGH/MEDIUM/LOW] <rumor description> — <why it matters>
88
+
89
+ Scams & Red Flags:
90
+ - [SEVERITY: HIGH/MEDIUM/LOW] <scam/red flag> — <evidence>
91
+
92
+ Source Credibility Summary:
93
+ - <source name>: <score or rating> — <basis for rating>
94
+
95
+ Uncertainty Assessment:
96
+ - Data Completeness: <rating>
97
+ - Source Reliability: <rating>
98
+ - Temporal Validity: <rating>
99
+ - Overall Uncertainty: HIGH / MEDIUM / LOW
100
+ - Key Uncertainty Factors:
101
+ → <factor 1>
102
+ → <factor 2>
103
+
104
+ Corrections & Recommendations:
105
+ - <specific correction or improvement>
106
+
107
+ Approved: YES / YES WITH CAVEATS / NO
108
+ Approval Notes: <brief explanation>
109
+
110
+ Confidence: <0.0 to 1.0>
111
+ Reasoning: <one sentence explaining your confidence in this verification>
backend/app/routers/finance.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Finance Intelligence Router
3
+
4
+ Exposes domain pack capabilities directly as API endpoints:
5
+ - Ticker/company intelligence (quote + overview + news)
6
+ - Stance detection (bullish/bearish)
7
+ - Scam detection
8
+ - Rumor detection
9
+ - Source credibility
10
+ - Event impact analysis
11
+ - AI signal synthesis
12
+ """
13
+
14
+ import logging
15
+ from typing import Optional
16
+ from fastapi import APIRouter, HTTPException
17
+ from pydantic import BaseModel
18
+
19
+ from app.domain_packs.finance.ticker_resolver import extract_tickers, resolve_ticker
20
+ from app.domain_packs.finance.entity_resolver import extract_entities
21
+ from app.domain_packs.finance.stance_detector import detect_stance, analyze_price_action_language
22
+ from app.domain_packs.finance.scam_detector import detect_scam_indicators
23
+ from app.domain_packs.finance.rumor_detector import detect_rumor_indicators
24
+ from app.domain_packs.finance.source_checker import check_source_credibility, aggregate_source_scores
25
+ from app.domain_packs.finance.event_analyzer import analyze_event_impact, detect_event_type
26
+ from app.domain_packs.finance.market_data import get_quote, get_company_overview, search_symbol
27
+ from app.domain_packs.finance.news import get_company_news, get_top_headlines
28
+ from app.domain_packs.finance.prediction import structure_prediction_context, suggest_simulation_scenarios
29
+ from app.agents._model import call_model
30
+
31
+ logger = logging.getLogger(__name__)
32
+ router = APIRouter(prefix="/finance", tags=["finance"])
33
+
34
+
35
class AnalyzeTextRequest(BaseModel):
    """Request body for POST /finance/analyze/text."""
    # Raw text to run the domain-pack analyzers on.
    text: str
    # Optional source URLs/names used for credibility aggregation.
    # (pydantic deep-copies field defaults per instance, so a mutable
    # default is safe here, unlike a plain function default.)
    sources: list[str] = []


class TickerRequest(BaseModel):
    """Request body carrying a single ticker symbol."""
    symbol: str


class NewsAnalysisRequest(BaseModel):
    """Request body for POST /finance/news/analyze."""
    # Search query: company name or free-text topic.
    query: str
    # Maximum number of articles to fetch and analyze.
    limit: int = 8
47
+
48
+
49
+ # ── Text Intelligence ─────────────────────────────────────────────────────────
50
+
51
@router.post("/analyze/text")
def analyze_text(req: AnalyzeTextRequest):
    """
    Run all finance domain-pack analyzers on a piece of text.

    Args:
        req: Text to analyze plus optional sources for credibility scoring.

    Returns:
        Dict with tickers, high-confidence entities, stance, price-action
        language, scam/rumor detection, event impact and (when sources were
        supplied) an aggregated source assessment.

    Raises:
        HTTPException: 500 if any analyzer fails.
    """
    try:
        text = req.text
        # The analyzers are independent of each other; run them all and
        # bundle the results into one response payload.
        tickers = extract_tickers(text)
        entities = extract_entities(text)
        stance = detect_stance(text)
        price_action = analyze_price_action_language(text)
        scam = detect_scam_indicators(text)
        rumor = detect_rumor_indicators(text)
        events = detect_event_type(text)
        event_impact = analyze_event_impact(text, events)
        # Source credibility only makes sense when the caller supplied sources.
        source_assessment = aggregate_source_scores(req.sources) if req.sources else None

        return {
            "tickers": tickers,
            # Drop low-confidence entities to keep the payload high-signal.
            "entities": [e for e in entities if e.get("confidence", 0) >= 0.7],
            "stance": stance,
            "price_action": price_action,
            "scam_detection": scam,
            "rumor_detection": rumor,
            "event_impact": event_impact,
            "source_assessment": source_assessment,
        }
    except Exception as e:
        # logger.exception records the traceback; lazy %-args avoid building
        # the message when the level is disabled.
        logger.exception("Text analysis failed: %s", e)
        raise HTTPException(status_code=500, detail="Analysis failed") from e
84
+
85
+
86
+ # ── Ticker Intelligence ───────────────────────────────────────────────────────
87
+
88
@router.get("/ticker/{symbol}")
def ticker_intelligence(symbol: str):
    """
    Full intelligence package for a ticker:
    quote + company overview + recent news + AI signal.

    Args:
        symbol: Ticker symbol (case-insensitive; normalized to upper case).

    Raises:
        HTTPException: 500 if any upstream data fetch fails.
    """
    symbol = symbol.upper().strip()
    try:
        quote = get_quote(symbol)
        overview = get_company_overview(symbol)

        company_name = overview.get("Name") or symbol
        news = get_company_news(company_name, days_back=7)

        # Concatenate recent headlines so stance/event detection sees them all.
        headlines_text = " ".join(
            a.get("title", "") + " " + (a.get("description") or "")
            for a in news[:10]
        )
        # Compute the "do we have any text" check once instead of three times.
        has_text = bool(headlines_text.strip())
        stance = detect_stance(headlines_text) if has_text else {
            "stance": "neutral", "confidence": 0.3, "sentiment_score": 0.5
        }
        events = detect_event_type(headlines_text) if has_text else []
        event_impact = analyze_event_impact(headlines_text, events) if has_text else {}

        # Build AI signal using the LLM.
        ai_signal = _generate_ai_signal(symbol, company_name, quote, overview, news[:5], stance, events)

        return {
            "symbol": symbol,
            "company_name": company_name,
            "quote": quote,
            "overview": {
                "sector": overview.get("Sector"),
                "industry": overview.get("Industry"),
                "market_cap": overview.get("MarketCapitalization"),
                "pe_ratio": overview.get("PERatio"),
                "52_week_high": overview.get("52WeekHigh"),
                "52_week_low": overview.get("52WeekLow"),
                "analyst_target": overview.get("AnalystTargetPrice"),
                "description": (overview.get("Description") or "")[:400],
            },
            "news": [
                {
                    "title": a.get("title"),
                    "source": a.get("source", {}).get("name"),
                    "url": a.get("url"),
                    "published_at": a.get("publishedAt"),
                    "description": (a.get("description") or "")[:200],
                }
                for a in news[:8]
            ],
            "stance": stance,
            "event_impact": event_impact,
            "ai_signal": ai_signal,
        }
    except Exception as e:
        logger.exception("Ticker intelligence failed for %s: %s", symbol, e)
        raise HTTPException(status_code=500, detail=f"Failed to fetch intelligence for {symbol}") from e
145
+
146
+
147
@router.get("/search/{query}")
def search_ticker(query: str):
    """Search for ticker symbols by company name or keyword.

    Returns up to eight matches with the provider's numbered keys mapped to
    clean field names; returns an empty list on any upstream failure.
    """
    try:
        matches = search_symbol(query)
        # Alpha-Vantage-style responses use numbered keys ("1. symbol", ...);
        # remap them to stable, friendly names for the frontend.
        results = []
        for match in matches[:8]:
            results.append({
                "symbol": match.get("1. symbol"),
                "name": match.get("2. name"),
                "type": match.get("3. type"),
                "region": match.get("4. region"),
                "currency": match.get("8. currency"),
            })
        return results
    except Exception as e:
        logger.error(f"Symbol search failed: {e}")
        return []
165
+
166
+
167
+ # ── News Intelligence ─────────────────────────────────────────────────────────
168
+
169
@router.post("/news/analyze")
def analyze_news(req: NewsAnalysisRequest):
    """
    Fetch news for a query and run full intelligence analysis on each article.

    Falls back to a generic keyword search when company-specific news is empty.

    Raises:
        HTTPException: 500 on any analysis failure.
    """
    try:
        articles = get_company_news(req.query, days_back=7)
        if not articles:
            # Company lookup found nothing; fall back to keyword search.
            from app.domain_packs.finance.news import search_news
            articles = search_news(req.query, page_size=req.limit)

        analyzed = []
        for article in articles[:req.limit]:
            # Title + description is all the text we have per article.
            text = (article.get("title") or "") + " " + (article.get("description") or "")
            url = article.get("url", "")

            stance = detect_stance(text)
            scam = detect_scam_indicators(text)
            rumor = detect_rumor_indicators(text)
            # Credibility needs a URL to score the outlet; missing URL → neutral 0.5.
            source = check_source_credibility(url) if url else None

            analyzed.append({
                "title": article.get("title"),
                "source": article.get("source", {}).get("name"),
                "url": url,
                "published_at": article.get("publishedAt"),
                "description": (article.get("description") or "")[:200],
                "stance": stance.get("stance"),
                "sentiment_score": stance.get("sentiment_score"),
                "scam_score": scam.get("scam_score"),
                "rumor_score": rumor.get("rumor_score"),
                "source_credibility": source.get("credibility_score") if source else 0.5,
            })

        return {"query": req.query, "articles": analyzed, "total": len(analyzed)}
    except Exception as e:
        logger.exception("News analysis failed: %s", e)
        raise HTTPException(status_code=500, detail="News analysis failed") from e
207
+
208
+
209
@router.get("/headlines")
def get_headlines():
    """Return top business headlines, each enriched with intelligence scores.

    Best-effort endpoint: any failure yields an empty list, never an error.
    """
    try:
        items = get_top_headlines(category="business", page_size=10)
        enriched = []
        for item in items[:10]:
            body = (item.get("title") or "") + " " + (item.get("description") or "")
            link = item.get("url", "")

            stance_info = detect_stance(body)
            scam_info = detect_scam_indicators(body)
            rumor_info = detect_rumor_indicators(body)
            # Without a URL there is nothing to rate; the response falls back
            # to a neutral 0.5 credibility below.
            credibility = check_source_credibility(link) if link else None

            enriched.append({
                "title": item.get("title"),
                "source": item.get("source", {}).get("name"),
                "url": link,
                "published_at": item.get("publishedAt"),
                "description": (item.get("description") or "")[:200],
                "stance": stance_info.get("stance"),
                "sentiment_score": stance_info.get("sentiment_score"),
                "scam_score": scam_info.get("scam_score"),
                "rumor_score": rumor_info.get("rumor_score"),
                "source_credibility": credibility.get("credibility_score") if credibility else 0.5,
            })
        return enriched
    except Exception as e:
        logger.error(f"Headlines fetch failed: {e}")
        return []
240
+
241
+
242
+ # ── AI Signal Generator ───────────────────────────────────────────────────────
243
+
244
def _generate_ai_signal(
    symbol: str,
    company_name: str,
    quote: dict,
    overview: dict,
    news: list,
    stance: dict,
    events: list,
) -> dict:
    """Generate an AI trading signal using the LLM.

    Builds a compact prompt from quote/stance/event context, asks the model
    for strict JSON, and parses it. Any failure (LLM error, malformed JSON,
    wrong structure) degrades to a conservative WATCH signal instead of
    raising, so the ticker endpoint never fails because of the signal.
    """
    import json
    import re

    try:
        price = quote.get("05. price", "N/A")
        change_pct = quote.get("10. change percent", "N/A")
        headlines = "\n".join(f"- {a.get('title', '')}" for a in news[:5])
        event_names = ", ".join(e.get("event_type", "") for e in events[:3]) or "none detected"

        prompt = f"""You are a financial intelligence analyst. Analyze this data and give a concise signal.

Company: {company_name} ({symbol})
Current Price: {price}
Change Today: {change_pct}
Market Stance from News: {stance.get('stance')} (confidence: {stance.get('confidence', 0):.0%})
Key Events Detected: {event_names}

Recent Headlines:
{headlines}

Respond in this exact JSON format (no markdown, no extra text):
{{
"signal": "BUY" | "SELL" | "HOLD" | "WATCH",
"conviction": 0.0-1.0,
"reasoning": "one sentence max",
"risk": "LOW" | "MEDIUM" | "HIGH",
"timeframe": "short-term" | "medium-term" | "long-term"
}}"""

        raw = call_model(prompt, mode="chat")

        # Models sometimes wrap JSON in markdown fences despite instructions;
        # strip any ``` fences (the pattern also matches the closing fence).
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            cleaned = re.sub(r"```[a-z]*\n?", "", cleaned).strip()
        data = json.loads(cleaned)
        # Guard against structurally valid JSON that is not the expected
        # object (e.g. a bare string or list) — fall back instead.
        if not isinstance(data, dict) or "signal" not in data:
            raise ValueError("LLM response missing 'signal' field")
        return data
    except Exception as e:
        logger.warning(f"AI signal generation failed: {e}")
        return {
            "signal": "WATCH",
            "conviction": 0.3,
            "reasoning": "Insufficient data for confident signal",
            "risk": "MEDIUM",
            "timeframe": "short-term",
        }
backend/app/routers/learning.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Learning API endpoints for autonomous knowledge evolution.
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ from fastapi import APIRouter, HTTPException
8
+ from typing import List, Optional
9
+
10
+ from ..schemas import (
11
+ LearningStatusResponse,
12
+ LearningInsightsResponse,
13
+ KnowledgeIngestionRequest,
14
+ KnowledgeItem,
15
+ Skill,
16
+ SkillDistillRequest,
17
+ SourceTrust,
18
+ PromptVersion,
19
+ )
20
+ from ..config import get_config
21
+ from ..services.learning import (
22
+ KnowledgeIngestor,
23
+ KnowledgeStore,
24
+ LearningEngine,
25
+ PromptOptimizer,
26
+ SkillDistiller,
27
+ TrustManager,
28
+ LearningScheduler,
29
+ )
30
+ from ..agents._model import call_model
31
+
32
+ logger = logging.getLogger(__name__)
33
+ router = APIRouter(prefix="/learning", tags=["learning"])
34
+
35
+ # Global instances - initialized via init_learning_services()
36
+ learning_engine: Optional[LearningEngine] = None
37
+ knowledge_store: Optional[KnowledgeStore] = None
38
+ prompt_optimizer: Optional[PromptOptimizer] = None
39
+ skill_distiller: Optional[SkillDistiller] = None
40
+ trust_manager: Optional[TrustManager] = None
41
+ scheduler: Optional[LearningScheduler] = None
42
+
43
+
44
async def _async_call_model(prompt: str, max_tokens: int = 1000) -> str:
    """Async wrapper around the synchronous call_model.

    Runs the blocking LLM call in a worker thread so the event loop is not
    stalled for the duration of the request (the previous version called it
    inline, blocking every other coroutine). `max_tokens` is accepted for
    interface compatibility but call_model does not currently take it.
    """
    return await asyncio.to_thread(call_model, prompt, mode="chat")
47
+
48
+
49
def init_learning_services(config):
    """Initialize all learning services. Called from main.py on startup.

    Builds the knowledge store/ingestor, prompt optimizer, skill distiller
    and trust manager, wires them into a LearningEngine, and (when learning
    is enabled) registers the recurring tasks on a resource-aware scheduler.
    Instances are stored in this module's globals so route handlers and
    start_scheduler_background() can reach them.
    """
    global learning_engine, knowledge_store, prompt_optimizer, skill_distiller, trust_manager, scheduler

    # Persistent store for ingested knowledge, capped by size.
    knowledge_store = KnowledgeStore(
        data_dir=config.data_dir,
        max_size_mb=config.knowledge_max_size_mb,
    )

    # Pulls fresh material via Tavily / NewsAPI keys and summarizes with the LLM.
    knowledge_ingestor = KnowledgeIngestor(
        tavily_key=config.tavily_api_key,
        newsapi_key=config.newsapi_key,
        model_fn=_async_call_model,
    )

    prompt_optimizer = PromptOptimizer(
        data_dir=config.data_dir,
        model_fn=_async_call_model,
    )

    skill_distiller = SkillDistiller(
        data_dir=config.data_dir,
        model_fn=_async_call_model,
    )

    trust_manager = TrustManager(data_dir=config.data_dir)

    # The engine orchestrates all of the components built above.
    learning_engine = LearningEngine(
        knowledge_store=knowledge_store,
        knowledge_ingestor=knowledge_ingestor,
        prompt_optimizer=prompt_optimizer,
        skill_distiller=skill_distiller,
        trust_manager=trust_manager,
    )

    # Scheduler throttles background work by CPU and battery headroom.
    scheduler = LearningScheduler(
        max_cpu_percent=50.0,
        min_battery_percent=30.0,
        check_interval_seconds=60,
    )

    if config.learning_enabled:
        # Task 1: Knowledge ingestion (every 6 hours)
        scheduler.schedule_task(
            "knowledge_ingestion",
            lambda: learning_engine.run_knowledge_ingestion(config.learning_topics),
            interval_hours=config.learning_schedule_interval,
        )
        # Task 2: Expired knowledge cleanup (daily)
        scheduler.schedule_task(
            "cleanup",
            lambda: learning_engine.run_cleanup(expiration_days=30),
            interval_hours=24,
        )
        # Task 3: Pattern detection (daily)
        async def _run_pattern_detection():
            # Wrapped in an async def so the scheduler gets an awaitable,
            # matching the coroutine-returning lambdas of the other tasks.
            return learning_engine.detect_patterns()

        scheduler.schedule_task(
            "pattern_detection",
            _run_pattern_detection,
            interval_hours=24,
        )
        # Task 4: Skill distillation (weekly)
        scheduler.schedule_task(
            "skill_distillation",
            lambda: learning_engine.run_skill_distillation(min_frequency=3),
            interval_hours=168,
        )
        # Task 5: Prompt optimization (weekly)
        scheduler.schedule_task(
            "prompt_optimization",
            lambda: learning_engine.run_prompt_optimization(
                ["research", "planner", "verifier", "synthesizer"]
            ),
            interval_hours=168,
        )

    logger.info("Learning services initialized with all scheduled tasks")
128
+
129
+
130
def start_scheduler_background():
    """Launch the learning scheduler as a fire-and-forget asyncio task.

    No-op when the scheduler is missing (init not called) or already running.
    """
    if scheduler and not scheduler.running:
        # create_task requires a running event loop; FastAPI's startup hook
        # provides one when this is called from main.py.
        asyncio.create_task(scheduler.start())
        logger.info("Learning scheduler started in background")
135
+
136
+
137
+ # ── Status ────────────────────────────────────────────────────────────────────
138
+
139
@router.get("/status")
async def get_learning_status():
    """Return learning-engine status, including scheduler state when available."""
    if not learning_engine:
        raise HTTPException(status_code=503, detail="Learning engine not initialized")

    status = learning_engine.get_status()
    # Scheduler is optional; attach its state only when it exists.
    if scheduler:
        status["scheduler"] = scheduler.get_status()
    return status


@router.post("/run-once")
async def run_learning_once(task_name: str):
    """Run a single scheduled learning task immediately by name (404 if unknown)."""
    if not scheduler:
        raise HTTPException(status_code=503, detail="Scheduler not initialized")
    try:
        return await scheduler.run_once(task_name)
    except ValueError as e:
        # Unknown task name; chain the cause for easier debugging.
        raise HTTPException(status_code=404, detail=str(e)) from e


@router.get("/insights")
async def get_learning_insights():
    """Return aggregated learning insights from the engine."""
    if not learning_engine:
        raise HTTPException(status_code=503, detail="Learning engine not initialized")
    return learning_engine.get_insights()
168
+
169
+
170
# ── Knowledge (fixed-path routes BEFORE parameterised ones) ─────────────────

def _require_knowledge_store() -> None:
    """Raise 503 unless the knowledge store has been initialized."""
    if not knowledge_store:
        raise HTTPException(status_code=503, detail="Knowledge store not initialized")


@router.get("/knowledge")
async def list_knowledge(limit: Optional[int] = 50):
    """Return up to `limit` stored knowledge items."""
    _require_knowledge_store()
    return knowledge_store.list_all(limit=limit)


@router.post("/knowledge/ingest")
async def ingest_knowledge(request: KnowledgeIngestionRequest):
    """Trigger an immediate knowledge-ingestion run for the requested topics."""
    if not learning_engine:
        raise HTTPException(status_code=503, detail="Learning engine not initialized")
    return await learning_engine.run_knowledge_ingestion(request.topics)


@router.get("/knowledge/search")
async def search_knowledge(query: str, limit: int = 10):
    """Search stored knowledge for `query`, returning at most `limit` hits."""
    _require_knowledge_store()
    return knowledge_store.search_knowledge(query, limit=limit)


@router.get("/knowledge/{item_id}")
async def get_knowledge_item(item_id: str):
    """Fetch one knowledge item by id (404 when absent)."""
    _require_knowledge_store()
    item = knowledge_store.get_knowledge(item_id)
    if not item:
        raise HTTPException(status_code=404, detail="Knowledge item not found")
    return item
201
+
202
+
203
+ # ── Skills (fixed-path routes BEFORE parameterised ones) ────────────────────
204
+
205
@router.get("/skills")
async def list_skills():
    """List all distilled skills."""
    if not skill_distiller:
        raise HTTPException(status_code=503, detail="Skill distiller not initialized")
    return skill_distiller.list_skills()


@router.post("/skills/distill")
async def distill_skills(request: SkillDistillRequest):
    """
    Detect repeated solution patterns in recent cases and distill up to five
    of them into reusable skills.

    Returns:
        Counts of candidates found / skills distilled, plus the skill objects.
    """
    if not skill_distiller:
        raise HTTPException(status_code=503, detail="Skill distiller not initialized")
    from ..services.case_store import list_cases
    cases = list_cases(limit=100)
    candidates = skill_distiller.detect_skill_candidates(cases, min_frequency=request.min_frequency)
    skills = []
    for candidate in candidates[:5]:
        # Fetch each case's route dict once (the original looked it up twice
        # per case) and keep at most three examples per candidate.
        example_cases = [
            c for c in cases
            if (r := c.get("route")) and r.get("domain_pack") == candidate.get("domain")
        ][:3]
        skill = await skill_distiller.distill_skill(candidate, example_cases)
        skills.append(skill)
    return {"candidates_found": len(candidates), "skills_distilled": len(skills), "skills": skills}


@router.get("/skills/{skill_id}")
async def get_skill(skill_id: str):
    """Fetch a single skill by id (404 when absent)."""
    if not skill_distiller:
        raise HTTPException(status_code=503, detail="Skill distiller not initialized")
    skill = skill_distiller.get_skill(skill_id)
    if not skill:
        raise HTTPException(status_code=404, detail="Skill not found")
    return skill
235
+
236
+
237
# ── Trust & Freshness ─────────────────────────────────────────────────────────

@router.get("/sources/trust")
async def get_trusted_sources(min_trust: float = 0.7, min_verifications: int = 3):
    """List sources whose trust score and verification count clear both thresholds."""
    if not trust_manager:
        raise HTTPException(status_code=503, detail="Trust manager not initialized")
    return trust_manager.list_trusted_sources(
        min_trust=min_trust,
        min_verifications=min_verifications,
    )


@router.get("/sources/freshness")
async def get_stale_items(threshold: float = 0.3):
    """Return knowledge items whose freshness score fell below `threshold`."""
    if not trust_manager or not knowledge_store:
        raise HTTPException(status_code=503, detail="Services not initialized")
    all_items = knowledge_store.list_all()
    return trust_manager.get_stale_items(all_items, threshold=threshold)
252
+
253
+
254
# ── Prompt Evolution (fixed-path routes BEFORE parameterised ones) ───────────

@router.post("/prompts/optimize/{name}")
async def optimize_prompt(name: str, goal: str):
    """Create an optimized variant of prompt `name` targeting `goal`."""
    if not prompt_optimizer:
        raise HTTPException(status_code=503, detail="Prompt optimizer not initialized")
    from ..services.prompt_store import get_prompt
    prompt_data = get_prompt(name)
    if not prompt_data:
        raise HTTPException(status_code=404, detail=f"Prompt '{name}' not found")
    return await prompt_optimizer.create_prompt_variant(name, prompt_data["content"], goal)


@router.post("/prompts/promote/{name}/{version}")
async def promote_prompt_version(name: str, version: str):
    """Promote a tested prompt variant to active, enforcing the win-rate gate."""
    if not prompt_optimizer:
        raise HTTPException(status_code=503, detail="Prompt optimizer not initialized")
    if not prompt_optimizer.promote_prompt(version):
        raise HTTPException(status_code=400, detail="Promotion criteria not met (need ≥10 tests and ≥70% win rate)")
    return {"status": "promoted", "variant_id": version}


@router.get("/prompts/versions/{name}")
async def get_prompt_versions(name: str):
    """List all recorded versions of prompt `name`."""
    if not prompt_optimizer:
        raise HTTPException(status_code=503, detail="Prompt optimizer not initialized")
    return prompt_optimizer.list_versions(name)
backend/app/routers/sentinel.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Router - API Endpoints
3
+
4
+ Provides REST API for sentinel monitoring and control.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from typing import List, Dict, Any
10
+
11
+ from fastapi import APIRouter, HTTPException
12
+ from pydantic import BaseModel
13
+
14
+ from app.services.sentinel.sentinel_engine import SentinelEngine
15
+ from app.services.sentinel.patcher import SentinelPatcher
16
+ from app.services.sentinel.capability_tracker import CapabilityTracker
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ router = APIRouter(prefix="/sentinel", tags=["sentinel"])
21
+
22
+ # Initialize services
23
+ engine = SentinelEngine()
24
+ patcher = SentinelPatcher()
25
+ tracker = CapabilityTracker()
26
+
27
+
28
+ # ── Request/Response Models ───────────────────────────────────────────────────
29
+
30
class PatchApprovalRequest(BaseModel):
    """Request to approve a pending patch.

    Currently carries no fields: it exists so the endpoint has a typed
    (empty) JSON body and can grow fields without an interface break.
    """
    pass


class PatchRejectionRequest(BaseModel):
    """Request to reject a pending patch.

    Currently carries no fields; placeholder for future metadata such as a
    rejection reason.
    """
    pass
38
+
39
+
40
+ # ── Helper Functions ──────────────────────────────────────────────────────────
41
+
42
+ def _disabled_response():
43
+ """Return response when sentinel is disabled."""
44
+ return {
45
+ "sentinel_enabled": False,
46
+ "message": "Sentinel layer is disabled. Set SENTINEL_ENABLED=true to enable."
47
+ }
48
+
49
+
50
+ def _check_enabled():
51
+ """Check if sentinel is enabled, raise HTTPException if not."""
52
+ if not os.getenv("SENTINEL_ENABLED", "true").lower() == "true":
53
+ raise HTTPException(status_code=503, detail="Sentinel layer is disabled")
54
+
55
+
56
+ # ── Endpoints ─────────────────────────────────────────────────────────────────
57
+
58
@router.get("/status")
def get_status():
    """Return current sentinel health status, or the disabled notice.

    Unlike the other endpoints, a disabled sentinel is reported in-band
    rather than as a 503, so dashboards can always render something.
    """
    enabled = os.getenv("SENTINEL_ENABLED", "true").lower() == "true"
    if not enabled:
        return _disabled_response()

    try:
        return engine.get_status()
    except Exception as e:
        logger.error(f"Failed to get sentinel status: {e}")
        raise HTTPException(status_code=500, detail="Failed to get sentinel status")
74
+
75
+
76
@router.get("/alerts")
def get_alerts(limit: int = 50):
    """
    Get recent alerts.

    Args:
        limit: Maximum number of alerts to return.

    Returns:
        List of the most recent alerts (empty if no history file yet).

    Raises:
        HTTPException: 500 when the history file exists but cannot be read.
    """
    _check_enabled()

    try:
        import json
        from pathlib import Path

        alert_file = Path("backend/app/data/sentinel") / "alert_history.json"
        if not alert_file.exists():
            return []

        alerts = json.loads(alert_file.read_text())

        # A negative slice already returns the whole list when it is shorter
        # than `limit`, so the previous explicit length check was redundant.
        return alerts[-limit:]

    except Exception as e:
        logger.exception("Failed to get alerts: %s", e)
        raise HTTPException(status_code=500, detail="Failed to get alerts") from e
108
+
109
+
110
@router.get("/patches")
def get_patches(limit: int = 50):
    """Return up to `limit` of the most recently applied patches."""
    _check_enabled()

    try:
        import json
        from pathlib import Path

        history_path = Path("backend/app/data/sentinel") / "patch_history.json"
        if not history_path.exists():
            return []

        with open(history_path, 'r') as f:
            history = json.load(f)

        # Newest entries live at the end of the history file.
        return history[-limit:] if len(history) > limit else history

    except Exception as e:
        logger.error(f"Failed to get patches: {e}")
        raise HTTPException(status_code=500, detail="Failed to get patches")


@router.get("/patches/pending")
def get_pending_patches():
    """Return pending patches awaiting human review."""
    _check_enabled()

    try:
        import json
        from pathlib import Path

        pending_dir = Path("backend/app/data/sentinel") / "pending_patches"
        if not pending_dir.exists():
            return []

        pending = []
        for candidate in pending_dir.glob("*.patch.json"):
            try:
                with open(candidate, 'r') as f:
                    pending.append(json.load(f))
            except Exception as e:
                # Skip unreadable files instead of failing the whole listing.
                logger.warning(f"Failed to read pending patch {candidate}: {e}")

        return pending

    except Exception as e:
        logger.error(f"Failed to get pending patches: {e}")
        raise HTTPException(status_code=500, detail="Failed to get pending patches")
180
+
181
+
182
@router.post("/patches/{patch_id}/approve")
def approve_patch(patch_id: str, request: PatchApprovalRequest):
    """Approve and apply a pending patch; 404 when no such patch exists.

    Args:
        patch_id: ID of the pending patch.
        request: Approval request (empty body).

    Returns:
        The patch result serialized as a dict.
    """
    _check_enabled()

    try:
        outcome = patcher.approve_pending(patch_id)
        if outcome is None:
            raise HTTPException(status_code=404, detail="Pending patch not found")
        return outcome.to_dict()
    except HTTPException:
        # Re-raise our own 404 untouched; only wrap unexpected errors below.
        raise
    except Exception as e:
        logger.error(f"Failed to approve patch {patch_id}: {e}")
        raise HTTPException(status_code=500, detail="Failed to approve patch")


@router.post("/patches/{patch_id}/reject")
def reject_patch(patch_id: str, request: PatchRejectionRequest):
    """Reject a pending patch; 404 when no such patch exists.

    Args:
        patch_id: ID of the pending patch.
        request: Rejection request (empty body).

    Returns:
        Confirmation dict with the rejected patch id.
    """
    _check_enabled()

    try:
        if not patcher.reject_pending(patch_id):
            raise HTTPException(status_code=404, detail="Pending patch not found")
        return {"rejected": True, "patch_id": patch_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to reject patch {patch_id}: {e}")
        raise HTTPException(status_code=500, detail="Failed to reject patch")
238
+
239
+
240
@router.get("/capability")
def get_capability_trend(days: int = 7):
    """Return capability snapshots covering the last `days` days."""
    _check_enabled()

    try:
        return tracker.trend(days=days)
    except Exception as e:
        logger.error(f"Failed to get capability trend: {e}")
        raise HTTPException(status_code=500, detail="Failed to get capability trend")


@router.get("/capability/current")
def get_current_capability():
    """Return the current capability snapshot serialized as a dict."""
    _check_enabled()

    try:
        return tracker.snapshot().to_dict()
    except Exception as e:
        logger.error(f"Failed to get current capability: {e}")
        raise HTTPException(status_code=500, detail="Failed to get current capability")
278
+
279
+
280
@router.post("/run-now")
def run_cycle_now():
    """Trigger a sentinel monitoring cycle immediately and return its report."""
    _check_enabled()

    try:
        report = engine.run_cycle()
        return report.dict()
    except Exception as e:
        logger.exception("Failed to run sentinel cycle: %s", e)
        raise HTTPException(status_code=500, detail="Failed to run sentinel cycle") from e


@router.get("/cycle-history")
def get_cycle_history(limit: int = 20):
    """
    Get recent cycle history.

    Args:
        limit: Maximum number of cycles to return.

    Returns:
        List of the most recent cycle reports (empty if no history file yet).

    Raises:
        HTTPException: 500 when the history file exists but cannot be read.
    """
    _check_enabled()

    try:
        import json
        from pathlib import Path

        history_file = Path("backend/app/data/sentinel") / "cycle_history.json"
        if not history_file.exists():
            return []

        history = json.loads(history_file.read_text())

        # A negative slice already caps at the list length, so the explicit
        # length comparison in the original was redundant.
        return history[-limit:]

    except Exception as e:
        logger.exception("Failed to get cycle history: %s", e)
        raise HTTPException(status_code=500, detail="Failed to get cycle history") from e
backend/app/routers/simulation.py CHANGED
@@ -1,4 +1,5 @@
1
  import uuid
 
2
  from fastapi import APIRouter, HTTPException
3
 
4
  from app.config import MIROFISH_ENABLED
@@ -11,8 +12,9 @@ from app.services.mirofish_client import (
11
  simulation_chat,
12
  MiroFishError,
13
  )
14
- from app.services.simulation_store import save_simulation, get_simulation
15
 
 
16
  router = APIRouter(prefix="/simulation", tags=["simulation"])
17
 
18
 
@@ -21,15 +23,42 @@ def simulation_health():
21
  return mirofish_health()
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
24
  @router.post("/run")
25
  def simulation_run(payload: SimulationRunRequest):
 
 
 
 
 
 
26
  if not MIROFISH_ENABLED:
27
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
28
 
29
  try:
 
30
  remote = run_simulation(payload.model_dump())
 
31
  except MiroFishError as e:
32
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
33
 
34
  simulation_id = (
35
  remote.get("simulation_id")
@@ -38,59 +67,107 @@ def simulation_run(payload: SimulationRunRequest):
38
  or str(uuid.uuid4())
39
  )
40
 
 
41
  record = {
42
  "simulation_id": simulation_id,
43
  "status": remote.get("status", "submitted"),
44
  "title": payload.title,
45
  "prediction_goal": payload.prediction_goal,
46
  "remote_payload": remote,
 
47
  }
48
  save_simulation(simulation_id, record)
 
49
  return record
50
 
51
 
52
  @router.get("/{simulation_id}")
53
  def simulation_status_endpoint(simulation_id: str):
 
 
 
 
 
54
  if not MIROFISH_ENABLED:
55
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
56
 
57
  try:
 
58
  remote = simulation_status(simulation_id)
59
  except MiroFishError as e:
60
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
61
 
62
  local = get_simulation(simulation_id) or {}
63
  merged = {**local, **remote, "simulation_id": simulation_id}
64
  save_simulation(simulation_id, merged)
 
65
  return merged
66
 
67
 
68
  @router.get("/{simulation_id}/report")
69
  def simulation_report_endpoint(simulation_id: str):
 
 
 
 
 
70
  if not MIROFISH_ENABLED:
71
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
72
 
73
  try:
 
74
  report = simulation_report(simulation_id)
75
  except MiroFishError as e:
76
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
77
 
78
  local = get_simulation(simulation_id) or {}
79
  merged = {**local, "report": report, "simulation_id": simulation_id}
80
  save_simulation(simulation_id, merged)
 
81
  return merged
82
 
83
 
84
  @router.post("/{simulation_id}/chat")
85
  def simulation_chat_endpoint(simulation_id: str, payload: SimulationChatRequest):
 
 
 
 
 
86
  if not MIROFISH_ENABLED:
87
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
88
 
89
  try:
 
90
  result = simulation_chat(simulation_id, payload.message)
91
  except MiroFishError as e:
92
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
93
 
 
94
  return {
95
  "simulation_id": simulation_id,
96
  "message": payload.message,
 
1
  import uuid
2
+ import logging
3
  from fastapi import APIRouter, HTTPException
4
 
5
  from app.config import MIROFISH_ENABLED
 
12
  simulation_chat,
13
  MiroFishError,
14
  )
15
+ from app.services.simulation_store import save_simulation, get_simulation, list_simulations
16
 
17
+ logger = logging.getLogger(__name__)
18
  router = APIRouter(prefix="/simulation", tags=["simulation"])
19
 
20
 
 
23
  return mirofish_health()
24
 
25
 
26
@router.get("/list")
def simulation_list():
    """
    List all simulations.

    Returns a list of all stored simulations with their status.
    """
    # Thin passthrough over the local store; no MiroFish call involved.
    return list_simulations()
35
+
36
+
37
  @router.post("/run")
38
  def simulation_run(payload: SimulationRunRequest):
39
+ """
40
+ Submit a simulation request to MiroFish.
41
+
42
+ This endpoint creates a case-linked simulation that can be tracked
43
+ and referenced from case execution records.
44
+ """
45
  if not MIROFISH_ENABLED:
46
+ logger.warning("Simulation request rejected: MiroFish is disabled")
47
+ raise HTTPException(
48
+ status_code=400,
49
+ detail="MiroFish simulation service is not enabled. Please enable MIROFISH_ENABLED in configuration."
50
+ )
51
 
52
  try:
53
+ logger.info(f"Submitting simulation: {payload.title}")
54
  remote = run_simulation(payload.model_dump())
55
+ logger.info(f"Simulation submitted successfully")
56
  except MiroFishError as e:
57
+ logger.error(f"MiroFish simulation submission failed: {e}")
58
+ raise HTTPException(
59
+ status_code=502,
60
+ detail=f"MiroFish service error: {str(e)}. Please check if MiroFish service is running at the configured endpoint."
61
+ )
62
 
63
  simulation_id = (
64
  remote.get("simulation_id")
 
67
  or str(uuid.uuid4())
68
  )
69
 
70
+ # Store simulation with case linking support
71
  record = {
72
  "simulation_id": simulation_id,
73
  "status": remote.get("status", "submitted"),
74
  "title": payload.title,
75
  "prediction_goal": payload.prediction_goal,
76
  "remote_payload": remote,
77
+ "case_id": payload.metadata.get("case_id") if payload.metadata else None, # Link to case if provided
78
  }
79
  save_simulation(simulation_id, record)
80
+ logger.info(f"Simulation {simulation_id} saved locally with case_id: {record.get('case_id')}")
81
  return record
82
 
83
 
84
@router.get("/{simulation_id}")
def simulation_status_endpoint(simulation_id: str):
    """
    Get the current status of a simulation.

    Returns merged local and remote status information.

    Raises:
        HTTPException: 400 when MiroFish is disabled, 502 when the remote
            status lookup fails.
    """
    if not MIROFISH_ENABLED:
        logger.warning(f"Simulation status request rejected for {simulation_id}: MiroFish is disabled")
        raise HTTPException(
            status_code=400,
            detail="MiroFish simulation service is not enabled.",
        )

    logger.info(f"Fetching status for simulation {simulation_id}")
    try:
        remote_state = simulation_status(simulation_id)
    except MiroFishError as e:
        logger.error(f"Failed to fetch simulation status for {simulation_id}: {e}")
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve simulation status from MiroFish: {str(e)}"
        )

    stored = get_simulation(simulation_id) or {}
    # Remote values override stale local ones; the id is pinned last so it
    # can never be clobbered by either side.
    merged = {**stored, **remote_state, "simulation_id": simulation_id}
    save_simulation(simulation_id, merged)
    logger.info(f"Simulation {simulation_id} status: {merged.get('status', 'unknown')}")
    return merged
113
 
114
 
115
@router.get("/{simulation_id}/report")
def simulation_report_endpoint(simulation_id: str):
    """
    Get the final report for a completed simulation.

    Returns the simulation report with analysis and insights.

    Raises:
        HTTPException: 400 when MiroFish is disabled, 502 when the remote
            report fetch fails.
    """
    if not MIROFISH_ENABLED:
        logger.warning(f"Simulation report request rejected for {simulation_id}: MiroFish is disabled")
        raise HTTPException(
            status_code=400,
            detail="MiroFish simulation service is not enabled.",
        )

    logger.info(f"Fetching report for simulation {simulation_id}")
    try:
        remote_report = simulation_report(simulation_id)
    except MiroFishError as e:
        logger.error(f"Failed to fetch simulation report for {simulation_id}: {e}")
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve simulation report from MiroFish: {str(e)}"
        )

    stored = get_simulation(simulation_id) or {}
    # The report is attached under its own key rather than merged field-by-field.
    merged = {**stored, "report": remote_report, "simulation_id": simulation_id}
    save_simulation(simulation_id, merged)
    logger.info(f"Simulation {simulation_id} report retrieved successfully")
    return merged
144
 
145
 
146
  @router.post("/{simulation_id}/chat")
147
  def simulation_chat_endpoint(simulation_id: str, payload: SimulationChatRequest):
148
+ """
149
+ Ask follow-up questions about a completed simulation.
150
+
151
+ Enables deep interaction with simulation results.
152
+ """
153
  if not MIROFISH_ENABLED:
154
+ logger.warning(f"Simulation chat request rejected for {simulation_id}: MiroFish is disabled")
155
+ raise HTTPException(
156
+ status_code=400,
157
+ detail="MiroFish simulation service is not enabled."
158
+ )
159
 
160
  try:
161
+ logger.info(f"Simulation chat for {simulation_id}: {payload.message[:50]}...")
162
  result = simulation_chat(simulation_id, payload.message)
163
  except MiroFishError as e:
164
+ logger.error(f"Simulation chat failed for {simulation_id}: {e}")
165
+ raise HTTPException(
166
+ status_code=502,
167
+ detail=f"Failed to chat with simulation: {str(e)}"
168
+ )
169
 
170
+ logger.info(f"Simulation chat response received for {simulation_id}")
171
  return {
172
  "simulation_id": simulation_id,
173
  "message": payload.message,
backend/app/schemas.py CHANGED
@@ -108,21 +108,167 @@ class SimulationStoredRecord(BaseModel):
108
  disk_bytes: int
109
 
110
 
111
- class ConfigStatusResponse(BaseModel):
112
- model_name: str
113
- memory_dir: str
114
- prompts_dir: str
115
- gemini_key_present: bool
 
 
 
 
 
 
 
 
 
 
 
116
 
117
 
118
- class AgentInfo(BaseModel):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  name: str
120
- purpose: str
121
- prompt_name: Optional[str] = None
122
- inputs_required: List[str] = Field(default_factory=list)
123
-
124
-
125
- class DeepHealthResponse(BaseModel):
126
- status: str
127
- version: str
128
- checks: Dict[str, Any]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  disk_bytes: int
109
 
110
 
111
# Learning Layer Schemas
#
# NOTE(review): all timestamp fields below (ingested_at, learned_at,
# created_at, last_updated, ...) are plain strings; presumably ISO-8601
# from datetime.isoformat() — confirm against the learning services
# before parsing them.


class KnowledgeItem(BaseModel):
    """Knowledge item with compressed content."""
    id: Optional[str] = None
    source: str = Field(..., description="Source: 'tavily_search', 'jina_reader', 'newsapi'")
    query: Optional[str] = None
    url: Optional[str] = None
    title: Optional[str] = None
    summary: str = Field(..., description="Compressed summary (2-4KB)")
    published_at: Optional[str] = None
    ingested_at: str
    saved_at: Optional[str] = None
    last_accessed: Optional[str] = None
    # Scores are validated into [0, 1] by pydantic's ge/le constraints.
    trust_score: Optional[float] = Field(default=0.5, ge=0.0, le=1.0)
    freshness_score: Optional[float] = Field(default=1.0, ge=0.0, le=1.0)


class CaseLearning(BaseModel):
    """Learning metadata extracted from case execution."""
    case_id: str
    route_effectiveness: Dict[str, Any] = Field(default_factory=dict)
    prompt_performance: Dict[str, Any] = Field(default_factory=dict)
    provider_reliability: Dict[str, Any] = Field(default_factory=dict)
    patterns_detected: List[str] = Field(default_factory=list)
    learned_at: str


class PromptVersion(BaseModel):
    """Prompt version with A/B testing metadata."""
    id: str
    prompt_name: str
    version: int
    prompt_text: str
    goal: str
    status: str = Field(..., description="Status: 'testing', 'production', 'archived'")
    created_at: str
    # A/B counters; win_rate is stored, not derived, so keep them in sync.
    test_count: int = 0
    win_count: int = 0
    win_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    promoted_at: Optional[str] = None
    archived_at: Optional[str] = None


class Skill(BaseModel):
    """Distilled skill from execution patterns."""
    id: str
    name: str
    type: str = Field(..., description="Type: 'domain_expertise', 'preferred_source', 'agent_workflow'")
    description: str
    trigger_patterns: List[str] = Field(default_factory=list)
    recommended_agents: List[str] = Field(default_factory=list)
    preferred_sources: List[str] = Field(default_factory=list)
    expected_outcomes: List[str] = Field(default_factory=list)
    frequency: int
    confidence: float = Field(..., ge=0.0, le=1.0)
    created_at: str
    # Usage counters; success_rate is stored, not derived.
    usage_count: int = 0
    success_count: int = 0
    success_rate: float = Field(default=0.0, ge=0.0, le=1.0)


class SourceTrust(BaseModel):
    """Trust score for a source."""
    source: str
    trust_score: float = Field(..., ge=0.0, le=1.0)
    verification_count: int
    success_count: int
    success_rate: float = Field(..., ge=0.0, le=1.0)
    last_updated: str


class FreshnessScore(BaseModel):
    """Freshness score for a knowledge item."""
    item_id: str
    freshness_score: float = Field(..., ge=0.0, le=1.0)
    age_days: int
    last_updated: str


class LearningStatusResponse(BaseModel):
    """Learning engine status."""
    enabled: bool
    storage: Dict[str, Any]
    last_run: Dict[str, str]


class LearningInsightsResponse(BaseModel):
    """Learning insights."""
    recent_knowledge: List[Dict[str, Any]]
    storage_stats: Dict[str, Any]


class KnowledgeIngestionRequest(BaseModel):
    """Request to ingest knowledge."""
    topics: List[str] = Field(..., description="Topics to ingest knowledge about")


class SkillDistillRequest(BaseModel):
    """Request to distill skills."""
    min_frequency: int = Field(default=3, description="Minimum pattern frequency")


# Sentinel Layer Schemas


class SentinelAlert(BaseModel):
    """Sentinel alert from system health scan."""
    alert_id: str
    layer: int = Field(..., description="Layer: 1-6")
    component: str = Field(..., description="Component name")
    issue_type: str = Field(..., description="Issue type: error, degradation, anomaly")
    severity: str = Field(..., description="Severity: low, medium, high, critical")
    raw_evidence: str = Field(..., description="Raw evidence from scan")
    timestamp: str


class SentinelDiagnosis(BaseModel):
    """Sentinel diagnosis of an alert."""
    diagnosis_id: str
    alert_id: str
    root_cause: str
    fix_type: str = Field(..., description="Fix type: config, prompt, logic, dependency, data")
    # Gate for the auto-patcher: only diagnoses marked safe are applied
    # without human review.
    safe_to_auto_apply: bool
    proposed_fix: str
    confidence: float = Field(..., ge=0.0, le=1.0)
    reasoning: str
    timestamp: str


class PatchResult(BaseModel):
    """Result of patch application."""
    patch_id: str
    applied: bool
    target_file: str
    change_summary: str
    requires_human_review: bool
    timestamp: str


class CapabilitySnapshot(BaseModel):
    """Capability snapshot with AGI progression index."""
    snapshot_id: str
    timestamp: str
    scores: Dict[str, float] = Field(..., description="Dimension scores: reasoning_depth, source_trust_avg, skill_coverage, prompt_win_rate_avg, stability, self_correction_rate")
    # IMPORTANT COMMENT (must appear in schemas):
    # agi_progression_index is a VANITY METRIC for developer motivation.
    # It is a weighted composite of system health indicators:
    # error rate, confidence scores, trust quality, skill growth, etc.
    # It does NOT measure general intelligence, emergent reasoning, or AGI.
    # The name is aspirational and intentionally optimistic, not scientific.
    agi_progression_index: float = Field(..., ge=0.0, le=1.0)
    delta_from_last: Dict[str, float]


class SentinelCycleReport(BaseModel):
    """Report from a sentinel cycle."""
    cycle_id: str
    started_at: str
    completed_at: str
    alerts_found: int
    diagnoses_made: int
    patches_applied: int
    patches_pending_review: int
    capability_snapshot: CapabilitySnapshot
backend/app/services/api_discovery/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API Discovery subsystem for discovering and classifying free APIs.
3
+ """
4
+
5
+ from .catalog_loader import load_public_apis_catalog
6
+ from .classifier import classify_api
7
+ from .scorer import score_api_usefulness
8
+
9
+ __all__ = [
10
+ "load_public_apis_catalog",
11
+ "classify_api",
12
+ "score_api_usefulness",
13
+ ]
backend/app/services/api_discovery/catalog_loader.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Catalog loader for public-apis dataset.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from typing import List, Dict, Any, Optional
8
+ from pathlib import Path
9
+ import httpx
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # GitHub raw URL for public-apis catalog
14
+ PUBLIC_APIS_URL = "https://raw.githubusercontent.com/public-apis/public-apis/master/entries.json"
15
+ CACHE_FILE = Path(__file__).parent.parent.parent / "data" / "api_catalog_cache.json"
16
+
17
+
18
+ def load_public_apis_catalog(use_cache: bool = True) -> List[Dict[str, Any]]:
19
+ """
20
+ Load public-apis catalog from GitHub or local cache.
21
+
22
+ Args:
23
+ use_cache: If True, use local cache if available
24
+
25
+ Returns:
26
+ List of API entries with:
27
+ - API: API name
28
+ - Description: API description
29
+ - Auth: Auth type (apiKey, OAuth, None, etc.)
30
+ - HTTPS: HTTPS support (True/False)
31
+ - Cors: CORS support (yes/no/unknown)
32
+ - Category: API category
33
+ - Link: API documentation URL
34
+ """
35
+ # Try to load from cache first
36
+ if use_cache and CACHE_FILE.exists():
37
+ try:
38
+ logger.info("Loading API catalog from cache")
39
+ with open(CACHE_FILE, "r", encoding="utf-8") as f:
40
+ data = json.load(f)
41
+ logger.info(f"Loaded {len(data.get('entries', []))} APIs from cache")
42
+ return data.get("entries", [])
43
+ except Exception as e:
44
+ logger.warning(f"Failed to load cache: {e}")
45
+
46
+ # Fetch from GitHub
47
+ try:
48
+ logger.info("Fetching API catalog from GitHub")
49
+ with httpx.Client(timeout=30) as client:
50
+ response = client.get(PUBLIC_APIS_URL)
51
+ response.raise_for_status()
52
+ data = response.json()
53
+
54
+ entries = data.get("entries", [])
55
+ logger.info(f"Fetched {len(entries)} APIs from GitHub")
56
+
57
+ # Save to cache
58
+ try:
59
+ CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
60
+ with open(CACHE_FILE, "w", encoding="utf-8") as f:
61
+ json.dump(data, f, indent=2)
62
+ logger.info("Saved API catalog to cache")
63
+ except Exception as e:
64
+ logger.warning(f"Failed to save cache: {e}")
65
+
66
+ return entries
67
+
68
+ except Exception as e:
69
+ logger.error(f"Failed to fetch API catalog: {e}")
70
+ return []
71
+
72
+
73
def get_api_by_name(name: str) -> Optional[Dict[str, Any]]:
    """Get a specific API by name (case-insensitive match on the API field)."""
    wanted = name.lower()
    return next(
        (entry for entry in load_public_apis_catalog() if entry.get("API", "").lower() == wanted),
        None,
    )
80
+
81
+
82
def get_apis_by_category(category: str) -> List[Dict[str, Any]]:
    """Get all APIs in a specific category (case-insensitive)."""
    wanted = category.lower()
    return [
        entry
        for entry in load_public_apis_catalog()
        if entry.get("Category", "").lower() == wanted
    ]
backend/app/services/api_discovery/classifier.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API classifier for categorizing APIs by domain and use case.
3
+ """
4
+
5
+ import logging
6
+ from typing import Dict, Any, List
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ # Domain classification keywords
11
+ DOMAIN_KEYWORDS = {
12
+ "market_data": ["stock", "crypto", "currency", "finance", "trading", "market", "exchange", "commodity"],
13
+ "news": ["news", "article", "media", "press", "journalism", "headline"],
14
+ "social": ["social", "twitter", "facebook", "reddit", "instagram", "community"],
15
+ "government": ["government", "policy", "regulation", "law", "public", "civic", "open data"],
16
+ "weather": ["weather", "climate", "forecast", "meteorology", "temperature"],
17
+ "general": [], # Catch-all
18
+ }
19
+
20
+
21
+ def classify_api(api_entry: Dict[str, Any]) -> Dict[str, Any]:
22
+ """
23
+ Classify API by category and potential use cases.
24
+
25
+ Args:
26
+ api_entry: API entry from catalog with API, Description, Category, etc.
27
+
28
+ Returns:
29
+ Classification with:
30
+ - domain: Detected domain (market_data, news, social, government, weather, general)
31
+ - use_cases: List of potential use cases
32
+ - relevance_score: Relevance to MiroOrg domains (0.0 - 1.0)
33
+ """
34
+ name = api_entry.get("API", "").lower()
35
+ description = api_entry.get("Description", "").lower()
36
+ category = api_entry.get("Category", "").lower()
37
+
38
+ combined_text = f"{name} {description} {category}"
39
+
40
+ # Detect domain
41
+ detected_domain = "general"
42
+ max_matches = 0
43
+
44
+ for domain, keywords in DOMAIN_KEYWORDS.items():
45
+ if domain == "general":
46
+ continue
47
+ matches = sum(1 for keyword in keywords if keyword in combined_text)
48
+ if matches > max_matches:
49
+ max_matches = matches
50
+ detected_domain = domain
51
+
52
+ # Determine use cases
53
+ use_cases = []
54
+ if detected_domain == "market_data":
55
+ use_cases = ["financial_research", "market_analysis", "portfolio_tracking"]
56
+ elif detected_domain == "news":
57
+ use_cases = ["news_research", "sentiment_analysis", "event_detection"]
58
+ elif detected_domain == "social":
59
+ use_cases = ["social_listening", "sentiment_analysis", "trend_detection"]
60
+ elif detected_domain == "government":
61
+ use_cases = ["policy_research", "regulatory_tracking", "open_data_analysis"]
62
+ elif detected_domain == "weather":
63
+ use_cases = ["weather_forecasting", "climate_analysis"]
64
+ else:
65
+ use_cases = ["general_research"]
66
+
67
+ # Calculate relevance score (higher for domains we care about)
68
+ relevance_scores = {
69
+ "market_data": 1.0,
70
+ "news": 0.9,
71
+ "government": 0.8,
72
+ "social": 0.7,
73
+ "weather": 0.5,
74
+ "general": 0.3,
75
+ }
76
+ relevance_score = relevance_scores.get(detected_domain, 0.3)
77
+
78
+ return {
79
+ "domain": detected_domain,
80
+ "use_cases": use_cases,
81
+ "relevance_score": relevance_score,
82
+ }
83
+
84
+
85
+ def classify_multiple_apis(api_entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
86
+ """Classify multiple APIs and return enriched entries."""
87
+ results = []
88
+ for api in api_entries:
89
+ classification = classify_api(api)
90
+ enriched = {**api, **classification}
91
+ results.append(enriched)
92
+ return results
backend/app/services/api_discovery/metadata_store.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Metadata store for API discovery results.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import Dict, Any, List, Optional
9
+ from datetime import datetime
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ METADATA_DIR = Path(__file__).parent.parent.parent / "data" / "api_metadata"
14
+ METADATA_DIR.mkdir(parents=True, exist_ok=True)
15
+
16
+
17
+ def save_api_metadata(api_name: str, metadata: Dict[str, Any]) -> None:
18
+ """Save API metadata to local storage."""
19
+ safe_name = api_name.replace("/", "_").replace(" ", "_")
20
+ path = METADATA_DIR / f"{safe_name}.json"
21
+
22
+ data = {
23
+ **metadata,
24
+ "saved_at": datetime.utcnow().isoformat(),
25
+ }
26
+
27
+ with open(path, "w", encoding="utf-8") as f:
28
+ json.dump(data, f, indent=2)
29
+
30
+ logger.info(f"Saved metadata for API: {api_name}")
31
+
32
+
33
+ def get_api_metadata(api_name: str) -> Optional[Dict[str, Any]]:
34
+ """Get API metadata from local storage."""
35
+ safe_name = api_name.replace("/", "_").replace(" ", "_")
36
+ path = METADATA_DIR / f"{safe_name}.json"
37
+
38
+ if not path.exists():
39
+ return None
40
+
41
+ with open(path, "r", encoding="utf-8") as f:
42
+ return json.load(f)
43
+
44
+
45
+ def list_saved_apis() -> List[str]:
46
+ """List all APIs with saved metadata."""
47
+ return [p.stem for p in METADATA_DIR.glob("*.json")]
48
+
49
+
50
+ def delete_api_metadata(api_name: str) -> bool:
51
+ """Delete API metadata."""
52
+ safe_name = api_name.replace("/", "_").replace(" ", "_")
53
+ path = METADATA_DIR / f"{safe_name}.json"
54
+
55
+ if not path.exists():
56
+ return False
57
+
58
+ path.unlink()
59
+ logger.info(f"Deleted metadata for API: {api_name}")
60
+ return True
backend/app/services/api_discovery/scorer.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API scorer for prioritizing APIs for integration.
3
+ """
4
+
5
+ import logging
6
+ from typing import Dict, Any
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ def score_api_usefulness(api_entry: Dict[str, Any]) -> float:
12
+ """
13
+ Score API for integration priority (0.0 - 1.0).
14
+
15
+ Factors:
16
+ - Auth simplicity (no auth > apiKey > OAuth)
17
+ - HTTPS support (required)
18
+ - CORS support (preferred)
19
+ - Category relevance to domain packs
20
+ - Description quality
21
+
22
+ Args:
23
+ api_entry: API entry from catalog
24
+
25
+ Returns:
26
+ Usefulness score (0.0 - 1.0)
27
+ """
28
+ score = 0.0
29
+
30
+ # Auth simplicity (30% weight)
31
+ auth = api_entry.get("Auth", "").lower()
32
+ if auth == "" or auth == "no":
33
+ score += 0.30 # No auth is easiest
34
+ elif "apikey" in auth or "api key" in auth:
35
+ score += 0.20 # API key is simple
36
+ elif "oauth" in auth:
37
+ score += 0.10 # OAuth is complex
38
+ else:
39
+ score += 0.05 # Unknown auth
40
+
41
+ # HTTPS support (25% weight) - required for security
42
+ https = api_entry.get("HTTPS", False)
43
+ if https:
44
+ score += 0.25
45
+
46
+ # CORS support (15% weight) - preferred for frontend
47
+ cors = api_entry.get("Cors", "").lower()
48
+ if cors == "yes":
49
+ score += 0.15
50
+ elif cors == "unknown":
51
+ score += 0.05
52
+
53
+ # Category relevance (20% weight)
54
+ category = api_entry.get("Category", "").lower()
55
+ relevance_categories = {
56
+ "finance": 1.0,
57
+ "news": 0.9,
58
+ "government": 0.8,
59
+ "social": 0.7,
60
+ "business": 0.8,
61
+ "cryptocurrency": 0.9,
62
+ "weather": 0.6,
63
+ }
64
+ category_score = 0.0
65
+ for cat, weight in relevance_categories.items():
66
+ if cat in category:
67
+ category_score = max(category_score, weight)
68
+ score += 0.20 * category_score
69
+
70
+ # Description quality (10% weight)
71
+ description = api_entry.get("Description", "")
72
+ if len(description) > 50:
73
+ score += 0.10
74
+ elif len(description) > 20:
75
+ score += 0.05
76
+
77
+ return min(score, 1.0) # Cap at 1.0
78
+
79
+
80
+ def rank_apis(api_entries: list[Dict[str, Any]], top_n: int = 10) -> list[Dict[str, Any]]:
81
+ """
82
+ Rank APIs by usefulness score and return top N.
83
+
84
+ Args:
85
+ api_entries: List of API entries
86
+ top_n: Number of top APIs to return
87
+
88
+ Returns:
89
+ Top N APIs sorted by score (descending)
90
+ """
91
+ scored_apis = []
92
+ for api in api_entries:
93
+ score = score_api_usefulness(api)
94
+ scored_apis.append({**api, "usefulness_score": score})
95
+
96
+ # Sort by score descending
97
+ scored_apis.sort(key=lambda x: x.get("usefulness_score", 0.0), reverse=True)
98
+
99
+ return scored_apis[:top_n]
backend/app/services/case_store.py CHANGED
@@ -34,6 +34,8 @@ def list_cases(limit: Optional[int] = None) -> List[Dict[str, Any]]:
34
  "user_input": data.get("user_input", ""),
35
  "saved_at": data.get("saved_at"),
36
  "final_answer_preview": str(data.get("final_answer", ""))[:200],
 
 
37
  }
38
  )
39
  except Exception:
@@ -60,6 +62,20 @@ def delete_case(case_id: str) -> bool:
60
  return True
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def memory_stats() -> Dict[str, Any]:
64
  files = list(_memory_dir().glob("*.json"))
65
  files_sorted = sorted(files, key=lambda p: p.stat().st_mtime, reverse=True)
 
34
  "user_input": data.get("user_input", ""),
35
  "saved_at": data.get("saved_at"),
36
  "final_answer_preview": str(data.get("final_answer", ""))[:200],
37
+ "simulation_id": data.get("simulation_id"), # Include simulation link
38
+ "route": data.get("route"), # Include route for dashboard display
39
  }
40
  )
41
  except Exception:
 
62
  return True
63
 
64
 
65
def get_cases_by_simulation(simulation_id: str) -> List[Dict[str, Any]]:
    """Get all cases linked to a specific simulation."""
    matches: List[Dict[str, Any]] = []
    for case_file in _memory_dir().glob("*.json"):
        try:
            with open(case_file, "r", encoding="utf-8") as fh:
                record = json.load(fh)
            # Only cases explicitly stamped with this simulation's id count.
            if record.get("simulation_id") == simulation_id:
                matches.append(record)
        except Exception:
            # Unreadable or corrupt case files are skipped, mirroring list_cases.
            continue
    return matches
77
+
78
+
79
  def memory_stats() -> Dict[str, Any]:
80
  files = list(_memory_dir().glob("*.json"))
81
  files_sorted = sorted(files, key=lambda p: p.stat().st_mtime, reverse=True)
backend/app/services/external_sources.py CHANGED
@@ -1,4 +1,6 @@
1
  import re
 
 
2
  from typing import List, Dict, Any, Optional
3
 
4
  import httpx
@@ -10,6 +12,17 @@ from app.config import (
10
  JINA_READER_BASE,
11
  )
12
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  URL_PATTERN = re.compile(r"https?://\S+")
15
  TICKER_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")
@@ -50,13 +63,14 @@ def tavily_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
50
  "search_depth": "basic",
51
  "max_results": max_results,
52
  }
53
- with httpx.Client(timeout=30) as client:
54
- response = client.post("https://api.tavily.com/search", json=payload)
55
  if response.status_code >= 400:
 
56
  return []
57
  data = response.json()
58
  return data.get("results", [])
59
- except Exception:
 
60
  return []
61
 
62
 
@@ -72,13 +86,14 @@ def news_search(query: str, page_size: int = 5) -> List[Dict[str, Any]]:
72
  "sortBy": "publishedAt",
73
  "apiKey": NEWSAPI_KEY,
74
  }
75
- with httpx.Client(timeout=30) as client:
76
- response = client.get("https://newsapi.org/v2/everything", params=params)
77
  if response.status_code >= 400:
 
78
  return []
79
  data = response.json()
80
  return data.get("articles", [])
81
- except Exception:
 
82
  return []
83
 
84
 
@@ -86,19 +101,31 @@ def market_quote(symbol: str) -> Dict[str, Any]:
86
  if not ALPHAVANTAGE_API_KEY or not symbol:
87
  return {}
88
 
 
 
 
 
 
 
89
  try:
90
  params = {
91
  "function": "GLOBAL_QUOTE",
92
  "symbol": symbol,
93
  "apikey": ALPHAVANTAGE_API_KEY,
94
  }
95
- with httpx.Client(timeout=30) as client:
96
- response = client.get("https://www.alphavantage.co/query", params=params)
97
  if response.status_code >= 400:
 
98
  return {}
99
  data = response.json()
100
- return data.get("Global Quote", {})
101
- except Exception:
 
 
 
 
 
 
102
  return {}
103
 
104
 
 
1
  import re
2
+ import time
3
+ import logging
4
  from typing import List, Dict, Any, Optional
5
 
6
  import httpx
 
12
  JINA_READER_BASE,
13
  )
14
 
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # Module-level connection pool
18
+ _http_pool = httpx.Client(
19
+ timeout=30,
20
+ limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
21
+ )
22
+
23
+ # Simple TTL cache for market quotes (5 min)
24
+ _quote_cache: Dict[str, Dict[str, Any]] = {} # {symbol: {"data": ..., "ts": ...}}
25
+ _QUOTE_TTL = 300 # seconds
26
 
27
  URL_PATTERN = re.compile(r"https?://\S+")
28
  TICKER_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")
 
63
  "search_depth": "basic",
64
  "max_results": max_results,
65
  }
66
+ response = _http_pool.post("https://api.tavily.com/search", json=payload)
 
67
  if response.status_code >= 400:
68
+ logger.warning(f"Tavily search returned {response.status_code}")
69
  return []
70
  data = response.json()
71
  return data.get("results", [])
72
+ except Exception as e:
73
+ logger.error(f"Tavily search error: {e}")
74
  return []
75
 
76
 
 
86
  "sortBy": "publishedAt",
87
  "apiKey": NEWSAPI_KEY,
88
  }
89
+ response = _http_pool.get("https://newsapi.org/v2/everything", params=params)
 
90
  if response.status_code >= 400:
91
+ logger.warning(f"NewsAPI returned {response.status_code}")
92
  return []
93
  data = response.json()
94
  return data.get("articles", [])
95
+ except Exception as e:
96
+ logger.error(f"NewsAPI error: {e}")
97
  return []
98
 
99
 
 
101
  if not ALPHAVANTAGE_API_KEY or not symbol:
102
  return {}
103
 
104
+ # Check cache first
105
+ cached = _quote_cache.get(symbol)
106
+ if cached and (time.time() - cached["ts"]) < _QUOTE_TTL:
107
+ logger.debug(f"Market quote cache hit: {symbol}")
108
+ return cached["data"]
109
+
110
  try:
111
  params = {
112
  "function": "GLOBAL_QUOTE",
113
  "symbol": symbol,
114
  "apikey": ALPHAVANTAGE_API_KEY,
115
  }
116
+ response = _http_pool.get("https://www.alphavantage.co/query", params=params)
 
117
  if response.status_code >= 400:
118
+ logger.warning(f"Alpha Vantage returned {response.status_code}")
119
  return {}
120
  data = response.json()
121
+ quote = data.get("Global Quote", {})
122
+
123
+ # Cache the result
124
+ _quote_cache[symbol] = {"data": quote, "ts": time.time()}
125
+
126
+ return quote
127
+ except Exception as e:
128
+ logger.error(f"Alpha Vantage error: {e}")
129
  return {}
130
 
131
 
backend/app/services/learning/__init__.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Learning subsystem for autonomous knowledge evolution.
3
+
4
+ This module provides the infrastructure for the system to improve itself
5
+ over time without local model training. It includes:
6
+ - Knowledge ingestion from external sources
7
+ - Experience learning from case execution
8
+ - Prompt evolution through A/B testing
9
+ - Skill distillation from repeated patterns
10
+ - Trust and freshness management
11
+ """
12
+
13
+ from .knowledge_ingestor import KnowledgeIngestor
14
+ from .knowledge_store import KnowledgeStore
15
+ from .learning_engine import LearningEngine
16
+ from .prompt_optimizer import PromptOptimizer
17
+ from .skill_distiller import SkillDistiller
18
+ from .trust_manager import TrustManager
19
+ from .scheduler import LearningScheduler
20
+
21
+ __all__ = [
22
+ "KnowledgeIngestor",
23
+ "KnowledgeStore",
24
+ "LearningEngine",
25
+ "PromptOptimizer",
26
+ "SkillDistiller",
27
+ "TrustManager",
28
+ "LearningScheduler",
29
+ ]
backend/app/services/learning/freshness_manager.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Freshness management for knowledge items.
3
+
4
+ This is a convenience wrapper around TrustManager's freshness functionality.
5
+ """
6
+
7
+ from .trust_manager import TrustManager
8
+
9
+ # Re-export for convenience
10
+ __all__ = ["TrustManager"]
backend/app/services/learning/knowledge_ingestor.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge ingestion from external sources.
3
+
4
+ Ingests knowledge from web search, URLs, and news sources,
5
+ compressing content to 2-4KB summaries for efficient storage.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict, Any, Optional
10
+ import httpx
11
+ from datetime import datetime
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class KnowledgeIngestor:
    """Ingests knowledge from external sources.

    Sources:
      * Tavily web search        (:meth:`ingest_from_search`)
      * Jina Reader page extract (:meth:`ingest_from_url`)
      * NewsAPI articles         (:meth:`ingest_from_news`)

    Every produced item carries a ``summary`` compressed to at most
    ``_MAX_SUMMARY_BYTES`` bytes via :meth:`compress_content`.  All network
    and LLM failures are logged and swallowed so callers always receive a
    usable (possibly empty) result.
    """

    # Upper bound for stored summaries, in UTF-8 bytes.
    _MAX_SUMMARY_BYTES = 4096
    # Per-request timeout for outbound HTTP calls, in seconds.
    _HTTP_TIMEOUT = 30.0

    def __init__(self, tavily_key: Optional[str], newsapi_key: Optional[str], model_fn):
        """
        Args:
            tavily_key: Tavily API key, or None to disable search ingestion.
            newsapi_key: NewsAPI key, or None to disable news ingestion.
            model_fn: Async callable ``(prompt, max_tokens=...) -> str`` used
                for LLM summarization in :meth:`compress_content`.
        """
        self.tavily_key = tavily_key
        self.newsapi_key = newsapi_key
        self.model_fn = model_fn
        self.jina_reader_base = "https://r.jina.ai/"

    @staticmethod
    def _truncate_to_bytes(text: str, limit: int) -> str:
        """Truncate *text* to at most *limit* UTF-8 bytes.

        Encodes before slicing so the budget is enforced in bytes (a plain
        ``text[:limit]`` counts characters and can exceed *limit* for
        multibyte content); ``errors='ignore'`` drops any codepoint split by
        the byte cut.
        """
        encoded = text.encode("utf-8")
        if len(encoded) <= limit:
            return text
        return encoded[:limit].decode("utf-8", errors="ignore")

    async def ingest_from_search(self, query: str, max_results: int = 5) -> list[Dict[str, Any]]:
        """
        Ingest knowledge from web search using Tavily API.

        Args:
            query: Search query
            max_results: Maximum number of results to ingest

        Returns:
            List of knowledge items with compressed content (empty when the
            API key is missing or the request fails).
        """
        if not self.tavily_key:
            logger.warning("Tavily API key not configured")
            return []

        try:
            # Fetch first, then summarize: compression may invoke a slow LLM
            # call per result, and we should not hold the HTTP connection
            # open for that long.
            async with httpx.AsyncClient(timeout=self._HTTP_TIMEOUT) as client:
                response = await client.post(
                    "https://api.tavily.com/search",
                    json={
                        "api_key": self.tavily_key,
                        "query": query,
                        "max_results": max_results,
                    },
                )
                response.raise_for_status()
                data = response.json()

            items = []
            for result in data.get("results", []):
                summary = await self.compress_content(result.get("content", ""))
                items.append({
                    "source": "tavily_search",
                    "query": query,
                    "url": result.get("url"),
                    "title": result.get("title"),
                    "summary": summary,
                    # NOTE(review): datetime.utcnow() is deprecated in 3.12;
                    # kept so timestamps stay naive-ISO like existing items.
                    "ingested_at": datetime.utcnow().isoformat(),
                })

            logger.info(f"Ingested {len(items)} items from Tavily search: {query}")
            return items

        except Exception as e:
            logger.error(f"Failed to ingest from Tavily search: {e}")
            return []

    async def ingest_from_url(self, url: str) -> Optional[Dict[str, Any]]:
        """
        Ingest knowledge from a specific URL using Jina Reader.

        Args:
            url: URL to ingest

        Returns:
            Knowledge item with compressed content, or None if failed
        """
        try:
            async with httpx.AsyncClient(timeout=self._HTTP_TIMEOUT) as client:
                response = await client.get(f"{self.jina_reader_base}{url}")
                response.raise_for_status()
                content = response.text

            summary = await self.compress_content(content)

            item = {
                "source": "jina_reader",
                "url": url,
                "summary": summary,
                "ingested_at": datetime.utcnow().isoformat(),
            }

            logger.info(f"Ingested content from URL: {url}")
            return item

        except Exception as e:
            logger.error(f"Failed to ingest from URL {url}: {e}")
            return None

    async def ingest_from_news(self, query: str, max_results: int = 10) -> list[Dict[str, Any]]:
        """
        Ingest knowledge from news sources using NewsAPI.

        Args:
            query: News search query
            max_results: Maximum number of articles to ingest

        Returns:
            List of knowledge items with compressed content (empty when the
            API key is missing or the request fails).
        """
        if not self.newsapi_key:
            logger.warning("NewsAPI key not configured")
            return []

        try:
            async with httpx.AsyncClient(timeout=self._HTTP_TIMEOUT) as client:
                response = await client.get(
                    "https://newsapi.org/v2/everything",
                    params={
                        "apiKey": self.newsapi_key,
                        "q": query,
                        "pageSize": max_results,
                        "sortBy": "publishedAt",
                    },
                )
                response.raise_for_status()
                data = response.json()

            items = []
            for article in data.get("articles", []):
                # Concatenate the searchable fields; NewsAPI truncates
                # `content`, so title/description carry real signal too.
                content = f"{article.get('title', '')} {article.get('description', '')} {article.get('content', '')}"
                summary = await self.compress_content(content)
                items.append({
                    "source": "newsapi",
                    "query": query,
                    "url": article.get("url"),
                    "title": article.get("title"),
                    "published_at": article.get("publishedAt"),
                    "summary": summary,
                    "ingested_at": datetime.utcnow().isoformat(),
                })

            logger.info(f"Ingested {len(items)} articles from NewsAPI: {query}")
            return items

        except Exception as e:
            logger.error(f"Failed to ingest from NewsAPI: {e}")
            return []

    async def compress_content(self, content: str) -> str:
        """
        Compress content to a summary of at most ``_MAX_SUMMARY_BYTES`` bytes
        using the LLM.

        Args:
            content: Raw content to compress

        Returns:
            Compressed summary; on LLM failure, a byte-safe truncation of the
            raw content is returned instead.
        """
        if not content:
            return ""

        # Already within budget -- no LLM call needed.
        if len(content.encode('utf-8')) <= self._MAX_SUMMARY_BYTES:
            return content

        prompt = f"""Summarize the following content into a concise summary of 500-1000 words.
Focus on key facts, insights, and actionable information.

Content:
{content[:10000]}

Summary:"""

        try:
            summary = await self.model_fn(prompt, max_tokens=1500)
            # Enforce the byte budget on the model output as well.
            return self._truncate_to_bytes(summary, self._MAX_SUMMARY_BYTES)

        except Exception as e:
            logger.error(f"Failed to compress content: {e}")
            # Fallback: byte-safe truncation of the raw content.  (The old
            # fallback sliced by characters, which could exceed 4 KB for
            # multibyte text.)
            return self._truncate_to_bytes(content, self._MAX_SUMMARY_BYTES)
backend/app/services/learning/knowledge_store.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge storage with LRU eviction and expiration management.
3
+
4
+ Stores knowledge items as JSON files with 200MB storage limit.
5
+ Implements LRU eviction when limit is reached.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ from datetime import datetime, timedelta
12
+ from pathlib import Path
13
+ from typing import Dict, Any, Optional, List
14
+ import uuid
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class KnowledgeStore:
    """Stores and retrieves knowledge items with storage limits.

    Each item is a dict persisted as ``<data_dir>/knowledge/<id>.json``.
    When total storage exceeds ``max_size_mb``, the least-recently-accessed
    items (tracked via each item's ``last_accessed`` field) are evicted.
    """

    def __init__(self, data_dir: str, max_size_mb: int = 200):
        """
        Args:
            data_dir: Base data directory; items live in its ``knowledge/`` subdir.
            max_size_mb: Storage cap in megabytes.
        """
        self.data_dir = Path(data_dir) / "knowledge"
        self.max_size_bytes = max_size_mb * 1024 * 1024
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def save_knowledge(self, item: Dict[str, Any]) -> str:
        """
        Save a knowledge item to storage.

        Args:
            item: Knowledge item to save (mutated in place: id/timestamps added)

        Returns:
            Item ID
        """
        if "id" not in item:
            item["id"] = str(uuid.uuid4())

        # NOTE(review): datetime.utcnow() is deprecated in 3.12; kept so
        # stored timestamps stay naive-ISO, matching existing items.
        now = datetime.utcnow().isoformat()
        item["saved_at"] = now
        item["last_accessed"] = now

        file_path = self.data_dir / f"{item['id']}.json"
        with open(file_path, 'w') as f:
            json.dump(item, f, indent=2)

        # Evict AFTER writing so the cap holds including the new item.
        # (Previously eviction ran before the write, so the store could end
        # up one item over the limit.)  The new item has the freshest
        # last_accessed, so LRU eviction touches it last.
        self._enforce_storage_limit()

        logger.info(f"Saved knowledge item: {item['id']}")
        return item["id"]

    def get_knowledge(self, item_id: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve a knowledge item by ID.

        Args:
            item_id: Item ID

        Returns:
            Knowledge item or None if not found
        """
        file_path = self.data_dir / f"{item_id}.json"
        if not file_path.exists():
            return None

        with open(file_path, 'r') as f:
            item = json.load(f)

        # Touch the item so LRU eviction sees it as recently used.
        item["last_accessed"] = datetime.utcnow().isoformat()
        with open(file_path, 'w') as f:
            json.dump(item, f, indent=2)

        return item

    def search_knowledge(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Search knowledge items by case-insensitive substring match on title
        and summary.

        Args:
            query: Search query
            limit: Maximum number of results

        Returns:
            Matching items, title matches first, then newest first.
        """
        query_lower = query.lower()
        results = []

        # Scan all files before sorting.  (The previous early `break` at
        # `limit` made the result set depend on filesystem iteration order.)
        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    item = json.load(f)
            except Exception as e:
                logger.error(f"Failed to read knowledge item {file_path}: {e}")
                continue

            if (query_lower in item.get("title", "").lower()
                    or query_lower in item.get("summary", "").lower()):
                results.append(item)

        # Two stable sorts: newest first, then title matches bubbled to the
        # front.  (The previous single sort used reverse=True on the tuple
        # `(query not in title, saved_at)`, which inverted the priority and
        # put NON-title matches first.)
        results.sort(key=lambda x: x.get("saved_at", ""), reverse=True)
        results.sort(key=lambda x: query_lower not in x.get("title", "").lower())

        return results[:limit]

    def list_all(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
        """
        List all knowledge items, newest first.

        Args:
            limit: Maximum number of items to return (None for all)

        Returns:
            List of knowledge items
        """
        items = []

        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    items.append(json.load(f))
            except Exception as e:
                logger.error(f"Failed to read knowledge item {file_path}: {e}")

        items.sort(key=lambda x: x.get("saved_at", ""), reverse=True)

        # `is not None` so an explicit limit of 0 is honored (the previous
        # truthiness check treated limit=0 as "no limit").
        if limit is not None:
            return items[:limit]
        return items

    def delete_expired_knowledge(self, expiration_days: int = 30) -> int:
        """
        Delete expired knowledge items.

        Items with a missing ``saved_at`` are kept (their age is unknown).

        Args:
            expiration_days: Number of days before expiration

        Returns:
            Number of items deleted
        """
        cutoff = datetime.utcnow() - timedelta(days=expiration_days)
        deleted_count = 0

        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    item = json.load(f)

                saved_at_raw = item.get("saved_at")
                if not saved_at_raw:
                    # No timestamp -> cannot judge age; keep rather than
                    # raising ValueError from fromisoformat("").
                    continue

                if datetime.fromisoformat(saved_at_raw) < cutoff:
                    file_path.unlink()
                    deleted_count += 1
                    logger.info(f"Deleted expired knowledge item: {item.get('id')}")

            except Exception as e:
                logger.error(f"Failed to check expiration for {file_path}: {e}")

        logger.info(f"Deleted {deleted_count} expired knowledge items")
        return deleted_count

    def _enforce_storage_limit(self):
        """Enforce storage limit using LRU eviction."""
        current_size = self._get_storage_size()

        if current_size <= self.max_size_bytes:
            return

        logger.warning(f"Storage limit exceeded: {current_size / 1024 / 1024:.2f}MB / {self.max_size_bytes / 1024 / 1024:.2f}MB")

        # Collect (path, last_accessed) pairs; oldest access first.
        items = []
        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    item = json.load(f)
                items.append((file_path, item.get("last_accessed", "")))
            except Exception as e:
                logger.error(f"Failed to read {file_path}: {e}")

        items.sort(key=lambda x: x[1])  # least recently accessed first

        # Delete oldest items until back under the limit.
        for file_path, _ in items:
            if current_size <= self.max_size_bytes:
                break

            file_size = file_path.stat().st_size
            file_path.unlink()
            current_size -= file_size
            logger.info(f"Evicted knowledge item (LRU): {file_path.name}")

    def _get_storage_size(self) -> int:
        """Get total storage size in bytes."""
        return sum(file_path.stat().st_size for file_path in self.data_dir.glob("*.json"))

    def get_storage_stats(self) -> Dict[str, Any]:
        """Get storage statistics (sizes in MB, usage percent, item count)."""
        total_size = self._get_storage_size()
        item_count = len(list(self.data_dir.glob("*.json")))

        return {
            "total_size_mb": total_size / 1024 / 1024,
            "max_size_mb": self.max_size_bytes / 1024 / 1024,
            "usage_percent": (total_size / self.max_size_bytes) * 100 if self.max_size_bytes > 0 else 0,
            "item_count": item_count,
        }
backend/app/services/learning/learning_engine.py ADDED
@@ -0,0 +1,439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core learning engine that coordinates all learning activities.
3
+
4
+ Orchestrates knowledge ingestion, experience learning, prompt evolution,
5
+ skill distillation, trust management, and freshness management.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict, Any, List, Optional
10
+ from datetime import datetime
11
+ from collections import Counter
12
+
13
+ from .knowledge_ingestor import KnowledgeIngestor
14
+ from .knowledge_store import KnowledgeStore
15
+ from .prompt_optimizer import PromptOptimizer
16
+ from .skill_distiller import SkillDistiller
17
+ from .trust_manager import TrustManager
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class LearningEngine:
    """Coordinates all learning activities.

    Facade over the learning subsystem: knowledge ingestion/cleanup,
    per-case experience learning, pattern detection, prompt optimization,
    skill distillation, and trust/freshness maintenance.  The optional
    collaborators (prompt_optimizer, skill_distiller, trust_manager) may be
    None; the corresponding ``run_*`` methods then return a "skipped" status.
    """

    def __init__(
        self,
        knowledge_store: KnowledgeStore,
        knowledge_ingestor: KnowledgeIngestor,
        prompt_optimizer: Optional[PromptOptimizer] = None,
        skill_distiller: Optional[SkillDistiller] = None,
        trust_manager: Optional[TrustManager] = None,
    ):
        self.knowledge_store = knowledge_store
        self.knowledge_ingestor = knowledge_ingestor
        self.prompt_optimizer = prompt_optimizer
        self.skill_distiller = skill_distiller
        self.trust_manager = trust_manager
        # ISO timestamp of the most recent run of each periodic activity,
        # keyed by activity name ("knowledge_ingestion", "cleanup", ...).
        self.last_run: Dict[str, str] = {}

        # In-memory learning metadata (flushed periodically)
        self._case_learnings: List[Dict[str, Any]] = []
        self._route_stats: Counter = Counter()
        # NOTE(review): _provider_stats and _prompt_performance are never
        # written anywhere in this class — presumably reserved for future
        # per-provider tracking; confirm before removing.
        self._provider_stats: Dict[str, Dict[str, int]] = {}
        self._prompt_performance: Dict[str, Dict[str, Any]] = {}

    # ── Knowledge Ingestion ──────────────────────────────────────────────────

    async def run_knowledge_ingestion(self, topics: list[str]) -> Dict[str, Any]:
        """
        Run knowledge ingestion for specified topics.

        For each topic, pulls both web-search results and news articles via
        the ingestor and persists every item in the knowledge store.

        Args:
            topics: List of topics to ingest knowledge about

        Returns:
            Ingestion results (per-topic counts plus a total and timestamp)
        """
        logger.info(f"Running knowledge ingestion for topics: {topics}")

        total_items = 0
        results = []

        for topic in topics:
            search_items = await self.knowledge_ingestor.ingest_from_search(topic)
            for item in search_items:
                self.knowledge_store.save_knowledge(item)
                total_items += 1

            news_items = await self.knowledge_ingestor.ingest_from_news(topic)
            for item in news_items:
                self.knowledge_store.save_knowledge(item)
                total_items += 1

            results.append({
                "topic": topic,
                "search_items": len(search_items),
                "news_items": len(news_items),
            })

        self.last_run["knowledge_ingestion"] = datetime.utcnow().isoformat()

        logger.info(f"Knowledge ingestion complete: {total_items} items ingested")

        return {
            "total_items": total_items,
            "results": results,
            "timestamp": self.last_run["knowledge_ingestion"],
        }

    async def run_cleanup(self, expiration_days: int = 30) -> Dict[str, Any]:
        """Run cleanup of expired knowledge.

        Delegates deletion to the knowledge store and records the run time.
        """
        logger.info("Running knowledge cleanup")

        deleted_count = self.knowledge_store.delete_expired_knowledge(expiration_days)
        self.last_run["cleanup"] = datetime.utcnow().isoformat()

        return {
            "deleted_count": deleted_count,
            "timestamp": self.last_run["cleanup"],
        }

    # ── Experience Learning (Task 34) ────────────────────────────────────────

    def learn_from_case(self, case_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract learning metadata from a completed case execution.

        Expects ``case_data`` to carry ``case_id``, a ``route`` dict
        (domain_pack/execution_mode/complexity) and an ``outputs`` list of
        per-agent dicts — the shape is defined by the (unseen) case pipeline.

        Args:
            case_data: Complete case payload

        Returns:
            Learning metadata extracted
        """
        case_id = case_data.get("case_id", "unknown")
        logger.info(f"Learning from case {case_id}")

        route = case_data.get("route", {})
        outputs = case_data.get("outputs", [])

        # 1. Track route effectiveness
        route_key = f"{route.get('domain_pack', 'general')}:{route.get('execution_mode', 'standard')}"
        self._route_stats[route_key] += 1

        # 2. Track which agents produced useful output
        agents_used = []
        agent_quality = {}
        for output in outputs:
            if isinstance(output, dict):
                agent_name = output.get("agent", "unknown")
                summary = output.get("summary", "")
                confidence = output.get("confidence", 0.0)
                agents_used.append(agent_name)
                agent_quality[agent_name] = {
                    "output_length": len(summary),
                    "confidence": confidence,
                    # Heuristic: anything longer than 10 chars counts as output.
                    "produced_output": len(summary) > 10,
                }

        # 3. Build learning record
        learning = {
            "case_id": case_id,
            "route": route,
            "agents_used": agents_used,
            "agent_quality": agent_quality,
            "domain": route.get("domain_pack", "general"),
            "complexity": route.get("complexity", "medium"),
            "execution_mode": route.get("execution_mode", "standard"),
            "learned_at": datetime.utcnow().isoformat(),
        }

        self._case_learnings.append(learning)

        # Keep only last 500 learnings in memory
        if len(self._case_learnings) > 500:
            self._case_learnings = self._case_learnings[-500:]

        # 4. Update trust scores for sources mentioned in research
        if self.trust_manager:
            for output in outputs:
                if isinstance(output, dict) and output.get("agent") == "research":
                    sources = output.get("details", {}).get("sources", [])
                    for source in sources:
                        if isinstance(source, str):
                            # weight=0.5: half-strength positive signal —
                            # a cited source is weak evidence of reliability.
                            self.trust_manager.update_trust(source, True, weight=0.5)

        logger.info(f"Learned from case {case_id}: route={route_key}, agents={agents_used}")
        return learning

    def detect_patterns(self, min_frequency: int = 3) -> List[Dict[str, Any]]:
        """
        Detect patterns in recent case executions.

        Pure frequency analysis over the in-memory learnings: domain counts,
        execution-mode counts, and agent combinations that co-occur with
        confidence > 0.7.

        Args:
            min_frequency: Minimum occurrences to count as a pattern

        Returns:
            List of detected patterns
        """
        if len(self._case_learnings) < min_frequency:
            return []

        patterns = []

        # Pattern 1: Domain frequency
        domain_counts = Counter(l["domain"] for l in self._case_learnings)
        for domain, count in domain_counts.items():
            if count >= min_frequency:
                patterns.append({
                    "type": "domain_frequency",
                    "domain": domain,
                    "count": count,
                    "percentage": count / len(self._case_learnings) * 100,
                })

        # Pattern 2: Execution mode frequency
        mode_counts = Counter(l["execution_mode"] for l in self._case_learnings)
        for mode, count in mode_counts.items():
            if count >= min_frequency:
                patterns.append({
                    "type": "execution_mode_frequency",
                    "mode": mode,
                    "count": count,
                    "percentage": count / len(self._case_learnings) * 100,
                })

        # Pattern 3: Agent combinations that produce high confidence
        high_confidence_combos = Counter()
        for learning in self._case_learnings:
            quality = learning.get("agent_quality", {})
            high_conf_agents = [
                a for a, q in quality.items()
                if q.get("confidence", 0) > 0.7
            ]
            if high_conf_agents:
                # Sorted tuple -> combination key is order-independent.
                high_confidence_combos[tuple(sorted(high_conf_agents))] += 1

        for combo, count in high_confidence_combos.items():
            if count >= min_frequency:
                patterns.append({
                    "type": "high_confidence_agents",
                    "agents": list(combo),
                    "count": count,
                })

        self.last_run["pattern_detection"] = datetime.utcnow().isoformat()
        logger.info(f"Detected {len(patterns)} patterns from {len(self._case_learnings)} cases")
        return patterns

    def get_route_effectiveness(self) -> Dict[str, Any]:
        """Get route effectiveness insights.

        Returns per-route counts and percentages, ordered most-common first.
        (Despite the name, this reports usage frequency only — no
        success/failure signal is tracked per route.)
        """
        total = sum(self._route_stats.values())
        if total == 0:
            return {"total_cases": 0, "routes": {}}

        return {
            "total_cases": total,
            "routes": {
                route: {
                    "count": count,
                    "percentage": round(count / total * 100, 1),
                }
                for route, count in self._route_stats.most_common()
            },
        }

    def get_prompt_performance(self) -> Dict[str, Any]:
        """Get prompt performance insights from case learnings.

        Aggregates per-agent run counts, average confidence, and the fraction
        of runs that produced output, computed fresh from the in-memory
        learnings on every call.
        """
        if not self._case_learnings:
            return {"total_cases": 0, "agents": {}}

        agent_stats: Dict[str, Dict[str, Any]] = {}
        for learning in self._case_learnings:
            for agent, quality in learning.get("agent_quality", {}).items():
                if agent not in agent_stats:
                    agent_stats[agent] = {
                        "total_runs": 0,
                        "total_confidence": 0.0,
                        "produced_output_count": 0,
                    }
                agent_stats[agent]["total_runs"] += 1
                agent_stats[agent]["total_confidence"] += quality.get("confidence", 0)
                if quality.get("produced_output", False):
                    agent_stats[agent]["produced_output_count"] += 1

        # Calculate averages
        for agent, stats in agent_stats.items():
            runs = stats["total_runs"]
            stats["avg_confidence"] = round(stats["total_confidence"] / runs, 3) if runs > 0 else 0
            stats["output_rate"] = round(stats["produced_output_count"] / runs, 3) if runs > 0 else 0

        return {
            "total_cases": len(self._case_learnings),
            "agents": agent_stats,
        }

    # ── Prompt Evolution (Task 35) ───────────────────────────────────────────

    async def run_prompt_optimization(self, prompt_names: List[str]) -> Dict[str, Any]:
        """
        Run prompt optimization for specified prompts.

        Uses prompt_optimizer to create improved variants based on
        prompt performance data.  Prompts whose average confidence already
        exceeds 0.8 are skipped; missing prompts are silently ignored.
        """
        if not self.prompt_optimizer:
            return {"status": "skipped", "reason": "prompt_optimizer not configured"}

        results = []
        performance = self.get_prompt_performance()

        for name in prompt_names:
            agent_perf = performance.get("agents", {}).get(name, {})
            # Default 0.5 when the agent has no recorded runs yet.
            avg_conf = agent_perf.get("avg_confidence", 0.5)

            # Only optimize prompts with low average confidence
            if avg_conf > 0.8:
                results.append({"prompt": name, "status": "skipped", "reason": "already high performance"})
                continue

            try:
                # Local import avoids a circular dependency at module load.
                from app.services.prompt_store import get_prompt
                prompt_data = get_prompt(name)
                if not prompt_data:
                    continue

                goal = f"Improve output quality (current avg confidence: {avg_conf:.2f})"
                variant = await self.prompt_optimizer.create_prompt_variant(
                    name, prompt_data["content"], goal
                )
                results.append({"prompt": name, "status": "variant_created", "variant_id": variant["id"]})
            except Exception as e:
                logger.error(f"Failed to optimize prompt {name}: {e}")
                results.append({"prompt": name, "status": "error", "error": str(e)})

        self.last_run["prompt_optimization"] = datetime.utcnow().isoformat()
        return {"results": results, "timestamp": self.last_run["prompt_optimization"]}

    def get_active_prompt(self, prompt_name: str) -> Optional[str]:
        """
        Get the active production prompt text, if one has been promoted.

        Args:
            prompt_name: Prompt name (e.g., "research", "verifier")

        Returns:
            Production prompt text, or None if no production version exists
        """
        if not self.prompt_optimizer:
            return None

        # NOTE(review): reaches into PromptOptimizer's private helper —
        # consider exposing a public accessor on PromptOptimizer instead.
        production = self.prompt_optimizer._get_production_variant(prompt_name)
        if production:
            return production.get("prompt_text")
        return None

    # ── Skill Distillation (Task 36) ─────────────────────────────────────────

    async def run_skill_distillation(self, min_frequency: int = 3) -> Dict[str, Any]:
        """
        Run skill distillation from recent case patterns.

        Detects candidate skills from the in-memory case learnings and
        distills at most the top 5, each grounded in up to 3 example cases
        from the same domain.
        """
        if not self.skill_distiller:
            return {"status": "skipped", "reason": "skill_distiller not configured"}

        # Use in-memory case learnings as source data
        candidates = self.skill_distiller.detect_skill_candidates(
            self._case_learnings, min_frequency=min_frequency
        )

        skills_created = []
        for candidate in candidates[:5]:
            example_cases = [
                l for l in self._case_learnings
                if l.get("domain") == candidate.get("domain")
            ][:3]
            try:
                skill = await self.skill_distiller.distill_skill(candidate, example_cases)
                skills_created.append(skill)
            except Exception as e:
                logger.error(f"Failed to distill skill: {e}")

        self.last_run["skill_distillation"] = datetime.utcnow().isoformat()

        return {
            "candidates_found": len(candidates),
            "skills_created": len(skills_created),
            "skills": skills_created,
            "timestamp": self.last_run["skill_distillation"],
        }

    # ── Trust & Freshness (Task 37) ──────────────────────────────────────────

    async def run_freshness_refresh(self) -> Dict[str, Any]:
        """
        Check freshness of all knowledge items and flag stale ones.

        Items below the 0.3 freshness threshold are considered stale; up to
        10 refresh recommendations are produced per run.
        """
        if not self.trust_manager:
            return {"status": "skipped", "reason": "trust_manager not configured"}

        all_items = self.knowledge_store.list_all()
        stale = self.trust_manager.get_stale_items(all_items, threshold=0.3)
        recommendations = self.trust_manager.recommend_refresh(stale, max_recommendations=10)

        # Update freshness scores
        for item in all_items:
            freshness = self.trust_manager.calculate_freshness(item)
            item_id = item.get("id")
            if item_id:
                self.trust_manager.update_freshness(item_id, freshness)

        self.last_run["freshness_refresh"] = datetime.utcnow().isoformat()

        return {
            "total_items": len(all_items),
            "stale_items": len(stale),
            "refresh_recommendations": len(recommendations),
            "recommendations": recommendations,
            "timestamp": self.last_run["freshness_refresh"],
        }

    # ── Status & Insights ────────────────────────────────────────────────────

    def get_status(self) -> Dict[str, Any]:
        """Get learning engine status.

        Reports storage stats, per-activity last-run timestamps, learned-case
        count, and which optional components are wired in.
        """
        storage_stats = self.knowledge_store.get_storage_stats()

        return {
            "storage": storage_stats,
            "last_run": self.last_run,
            "enabled": True,
            "cases_learned": len(self._case_learnings),
            "components": {
                "knowledge_store": True,
                "knowledge_ingestor": True,
                "prompt_optimizer": self.prompt_optimizer is not None,
                "skill_distiller": self.skill_distiller is not None,
                "trust_manager": self.trust_manager is not None,
            },
        }

    def get_insights(self) -> Dict[str, Any]:
        """Get comprehensive learning insights.

        Bundles the 10 most recent knowledge items (metadata only), storage
        stats, route effectiveness, prompt performance, and detected patterns
        into one payload.
        """
        recent_items = self.knowledge_store.list_all(limit=10)

        return {
            "recent_knowledge": [
                {
                    "id": item.get("id"),
                    "title": item.get("title"),
                    "source": item.get("source"),
                    "saved_at": item.get("saved_at"),
                }
                for item in recent_items
            ],
            "storage_stats": self.knowledge_store.get_storage_stats(),
            "route_effectiveness": self.get_route_effectiveness(),
            "prompt_performance": self.get_prompt_performance(),
            "patterns": self.detect_patterns(),
        }
backend/app/services/learning/prompt_optimizer.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompt evolution through A/B testing and versioning.
3
+
4
+ Creates prompt variants, tests them, and promotes better versions.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional, List
12
+ import uuid
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class PromptOptimizer:
18
+ """Manages prompt evolution and A/B testing."""
19
+
20
+ def __init__(self, data_dir: str, model_fn):
21
+ self.data_dir = Path(data_dir) / "prompt_versions"
22
+ self.data_dir.mkdir(parents=True, exist_ok=True)
23
+ self.model_fn = model_fn
24
+
25
+ async def create_prompt_variant(self, prompt_name: str, current_prompt: str, goal: str) -> Dict[str, Any]:
26
+ """
27
+ Create a new prompt variant using LLM.
28
+
29
+ Args:
30
+ prompt_name: Name of the prompt (e.g., "research", "verifier")
31
+ current_prompt: Current prompt text
32
+ goal: Optimization goal (e.g., "improve clarity", "reduce tokens")
33
+
34
+ Returns:
35
+ Prompt variant with metadata
36
+ """
37
+ logger.info(f"Creating prompt variant for {prompt_name} with goal: {goal}")
38
+
39
+ optimization_prompt = f"""You are a prompt engineering expert. Improve the following prompt based on this goal: {goal}
40
+
41
+ Current prompt:
42
+ {current_prompt}
43
+
44
+ Create an improved version that:
45
+ 1. Maintains the same core functionality
46
+ 2. {goal}
47
+ 3. Is clear and actionable
48
+
49
+ Improved prompt:"""
50
+
51
+ try:
52
+ improved_prompt = await self.model_fn(optimization_prompt, max_tokens=2000)
53
+
54
+ variant = {
55
+ "id": str(uuid.uuid4()),
56
+ "prompt_name": prompt_name,
57
+ "version": self._get_next_version(prompt_name),
58
+ "prompt_text": improved_prompt,
59
+ "goal": goal,
60
+ "status": "testing",
61
+ "created_at": datetime.utcnow().isoformat(),
62
+ "test_count": 0,
63
+ "win_count": 0,
64
+ "win_rate": 0.0,
65
+ }
66
+
67
+ self._save_variant(variant)
68
+ logger.info(f"Created prompt variant: {variant['id']} (v{variant['version']})")
69
+
70
+ return variant
71
+
72
+ except Exception as e:
73
+ logger.error(f"Failed to create prompt variant: {e}")
74
+ raise
75
+
76
+ async def test_prompt_variant(self, variant_id: str, test_input: str, expected_quality: str) -> Dict[str, Any]:
77
+ """
78
+ Test a prompt variant with sample input.
79
+
80
+ Args:
81
+ variant_id: Variant ID
82
+ test_input: Test input
83
+ expected_quality: Expected quality criteria
84
+
85
+ Returns:
86
+ Test results
87
+ """
88
+ variant = self._load_variant(variant_id)
89
+ if not variant:
90
+ raise ValueError(f"Variant not found: {variant_id}")
91
+
92
+ logger.info(f"Testing prompt variant: {variant_id}")
93
+
94
+ try:
95
+ # Run the prompt with test input
96
+ test_prompt = variant["prompt_text"].replace("{input}", test_input)
97
+ output = await self.model_fn(test_prompt, max_tokens=1000)
98
+
99
+ # Evaluate quality
100
+ quality_score = await self._evaluate_quality(output, expected_quality)
101
+
102
+ # Update variant stats
103
+ variant["test_count"] += 1
104
+ if quality_score >= 0.7: # Consider it a "win" if quality >= 70%
105
+ variant["win_count"] += 1
106
+ variant["win_rate"] = variant["win_count"] / variant["test_count"]
107
+
108
+ self._save_variant(variant)
109
+
110
+ return {
111
+ "variant_id": variant_id,
112
+ "quality_score": quality_score,
113
+ "output": output,
114
+ "win_rate": variant["win_rate"],
115
+ }
116
+
117
+ except Exception as e:
118
+ logger.error(f"Failed to test prompt variant: {e}")
119
+ raise
120
+
121
+ def compare_prompts(self, variant_id_a: str, variant_id_b: str) -> Dict[str, Any]:
122
+ """
123
+ Compare two prompt variants.
124
+
125
+ Args:
126
+ variant_id_a: First variant ID
127
+ variant_id_b: Second variant ID
128
+
129
+ Returns:
130
+ Comparison results
131
+ """
132
+ variant_a = self._load_variant(variant_id_a)
133
+ variant_b = self._load_variant(variant_id_b)
134
+
135
+ if not variant_a or not variant_b:
136
+ raise ValueError("One or both variants not found")
137
+
138
+ return {
139
+ "variant_a": {
140
+ "id": variant_a["id"],
141
+ "version": variant_a["version"],
142
+ "win_rate": variant_a["win_rate"],
143
+ "test_count": variant_a["test_count"],
144
+ },
145
+ "variant_b": {
146
+ "id": variant_b["id"],
147
+ "version": variant_b["version"],
148
+ "win_rate": variant_b["win_rate"],
149
+ "test_count": variant_b["test_count"],
150
+ },
151
+ "winner": variant_a["id"] if variant_a["win_rate"] > variant_b["win_rate"] else variant_b["id"],
152
+ }
153
+
154
+ def promote_prompt(self, variant_id: str, min_tests: int = 10, min_win_rate: float = 0.7) -> bool:
155
+ """
156
+ Promote a prompt variant to production.
157
+
158
+ Args:
159
+ variant_id: Variant ID
160
+ min_tests: Minimum number of tests required
161
+ min_win_rate: Minimum win rate required
162
+
163
+ Returns:
164
+ True if promoted, False otherwise
165
+ """
166
+ variant = self._load_variant(variant_id)
167
+ if not variant:
168
+ raise ValueError(f"Variant not found: {variant_id}")
169
+
170
+ # Validate promotion criteria
171
+ if variant["test_count"] < min_tests:
172
+ logger.warning(f"Variant {variant_id} has insufficient tests: {variant['test_count']} < {min_tests}")
173
+ return False
174
+
175
+ if variant["win_rate"] < min_win_rate:
176
+ logger.warning(f"Variant {variant_id} has insufficient win rate: {variant['win_rate']} < {min_win_rate}")
177
+ return False
178
+
179
+ # Archive current production version
180
+ current_production = self._get_production_variant(variant["prompt_name"])
181
+ if current_production:
182
+ current_production["status"] = "archived"
183
+ current_production["archived_at"] = datetime.utcnow().isoformat()
184
+ self._save_variant(current_production)
185
+
186
+ # Promote variant
187
+ variant["status"] = "production"
188
+ variant["promoted_at"] = datetime.utcnow().isoformat()
189
+ self._save_variant(variant)
190
+
191
+ logger.info(f"Promoted prompt variant {variant_id} to production")
192
+ return True
193
+
194
+ def archive_prompt(self, variant_id: str):
195
+ """Archive a prompt variant."""
196
+ variant = self._load_variant(variant_id)
197
+ if not variant:
198
+ raise ValueError(f"Variant not found: {variant_id}")
199
+
200
+ variant["status"] = "archived"
201
+ variant["archived_at"] = datetime.utcnow().isoformat()
202
+ self._save_variant(variant)
203
+
204
+ logger.info(f"Archived prompt variant: {variant_id}")
205
+
206
+ def list_versions(self, prompt_name: str) -> List[Dict[str, Any]]:
207
+ """List all versions of a prompt."""
208
+ versions = []
209
+
210
+ for file_path in self.data_dir.glob(f"{prompt_name}_*.json"):
211
+ try:
212
+ with open(file_path, 'r') as f:
213
+ variant = json.load(f)
214
+ versions.append(variant)
215
+ except Exception as e:
216
+ logger.error(f"Failed to read variant {file_path}: {e}")
217
+
218
+ # Sort by version descending
219
+ versions.sort(key=lambda x: x.get("version", 0), reverse=True)
220
+ return versions
221
+
222
+ def _save_variant(self, variant: Dict[str, Any]):
223
+ """Save a prompt variant to disk."""
224
+ file_path = self.data_dir / f"{variant['prompt_name']}_{variant['id']}.json"
225
+ with open(file_path, 'w') as f:
226
+ json.dump(variant, f, indent=2)
227
+
228
+ def _load_variant(self, variant_id: str) -> Optional[Dict[str, Any]]:
229
+ """Load a prompt variant from disk."""
230
+ for file_path in self.data_dir.glob(f"*_{variant_id}.json"):
231
+ with open(file_path, 'r') as f:
232
+ return json.load(f)
233
+ return None
234
+
235
+ def _get_production_variant(self, prompt_name: str) -> Optional[Dict[str, Any]]:
236
+ """Get the current production variant for a prompt."""
237
+ for file_path in self.data_dir.glob(f"{prompt_name}_*.json"):
238
+ try:
239
+ with open(file_path, 'r') as f:
240
+ variant = json.load(f)
241
+ if variant.get("status") == "production":
242
+ return variant
243
+ except Exception as e:
244
+ logger.error(f"Failed to read variant {file_path}: {e}")
245
+ return None
246
+
247
+ def _get_next_version(self, prompt_name: str) -> int:
248
+ """Get the next version number for a prompt."""
249
+ versions = self.list_versions(prompt_name)
250
+ if not versions:
251
+ return 1
252
+ return max(v.get("version", 0) for v in versions) + 1
253
+
254
+ async def _evaluate_quality(self, output: str, expected_quality: str) -> float:
255
+ """Evaluate output quality using LLM."""
256
+ eval_prompt = f"""Evaluate the quality of this output based on the criteria: {expected_quality}
257
+
258
+ Output:
259
+ {output}
260
+
261
+ Rate the quality from 0.0 to 1.0, where:
262
+ - 0.0 = Completely fails criteria
263
+ - 0.5 = Partially meets criteria
264
+ - 1.0 = Fully meets criteria
265
+
266
+ Provide only the numeric score:"""
267
+
268
+ try:
269
+ score_text = await self.model_fn(eval_prompt, max_tokens=10)
270
+ score = float(score_text.strip())
271
+ return max(0.0, min(1.0, score)) # Clamp to [0, 1]
272
+ except Exception as e:
273
+ logger.error(f"Failed to evaluate quality: {e}")
274
+ return 0.5 # Default to neutral score
backend/app/services/learning/scheduler.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Learning scheduler with safeguards for laptop deployment.
3
+
4
+ Schedules learning tasks with CPU, battery, and system idle checks.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ import psutil
10
+ from datetime import datetime, timedelta
11
+ from typing import Dict, Any, Callable, Optional
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class LearningScheduler:
17
+ """Schedules learning tasks with system safeguards."""
18
+
19
+ def __init__(
20
+ self,
21
+ max_cpu_percent: float = 50.0,
22
+ min_battery_percent: float = 30.0,
23
+ check_interval_seconds: int = 60,
24
+ ):
25
+ self.max_cpu_percent = max_cpu_percent
26
+ self.min_battery_percent = min_battery_percent
27
+ self.check_interval_seconds = check_interval_seconds
28
+ self.scheduled_tasks = {}
29
+ self.running = False
30
+ self.last_run = {}
31
+
32
+ def schedule_task(
33
+ self,
34
+ task_name: str,
35
+ task_fn: Callable,
36
+ interval_hours: int,
37
+ run_immediately: bool = False,
38
+ ):
39
+ """
40
+ Schedule a learning task.
41
+
42
+ Args:
43
+ task_name: Task name
44
+ task_fn: Async function to run
45
+ interval_hours: Interval in hours
46
+ run_immediately: Whether to run immediately on first check
47
+ """
48
+ self.scheduled_tasks[task_name] = {
49
+ "fn": task_fn,
50
+ "interval": timedelta(hours=interval_hours),
51
+ "last_run": None if run_immediately else datetime.utcnow(),
52
+ }
53
+ logger.info(f"Scheduled task: {task_name} (interval={interval_hours}h)")
54
+
55
+ async def start(self):
56
+ """Start the scheduler."""
57
+ if self.running:
58
+ logger.warning("Scheduler already running")
59
+ return
60
+
61
+ self.running = True
62
+ logger.info("Learning scheduler started")
63
+
64
+ while self.running:
65
+ try:
66
+ await self._check_and_run_tasks()
67
+ await asyncio.sleep(self.check_interval_seconds)
68
+ except Exception as e:
69
+ logger.error(f"Scheduler error: {e}")
70
+ await asyncio.sleep(self.check_interval_seconds)
71
+
72
+ def stop(self):
73
+ """Stop the scheduler."""
74
+ self.running = False
75
+ logger.info("Learning scheduler stopped")
76
+
77
+ async def run_once(self, task_name: str) -> Dict[str, Any]:
78
+ """
79
+ Run a task once manually.
80
+
81
+ Args:
82
+ task_name: Task name
83
+
84
+ Returns:
85
+ Task result
86
+ """
87
+ if task_name not in self.scheduled_tasks:
88
+ raise ValueError(f"Task not found: {task_name}")
89
+
90
+ task = self.scheduled_tasks[task_name]
91
+
92
+ logger.info(f"Running task manually: {task_name}")
93
+
94
+ try:
95
+ result = await task["fn"]()
96
+ task["last_run"] = datetime.utcnow()
97
+ self.last_run[task_name] = task["last_run"].isoformat()
98
+
99
+ return {
100
+ "task_name": task_name,
101
+ "status": "success",
102
+ "result": result,
103
+ "timestamp": task["last_run"].isoformat(),
104
+ }
105
+
106
+ except Exception as e:
107
+ logger.error(f"Task failed: {task_name}: {e}")
108
+ return {
109
+ "task_name": task_name,
110
+ "status": "error",
111
+ "error": str(e),
112
+ "timestamp": datetime.utcnow().isoformat(),
113
+ }
114
+
115
+ def is_system_idle(self) -> bool:
116
+ """
117
+ Check if system is idle (low CPU usage).
118
+
119
+ Returns:
120
+ True if system is idle
121
+ """
122
+ try:
123
+ cpu_percent = psutil.cpu_percent(interval=1)
124
+ is_idle = cpu_percent < self.max_cpu_percent
125
+
126
+ if not is_idle:
127
+ logger.debug(f"System not idle: CPU={cpu_percent:.1f}% (max={self.max_cpu_percent}%)")
128
+
129
+ return is_idle
130
+
131
+ except Exception as e:
132
+ logger.error(f"Failed to check CPU usage: {e}")
133
+ return False # Assume not idle on error
134
+
135
+ def is_battery_ok(self) -> bool:
136
+ """
137
+ Check if battery level is sufficient.
138
+
139
+ Returns:
140
+ True if battery is OK or plugged in
141
+ """
142
+ try:
143
+ battery = psutil.sensors_battery()
144
+
145
+ # If no battery (desktop) or plugged in, always OK
146
+ if battery is None or battery.power_plugged:
147
+ return True
148
+
149
+ # Check battery percentage
150
+ is_ok = battery.percent >= self.min_battery_percent
151
+
152
+ if not is_ok:
153
+ logger.debug(f"Battery too low: {battery.percent:.1f}% (min={self.min_battery_percent}%)")
154
+
155
+ return is_ok
156
+
157
+ except Exception as e:
158
+ logger.error(f"Failed to check battery: {e}")
159
+ return True # Assume OK on error (might be desktop)
160
+
161
+ async def _check_and_run_tasks(self):
162
+ """Check and run scheduled tasks if conditions are met."""
163
+ # Check system conditions
164
+ if not self.is_system_idle():
165
+ logger.debug("Skipping scheduled tasks: system not idle")
166
+ return
167
+
168
+ if not self.is_battery_ok():
169
+ logger.debug("Skipping scheduled tasks: battery too low")
170
+ return
171
+
172
+ # Check each task
173
+ now = datetime.utcnow()
174
+
175
+ for task_name, task in self.scheduled_tasks.items():
176
+ # Check if task is due
177
+ if task["last_run"] is not None:
178
+ time_since_last_run = now - task["last_run"]
179
+ if time_since_last_run < task["interval"]:
180
+ continue
181
+
182
+ # Run task
183
+ logger.info(f"Running scheduled task: {task_name}")
184
+
185
+ try:
186
+ result = await task["fn"]()
187
+ task["last_run"] = now
188
+ self.last_run[task_name] = now.isoformat()
189
+ logger.info(f"Task completed: {task_name}")
190
+
191
+ except Exception as e:
192
+ logger.error(f"Task failed: {task_name}: {e}")
193
+ # Still update last_run to avoid retry loop
194
+ task["last_run"] = now
195
+ self.last_run[task_name] = now.isoformat()
196
+
197
+ def get_status(self) -> Dict[str, Any]:
198
+ """Get scheduler status."""
199
+ tasks_status = []
200
+
201
+ for task_name, task in self.scheduled_tasks.items():
202
+ last_run = task["last_run"]
203
+ next_run = None
204
+
205
+ if last_run is not None:
206
+ next_run = (last_run + task["interval"]).isoformat()
207
+
208
+ tasks_status.append({
209
+ "name": task_name,
210
+ "interval_hours": task["interval"].total_seconds() / 3600,
211
+ "last_run": last_run.isoformat() if last_run else None,
212
+ "next_run": next_run,
213
+ })
214
+
215
+ return {
216
+ "running": self.running,
217
+ "system_idle": self.is_system_idle(),
218
+ "battery_ok": self.is_battery_ok(),
219
+ "tasks": tasks_status,
220
+ }
backend/app/services/learning/skill_distiller.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Skill distillation from repeated patterns in case execution.
3
+
4
+ Detects patterns in successful cases and distills them into reusable skills.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional, List
12
+ from collections import Counter
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class SkillDistiller:
18
+ """Distills skills from execution patterns."""
19
+
20
+ def __init__(self, data_dir: str, model_fn):
21
+ self.data_dir = Path(data_dir) / "skills"
22
+ self.data_dir.mkdir(parents=True, exist_ok=True)
23
+ self.model_fn = model_fn
24
+
25
+ def detect_skill_candidates(self, cases: List[Dict[str, Any]], min_frequency: int = 3) -> List[Dict[str, Any]]:
26
+ """
27
+ Detect skill candidates from case patterns.
28
+
29
+ Args:
30
+ cases: List of case records
31
+ min_frequency: Minimum frequency for a pattern to be considered
32
+
33
+ Returns:
34
+ List of skill candidates
35
+ """
36
+ logger.info(f"Detecting skill candidates from {len(cases)} cases")
37
+
38
+ # Extract patterns
39
+ domain_patterns = Counter()
40
+ source_patterns = Counter()
41
+ agent_patterns = Counter()
42
+
43
+ for case in cases:
44
+ route = case.get("route", {})
45
+ domain = route.get("domain_pack")
46
+ if domain:
47
+ domain_patterns[domain] += 1
48
+
49
+ # Extract sources from research output
50
+ outputs = case.get("outputs", {})
51
+ research_output = outputs.get("research", {})
52
+ sources = research_output.get("sources", [])
53
+ for source in sources:
54
+ source_patterns[source] += 1
55
+
56
+ # Extract agent sequence
57
+ agent_sequence = tuple(outputs.keys())
58
+ agent_patterns[agent_sequence] += 1
59
+
60
+ # Find frequent patterns
61
+ candidates = []
62
+
63
+ for domain, count in domain_patterns.items():
64
+ if count >= min_frequency:
65
+ candidates.append({
66
+ "type": "domain_expertise",
67
+ "domain": domain,
68
+ "frequency": count,
69
+ "confidence": count / len(cases),
70
+ })
71
+
72
+ for source, count in source_patterns.items():
73
+ if count >= min_frequency:
74
+ candidates.append({
75
+ "type": "preferred_source",
76
+ "source": source,
77
+ "frequency": count,
78
+ "confidence": count / len(cases),
79
+ })
80
+
81
+ for agent_seq, count in agent_patterns.items():
82
+ if count >= min_frequency:
83
+ candidates.append({
84
+ "type": "agent_workflow",
85
+ "agents": list(agent_seq),
86
+ "frequency": count,
87
+ "confidence": count / len(cases),
88
+ })
89
+
90
+ logger.info(f"Detected {len(candidates)} skill candidates")
91
+ return candidates
92
+
93
+ async def distill_skill(self, candidate: Dict[str, Any], example_cases: List[Dict[str, Any]]) -> Dict[str, Any]:
94
+ """
95
+ Distill a skill from a candidate pattern.
96
+
97
+ Args:
98
+ candidate: Skill candidate
99
+ example_cases: Example cases demonstrating the pattern
100
+
101
+ Returns:
102
+ Distilled skill
103
+ """
104
+ logger.info(f"Distilling skill from candidate: {candidate['type']}")
105
+
106
+ # Generate skill description using LLM
107
+ examples_text = "\n\n".join([
108
+ f"Case {i+1}:\nInput: {case.get('user_input', '')}\nOutput: {case.get('final_answer', '')}"
109
+ for i, case in enumerate(example_cases[:3])
110
+ ])
111
+
112
+ distill_prompt = f"""Analyze these successful cases and create a reusable skill description.
113
+
114
+ Pattern type: {candidate['type']}
115
+ Pattern details: {json.dumps(candidate, indent=2)}
116
+
117
+ Example cases:
118
+ {examples_text}
119
+
120
+ Create a skill that includes:
121
+ 1. A clear name
122
+ 2. Trigger patterns (when to use this skill)
123
+ 3. Recommended agents
124
+ 4. Preferred sources
125
+ 5. Expected outcomes
126
+
127
+ Skill (JSON format):"""
128
+
129
+ try:
130
+ skill_text = await self.model_fn(distill_prompt, max_tokens=1000)
131
+
132
+ # Parse skill (basic fallback if LLM doesn't return valid JSON)
133
+ try:
134
+ skill = json.loads(skill_text)
135
+ except json.JSONDecodeError:
136
+ skill = {
137
+ "name": f"{candidate['type']}_skill",
138
+ "description": skill_text,
139
+ "trigger_patterns": [],
140
+ "recommended_agents": [],
141
+ "preferred_sources": [],
142
+ }
143
+
144
+ # Add metadata
145
+ skill["id"] = f"{candidate['type']}_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}"
146
+ skill["type"] = candidate["type"]
147
+ skill["frequency"] = candidate["frequency"]
148
+ skill["confidence"] = candidate["confidence"]
149
+ skill["created_at"] = datetime.utcnow().isoformat()
150
+ skill["usage_count"] = 0
151
+ skill["success_count"] = 0
152
+ skill["success_rate"] = 0.0
153
+
154
+ self._save_skill(skill)
155
+ logger.info(f"Distilled skill: {skill['id']}")
156
+
157
+ return skill
158
+
159
+ except Exception as e:
160
+ logger.error(f"Failed to distill skill: {e}")
161
+ raise
162
+
163
+ async def test_skill(self, skill_id: str, test_case: Dict[str, Any]) -> Dict[str, Any]:
164
+ """
165
+ Test a skill with a test case.
166
+
167
+ Args:
168
+ skill_id: Skill ID
169
+ test_case: Test case
170
+
171
+ Returns:
172
+ Test results
173
+ """
174
+ skill = self._load_skill(skill_id)
175
+ if not skill:
176
+ raise ValueError(f"Skill not found: {skill_id}")
177
+
178
+ logger.info(f"Testing skill: {skill_id}")
179
+
180
+ # Check if skill triggers match test case
181
+ triggers_match = self._check_triggers(skill, test_case)
182
+
183
+ return {
184
+ "skill_id": skill_id,
185
+ "triggers_match": triggers_match,
186
+ "test_case_id": test_case.get("case_id"),
187
+ }
188
+
189
+ def apply_skill(self, skill_id: str, context: Dict[str, Any]) -> Dict[str, Any]:
190
+ """
191
+ Apply a skill to a context.
192
+
193
+ Args:
194
+ skill_id: Skill ID
195
+ context: Execution context
196
+
197
+ Returns:
198
+ Skill application results
199
+ """
200
+ skill = self._load_skill(skill_id)
201
+ if not skill:
202
+ raise ValueError(f"Skill not found: {skill_id}")
203
+
204
+ logger.info(f"Applying skill: {skill_id}")
205
+
206
+ # Update usage stats
207
+ skill["usage_count"] += 1
208
+ self._save_skill(skill)
209
+
210
+ return {
211
+ "skill_id": skill_id,
212
+ "recommended_agents": skill.get("recommended_agents", []),
213
+ "preferred_sources": skill.get("preferred_sources", []),
214
+ "expected_outcomes": skill.get("expected_outcomes", []),
215
+ }
216
+
217
+ def record_skill_outcome(self, skill_id: str, success: bool):
218
+ """Record the outcome of a skill application."""
219
+ skill = self._load_skill(skill_id)
220
+ if not skill:
221
+ return
222
+
223
+ if success:
224
+ skill["success_count"] += 1
225
+
226
+ if skill["usage_count"] > 0:
227
+ skill["success_rate"] = skill["success_count"] / skill["usage_count"]
228
+
229
+ self._save_skill(skill)
230
+ logger.info(f"Recorded skill outcome: {skill_id} (success={success})")
231
+
232
+ def list_skills(self) -> List[Dict[str, Any]]:
233
+ """List all skills."""
234
+ skills = []
235
+
236
+ for file_path in self.data_dir.glob("*.json"):
237
+ try:
238
+ with open(file_path, 'r') as f:
239
+ skill = json.load(f)
240
+ skills.append(skill)
241
+ except Exception as e:
242
+ logger.error(f"Failed to read skill {file_path}: {e}")
243
+
244
+ # Sort by success rate descending
245
+ skills.sort(key=lambda x: x.get("success_rate", 0), reverse=True)
246
+ return skills
247
+
248
+ def get_skill(self, skill_id: str) -> Optional[Dict[str, Any]]:
249
+ """Get a skill by ID."""
250
+ return self._load_skill(skill_id)
251
+
252
+ def _save_skill(self, skill: Dict[str, Any]):
253
+ """Save a skill to disk."""
254
+ file_path = self.data_dir / f"{skill['id']}.json"
255
+ with open(file_path, 'w') as f:
256
+ json.dump(skill, f, indent=2)
257
+
258
+ def _load_skill(self, skill_id: str) -> Optional[Dict[str, Any]]:
259
+ """Load a skill from disk."""
260
+ file_path = self.data_dir / f"{skill_id}.json"
261
+ if not file_path.exists():
262
+ return None
263
+
264
+ with open(file_path, 'r') as f:
265
+ return json.load(f)
266
+
267
+ def _check_triggers(self, skill: Dict[str, Any], test_case: Dict[str, Any]) -> bool:
268
+ """Check if skill triggers match a test case."""
269
+ trigger_patterns = skill.get("trigger_patterns", [])
270
+ if not trigger_patterns:
271
+ return True # No triggers means always applicable
272
+
273
+ user_input = test_case.get("user_input", "").lower()
274
+
275
+ for pattern in trigger_patterns:
276
+ if pattern.lower() in user_input:
277
+ return True
278
+
279
+ return False
backend/app/services/learning/trust_manager.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Trust and freshness management for sources and knowledge.
3
+
4
+ Tracks source reliability and content freshness over time.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional, List
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class TrustManager:
17
+ """Manages trust scores and freshness for sources."""
18
+
19
+ def __init__(self, data_dir: str):
20
+ self.data_dir = Path(data_dir) / "learning"
21
+ self.data_dir.mkdir(parents=True, exist_ok=True)
22
+ self.trust_file = self.data_dir / "source_trust.json"
23
+ self.freshness_file = self.data_dir / "freshness_scores.json"
24
+
25
+ # Initialize files if they don't exist
26
+ if not self.trust_file.exists():
27
+ self._save_trust_data({})
28
+ if not self.freshness_file.exists():
29
+ self._save_freshness_data({})
30
+
31
+ def get_trust_score(self, source: str) -> float:
32
+ """
33
+ Get trust score for a source.
34
+
35
+ Args:
36
+ source: Source identifier (URL or name)
37
+
38
+ Returns:
39
+ Trust score (0.0 to 1.0)
40
+ """
41
+ trust_data = self._load_trust_data()
42
+ source_data = trust_data.get(source, {})
43
+ return source_data.get("trust_score", 0.5) # Default to neutral
44
+
45
+ def update_trust(self, source: str, verification_outcome: bool, weight: float = 1.0):
46
+ """
47
+ Update trust score based on verification outcome.
48
+
49
+ Args:
50
+ source: Source identifier
51
+ verification_outcome: True if verified, False if not
52
+ weight: Weight of this update (0.0 to 1.0)
53
+ """
54
+ trust_data = self._load_trust_data()
55
+
56
+ if source not in trust_data:
57
+ trust_data[source] = {
58
+ "trust_score": 0.5,
59
+ "verification_count": 0,
60
+ "success_count": 0,
61
+ "last_updated": datetime.utcnow().isoformat(),
62
+ }
63
+
64
+ source_data = trust_data[source]
65
+
66
+ # Update counts
67
+ source_data["verification_count"] += 1
68
+ if verification_outcome:
69
+ source_data["success_count"] += 1
70
+
71
+ # Calculate new trust score using exponential moving average
72
+ current_score = source_data["trust_score"]
73
+ outcome_score = 1.0 if verification_outcome else 0.0
74
+ alpha = 0.1 * weight # Learning rate
75
+ new_score = (1 - alpha) * current_score + alpha * outcome_score
76
+
77
+ source_data["trust_score"] = new_score
78
+ source_data["last_updated"] = datetime.utcnow().isoformat()
79
+
80
+ trust_data[source] = source_data
81
+ self._save_trust_data(trust_data)
82
+
83
+ logger.info(f"Updated trust for {source}: {new_score:.3f} (outcome={verification_outcome})")
84
+
85
+ def list_trusted_sources(self, min_trust: float = 0.7, min_verifications: int = 3) -> List[Dict[str, Any]]:
86
+ """
87
+ List trusted sources.
88
+
89
+ Args:
90
+ min_trust: Minimum trust score
91
+ min_verifications: Minimum number of verifications
92
+
93
+ Returns:
94
+ List of trusted sources
95
+ """
96
+ trust_data = self._load_trust_data()
97
+
98
+ trusted = []
99
+ for source, data in trust_data.items():
100
+ if data["trust_score"] >= min_trust and data["verification_count"] >= min_verifications:
101
+ trusted.append({
102
+ "source": source,
103
+ "trust_score": data["trust_score"],
104
+ "verification_count": data["verification_count"],
105
+ "success_rate": data["success_count"] / data["verification_count"],
106
+ })
107
+
108
+ # Sort by trust score descending
109
+ trusted.sort(key=lambda x: x["trust_score"], reverse=True)
110
+ return trusted
111
+
112
+ def list_untrusted_sources(self, max_trust: float = 0.3, min_verifications: int = 3) -> List[Dict[str, Any]]:
113
+ """
114
+ List untrusted sources.
115
+
116
+ Args:
117
+ max_trust: Maximum trust score
118
+ min_verifications: Minimum number of verifications
119
+
120
+ Returns:
121
+ List of untrusted sources
122
+ """
123
+ trust_data = self._load_trust_data()
124
+
125
+ untrusted = []
126
+ for source, data in trust_data.items():
127
+ if data["trust_score"] <= max_trust and data["verification_count"] >= min_verifications:
128
+ untrusted.append({
129
+ "source": source,
130
+ "trust_score": data["trust_score"],
131
+ "verification_count": data["verification_count"],
132
+ "success_rate": data["success_count"] / data["verification_count"],
133
+ })
134
+
135
+ # Sort by trust score ascending
136
+ untrusted.sort(key=lambda x: x["trust_score"])
137
+ return untrusted
138
+
139
+ def calculate_freshness(self, item: Dict[str, Any], domain: Optional[str] = None) -> float:
140
+ """
141
+ Calculate freshness score for a knowledge item.
142
+
143
+ Args:
144
+ item: Knowledge item
145
+ domain: Domain for domain-specific rules
146
+
147
+ Returns:
148
+ Freshness score (0.0 to 1.0)
149
+ """
150
+ # Get age in days
151
+ saved_at = datetime.fromisoformat(item.get("saved_at", datetime.utcnow().isoformat()))
152
+ age_days = (datetime.utcnow() - saved_at).days
153
+
154
+ # Domain-specific expiration rules
155
+ if domain == "finance":
156
+ # Financial data expires quickly
157
+ half_life_days = 7 # 50% fresh after 7 days
158
+ else:
159
+ # General knowledge expires slowly
160
+ half_life_days = 30 # 50% fresh after 30 days
161
+
162
+ # Calculate freshness using exponential decay
163
+ freshness = 2 ** (-age_days / half_life_days)
164
+
165
+ return max(0.0, min(1.0, freshness))
166
+
167
+ def update_freshness(self, item_id: str, freshness_score: float):
168
+ """
169
+ Update freshness score for an item.
170
+
171
+ Args:
172
+ item_id: Item ID
173
+ freshness_score: New freshness score
174
+ """
175
+ freshness_data = self._load_freshness_data()
176
+
177
+ freshness_data[item_id] = {
178
+ "freshness_score": freshness_score,
179
+ "last_updated": datetime.utcnow().isoformat(),
180
+ }
181
+
182
+ self._save_freshness_data(freshness_data)
183
+
184
+ def get_stale_items(self, items: List[Dict[str, Any]], threshold: float = 0.3) -> List[Dict[str, Any]]:
185
+ """
186
+ Get stale items that need refreshing.
187
+
188
+ Args:
189
+ items: List of knowledge items
190
+ threshold: Freshness threshold
191
+
192
+ Returns:
193
+ List of stale items
194
+ """
195
+ stale = []
196
+
197
+ for item in items:
198
+ freshness = self.calculate_freshness(item)
199
+ if freshness < threshold:
200
+ stale.append({
201
+ "item_id": item.get("id"),
202
+ "title": item.get("title"),
203
+ "freshness": freshness,
204
+ "age_days": (datetime.utcnow() - datetime.fromisoformat(item.get("saved_at", datetime.utcnow().isoformat()))).days,
205
+ })
206
+
207
+ # Sort by freshness ascending (stalest first)
208
+ stale.sort(key=lambda x: x["freshness"])
209
+ return stale
210
+
211
+ def recommend_refresh(self, stale_items: List[Dict[str, Any]], max_recommendations: int = 10) -> List[Dict[str, Any]]:
212
+ """
213
+ Recommend items to refresh.
214
+
215
+ Args:
216
+ stale_items: List of stale items
217
+ max_recommendations: Maximum number of recommendations
218
+
219
+ Returns:
220
+ List of recommended items to refresh
221
+ """
222
+ # Prioritize by staleness and importance
223
+ recommendations = stale_items[:max_recommendations]
224
+
225
+ return recommendations
226
+
227
+ def _load_trust_data(self) -> Dict[str, Any]:
228
+ """Load trust data from disk."""
229
+ with open(self.trust_file, 'r') as f:
230
+ return json.load(f)
231
+
232
+ def _save_trust_data(self, data: Dict[str, Any]):
233
+ """Save trust data to disk."""
234
+ with open(self.trust_file, 'w') as f:
235
+ json.dump(data, f, indent=2)
236
+
237
+ def _load_freshness_data(self) -> Dict[str, Any]:
238
+ """Load freshness data from disk."""
239
+ with open(self.freshness_file, 'r') as f:
240
+ return json.load(f)
241
+
242
+ def _save_freshness_data(self, data: Dict[str, Any]):
243
+ """Save freshness data to disk."""
244
+ with open(self.freshness_file, 'w') as f:
245
+ json.dump(data, f, indent=2)
backend/app/services/mirofish_client.py CHANGED
@@ -1,15 +1,45 @@
1
- from typing import Any, Dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import httpx
4
 
5
- from app.config import (
6
- MIROFISH_API_BASE,
7
- MIROFISH_TIMEOUT_SECONDS,
8
- MIROFISH_HEALTH_PATH,
9
- MIROFISH_RUN_PATH,
10
- MIROFISH_STATUS_PATH,
11
- MIROFISH_REPORT_PATH,
12
- MIROFISH_CHAT_PATH,
 
 
13
  )
14
 
15
 
@@ -17,71 +47,225 @@ class MiroFishError(Exception):
17
  pass
18
 
19
 
20
- def _build_url(path: str, simulation_id: str | None = None) -> str:
21
- if simulation_id is not None:
22
- path = path.replace("{id}", simulation_id)
23
- return f"{MIROFISH_API_BASE.rstrip('/')}{path}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def mirofish_health() -> Dict[str, Any]:
27
  try:
28
  with httpx.Client(timeout=10) as client:
29
- response = client.get(_build_url(MIROFISH_HEALTH_PATH))
30
  return {
31
  "reachable": response.status_code < 500,
32
  "status_code": response.status_code,
33
- "body": response.text[:500],
34
  }
35
  except Exception as e:
36
- return {
37
- "reachable": False,
38
- "status_code": None,
39
- "body": str(e),
40
- }
41
 
42
 
 
 
43
  def run_simulation(payload: Dict[str, Any]) -> Dict[str, Any]:
44
- try:
45
- with httpx.Client(timeout=MIROFISH_TIMEOUT_SECONDS) as client:
46
- response = client.post(_build_url(MIROFISH_RUN_PATH), json=payload)
47
- if response.status_code >= 400:
48
- raise MiroFishError(f"MiroFish run error {response.status_code}: {response.text}")
49
- return response.json()
50
- except Exception as e:
51
- raise MiroFishError(str(e))
 
 
 
 
 
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def simulation_status(simulation_id: str) -> Dict[str, Any]:
55
- try:
56
- with httpx.Client(timeout=30) as client:
57
- response = client.get(_build_url(MIROFISH_STATUS_PATH, simulation_id))
58
- if response.status_code >= 400:
59
- raise MiroFishError(f"MiroFish status error {response.status_code}: {response.text}")
60
- return response.json()
61
- except Exception as e:
62
- raise MiroFishError(str(e))
63
 
64
 
65
  def simulation_report(simulation_id: str) -> Dict[str, Any]:
 
 
66
  try:
67
- with httpx.Client(timeout=30) as client:
68
- response = client.get(_build_url(MIROFISH_REPORT_PATH, simulation_id))
69
- if response.status_code >= 400:
70
- raise MiroFishError(f"MiroFish report error {response.status_code}: {response.text}")
71
- return response.json()
72
- except Exception as e:
73
- raise MiroFishError(str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
 
76
  def simulation_chat(simulation_id: str, message: str) -> Dict[str, Any]:
 
 
77
  try:
78
- with httpx.Client(timeout=60) as client:
79
- response = client.post(
80
- _build_url(MIROFISH_CHAT_PATH, simulation_id),
81
- json={"message": message},
82
- )
83
- if response.status_code >= 400:
84
- raise MiroFishError(f"MiroFish chat error {response.status_code}: {response.text}")
85
- return response.json()
86
- except Exception as e:
87
- raise MiroFishError(str(e))
 
 
 
 
1
+ """
2
+ MiroFish client adapter.
3
+
4
+ MiroFish actual API (all under /api/):
5
+ Graph/Project:
6
+ POST /api/graph/ontology/generate - upload seed file + requirement → project_id
7
+ POST /api/graph/build - build graph → task_id
8
+ GET /api/graph/task/<task_id> - poll build status → graph_id when done
9
+
10
+ Simulation:
11
+ POST /api/simulation/create - create simulation → simulation_id
12
+ POST /api/simulation/prepare - prepare agents
13
+ POST /api/simulation/prepare/status - poll prepare status
14
+ POST /api/simulation/start - run simulation
15
+ GET /api/simulation/<sim_id> - get simulation status
16
+ GET /api/simulation/list - list simulations
17
+
18
+ Report:
19
+ POST /api/report/generate - generate report → report_id
20
+ POST /api/report/generate/status - poll report status
21
+ GET /api/report/<report_id> - get report
22
+ GET /api/report/by-simulation/<sim_id> - get report by simulation
23
+ POST /api/report/chat - chat with report agent
24
+ """
25
+
26
+ import io
27
+ import time
28
+ import logging
29
+ from typing import Any, Dict, Optional
30
 
31
  import httpx
32
 
33
+ from app.config import MIROFISH_API_BASE, MIROFISH_TIMEOUT_SECONDS
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ BASE = MIROFISH_API_BASE.rstrip("/")
38
+
39
+ # Module-level connection pool — reused across all requests
40
+ _http_pool = httpx.Client(
41
+ timeout=MIROFISH_TIMEOUT_SECONDS,
42
+ limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
43
  )
44
 
45
 
 
47
  pass
48
 
49
 
50
def _url(path: str) -> str:
    """Join an API path onto the (already rstripped) MiroFish base URL."""
    return BASE + path
52
+
53
+
54
def _post(path: str, json: Dict = None, data: Dict = None, files=None, timeout: int = 30) -> Dict:
    """POST to the MiroFish API and return the parsed JSON body.

    Multipart uploads (files given) go through a throwaway client; plain
    JSON posts reuse the shared connection pool. Raises MiroFishError on
    any HTTP error status or transport failure.
    """
    try:
        if files:
            # Use a one-off client for multipart uploads (connection pool can't reuse these)
            with httpx.Client(timeout=timeout) as one_off:
                resp = one_off.post(_url(path), data=data or {}, files=files)
        else:
            resp = _http_pool.post(_url(path), json=json or {}, timeout=timeout)
        if resp.status_code >= 400:
            raise MiroFishError(f"MiroFish {path} error {resp.status_code}: {resp.text[:300]}")
        return resp.json()
    except MiroFishError:
        # Already wrapped with context — propagate untouched.
        raise
    except Exception as e:
        raise MiroFishError(str(e))
69
 
70
 
71
def _get(path: str, timeout: int = 30) -> Dict:
    """GET from the MiroFish API via the shared pool and return parsed JSON.

    Raises MiroFishError on HTTP error statuses or transport failures.
    """
    try:
        resp = _http_pool.get(_url(path), timeout=timeout)
        if resp.status_code >= 400:
            raise MiroFishError(f"MiroFish {path} error {resp.status_code}: {resp.text[:300]}")
        return resp.json()
    except MiroFishError:
        # Already wrapped with context — propagate untouched.
        raise
    except Exception as e:
        raise MiroFishError(str(e))
81
+
82
+
83
+ # ── Health ────────────────────────────────────────────────────────────────────
84
+
85
def mirofish_health() -> Dict[str, Any]:
    """Probe the MiroFish /health endpoint.

    Never raises: transport failures are reported as reachable=False
    with the exception text in "body".
    """
    try:
        with httpx.Client(timeout=10) as probe:
            resp = probe.get(_url("/health"))
            return {
                "reachable": resp.status_code < 500,
                "status_code": resp.status_code,
                "body": resp.text[:200],
            }
    except Exception as e:
        return {"reachable": False, "status_code": None, "body": str(e)}
 
 
 
 
96
 
97
 
98
+ # ── Full simulation workflow ──────────────────────────────────────────────────
99
+
100
def run_simulation(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Orchestrate the full MiroFish workflow:
    1. Generate ontology (upload seed text as a .txt file)
    2. Build graph (poll until done)
    3. Create simulation
    4. Prepare simulation (poll until done)
    5. Start simulation
    Returns a dict with simulation_id and status.

    Args:
        payload: Request dict. Reads "title" (project name, default
            "Simulation"), "seed_text" (ontology source text; falls back to
            "prediction_goal"), and "prediction_goal" (the simulation
            requirement; falls back to the title).

    Returns:
        Dict with simulation_id, project_id, graph_id and status "running".
        The simulation continues server-side after this function returns.

    Raises:
        MiroFishError: if any step reports failure or a polling step times out.
    """
    title = payload.get("title", "Simulation")
    seed_text = payload.get("seed_text", payload.get("prediction_goal", ""))
    requirement = payload.get("prediction_goal", title)

    logger.info(f"MiroFish: starting full workflow for '{title}'")

    # Step 1 – generate ontology by uploading seed text as a file
    # NOTE(review): an empty seed_text is uploaded as an empty seed.txt —
    # confirm the MiroFish ontology endpoint accepts an empty file.
    seed_bytes = seed_text.encode("utf-8")
    files = [("files", ("seed.txt", io.BytesIO(seed_bytes), "text/plain"))]
    form_data = {
        "simulation_requirement": requirement,
        "project_name": title,
    }
    ontology_resp = _post("/api/graph/ontology/generate", data=form_data, files=files, timeout=120)
    if not ontology_resp.get("success"):
        raise MiroFishError(f"Ontology generation failed: {ontology_resp.get('error')}")
    project_id = ontology_resp["data"]["project_id"]
    logger.info(f"MiroFish: project created {project_id}")

    # Step 2 – build graph
    build_resp = _post("/api/graph/build", json={"project_id": project_id}, timeout=120)
    if not build_resp.get("success"):
        raise MiroFishError(f"Graph build failed: {build_resp.get('error')}")
    task_id = build_resp["data"]["task_id"]
    logger.info(f"MiroFish: graph build task {task_id}")

    # Poll graph build (max 5 min); raises MiroFishError on failure/timeout
    graph_id = _poll_graph_build(task_id, max_wait=300)
    logger.info(f"MiroFish: graph ready {graph_id}")

    # Step 3 – create simulation
    create_resp = _post("/api/simulation/create", json={"project_id": project_id, "graph_id": graph_id})
    if not create_resp.get("success"):
        raise MiroFishError(f"Simulation create failed: {create_resp.get('error')}")
    simulation_id = create_resp["data"]["simulation_id"]
    logger.info(f"MiroFish: simulation created {simulation_id}")

    # Step 4 – prepare simulation (async, poll)
    prepare_resp = _post("/api/simulation/prepare", json={"simulation_id": simulation_id}, timeout=120)
    if not prepare_resp.get("success"):
        raise MiroFishError(f"Simulation prepare failed: {prepare_resp.get('error')}")
    prepare_task_id = prepare_resp["data"].get("task_id")
    # Only poll when the server handed back an async task; some deployments
    # appear to prepare synchronously (no task_id) — then we proceed directly.
    if prepare_task_id:
        _poll_prepare(simulation_id, prepare_task_id, max_wait=300)
    logger.info(f"MiroFish: simulation prepared")

    # Step 5 – start simulation
    start_resp = _post("/api/simulation/start", json={"simulation_id": simulation_id}, timeout=60)
    if not start_resp.get("success"):
        raise MiroFishError(f"Simulation start failed: {start_resp.get('error')}")
    logger.info(f"MiroFish: simulation started {simulation_id}")

    return {
        "simulation_id": simulation_id,
        "project_id": project_id,
        "graph_id": graph_id,
        "status": "running",
    }
168
+
169
+
170
def _poll_graph_build(task_id: str, max_wait: int = 300) -> str:
    """Poll the graph build task until it completes and return its graph_id.

    Uses exponential backoff (2s doubling, capped at 15s). Raises
    MiroFishError on build failure or when max_wait seconds elapse.
    """
    stop_at = time.time() + max_wait
    delay = 2.0  # Start at 2s, double each time, cap at 15s
    while time.time() < stop_at:
        task = _get(f"/api/graph/task/{task_id}").get("data", {})
        state = task.get("status", "")
        if state == "completed":
            # graph_id may be at top level or nested under "result"
            graph_id = task.get("graph_id") or task.get("result", {}).get("graph_id")
            if not graph_id:
                raise MiroFishError("Graph build completed but no graph_id returned")
            return graph_id
        if state in ("failed", "error"):
            raise MiroFishError(f"Graph build failed: {task.get('error', 'unknown')}")
        logger.debug(f"Graph build polling: status={state}, next check in {delay:.0f}s")
        time.sleep(delay)
        delay = min(delay * 2, 15.0)
    raise MiroFishError("Graph build timed out")
190
+
191
+
192
def _poll_prepare(simulation_id: str, task_id: str, max_wait: int = 300):
    """Block until the simulation prepare task finishes.

    Polls with exponential backoff (2s doubling, capped at 15s). Raises
    MiroFishError on failure or when max_wait seconds elapse.
    """
    stop_at = time.time() + max_wait
    delay = 2.0
    while time.time() < stop_at:
        data = _post("/api/simulation/prepare/status", json={"task_id": task_id}).get("data", {})
        state = data.get("status", "")
        if state == "completed":
            return
        if state in ("failed", "error"):
            raise MiroFishError(f"Simulation prepare failed: {data.get('error', 'unknown')}")
        logger.debug(f"Prepare polling: status={state}, next check in {delay:.0f}s")
        time.sleep(delay)
        delay = min(delay * 2, 15.0)
    raise MiroFishError("Simulation prepare timed out")
208
+
209
+
210
+ # ── Status / Report / Chat ────────────────────────────────────────────────────
211
 
212
def simulation_status(simulation_id: str) -> Dict[str, Any]:
    """Fetch current status for a simulation, unwrapping the data envelope."""
    envelope = _get(f"/api/simulation/{simulation_id}")
    return envelope.get("data", envelope)
 
 
 
 
 
 
215
 
216
 
217
def simulation_report(simulation_id: str) -> Dict[str, Any]:
    """Get or generate report for a simulation.

    Flow: return an existing report if one is already attached to the
    simulation; otherwise request generation, poll the async task (up to
    5 minutes), then fetch the finished report by id.

    Args:
        simulation_id: MiroFish simulation identifier.

    Returns:
        The report payload dict, or the raw generation response data when
        no report_id could be resolved.

    Raises:
        MiroFishError: if report generation fails.
    """
    # Try to get existing report first (lookup failure is non-fatal — we
    # simply fall through to generating a fresh report).
    try:
        resp = _get(f"/api/report/by-simulation/{simulation_id}")
        if resp.get("success") and resp.get("data"):
            return resp["data"]
    except MiroFishError:
        pass

    # Generate report
    gen_resp = _post("/api/report/generate", json={"simulation_id": simulation_id}, timeout=120)
    if not gen_resp.get("success"):
        raise MiroFishError(f"Report generation failed: {gen_resp.get('error')}")

    report_id = gen_resp["data"].get("report_id")
    task_id = gen_resp["data"].get("task_id")

    # Poll if async
    # NOTE(review): if the 5-minute poll deadline passes without the task
    # completing, we fall through silently and fetch whichever report_id we
    # have — that may return an incomplete report. Confirm this is intended.
    if task_id:
        deadline = time.time() + 300
        while time.time() < deadline:
            status_resp = _post("/api/report/generate/status", json={"task_id": task_id})
            data = status_resp.get("data", {})
            if data.get("status") == "completed":
                report_id = data.get("report_id", report_id)
                break
            if data.get("status") in ("failed", "error"):
                raise MiroFishError(f"Report generation failed: {data.get('error')}")
            time.sleep(5)

    if report_id:
        report_resp = _get(f"/api/report/{report_id}")
        return report_resp.get("data", report_resp)

    return gen_resp.get("data", {})
253
 
254
 
255
def simulation_chat(simulation_id: str, message: str) -> Dict[str, Any]:
    """Chat with the report agent for a simulation."""
    # Resolve the report tied to this simulation; chat still works without one.
    try:
        lookup = _get(f"/api/report/by-simulation/{simulation_id}")
        report_id = lookup.get("data", {}).get("report_id") if lookup.get("success") else None
    except MiroFishError:
        report_id = None

    body = {"message": message}
    if report_id:
        body["report_id"] = report_id

    reply = _post("/api/report/chat", json=body, timeout=60)
    if not reply.get("success"):
        raise MiroFishError(f"Chat failed: {reply.get('error')}")
    return reply.get("data", reply)
backend/app/services/sentinel/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Sentinel Layer (Layer 6) - Adaptive Maintenance System

The Sentinel Layer monitors system health, diagnoses issues, and proposes
safe corrections to maintain system reliability without breaking anything.
"""

# NOTE(review): __all__ advertises these names, but this module never imports
# them from its submodules, so `from app.services.sentinel import *` will not
# actually bind them. Confirm whether re-export imports (e.g. from
# .capability_tracker import CapabilityTracker) were intended here.
__all__ = [
    "SentinelWatcher",
    "SentinelDiagnostician",
    "SentinelPatcher",
    "CapabilityTracker",
    "SentinelEngine",
]
backend/app/services/sentinel/capability_tracker.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Capability Tracker - System Capability Metrics
3
+
4
+ Computes and tracks a composite capability score across 6 dimensions.
5
+ Reads existing data directories. Never calls any LLM. Never writes to
6
+ any existing data directory - only writes to data/sentinel/.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import os
12
+ import uuid
13
+ from datetime import datetime, timedelta
14
+ from pathlib import Path
15
+ from typing import Dict, Any, List, Optional
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # IMPORTANT COMMENT (must appear in source code):
20
+ # agi_progression_index is a VANITY METRIC for developer motivation.
21
+ # It is a weighted composite of system health indicators:
22
+ # error rate, confidence scores, trust quality, skill growth, etc.
23
+ # It does NOT measure general intelligence, emergent reasoning, or AGI.
24
+ # The name is aspirational and intentionally optimistic, not scientific.
25
+
26
+
27
class CapabilitySnapshot:
    """Represents a snapshot of system capabilities.

    Captures the per-dimension scores, the composite AGI progression index,
    and the per-dimension change since the previous snapshot, stamped with a
    fresh UUID and a UTC timestamp at construction time.
    """

    def __init__(
        self,
        scores: Dict[str, float],
        agi_progression_index: float,
        delta_from_last: Dict[str, float]
    ):
        # Identity and timing are assigned at construction, not supplied.
        self.snapshot_id = str(uuid.uuid4())
        self.timestamp = datetime.utcnow().isoformat()
        # Measurement payload.
        self.scores = scores
        self.agi_progression_index = agi_progression_index
        self.delta_from_last = delta_from_last

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict of this snapshot's fields."""
        return {
            "snapshot_id": self.snapshot_id,
            "timestamp": self.timestamp,
            "scores": self.scores,
            "agi_progression_index": self.agi_progression_index,
            "delta_from_last": self.delta_from_last,
        }
50
+
51
+
52
+ class CapabilityTracker:
53
+ """Tracks system capability metrics over time."""
54
+
55
+ def __init__(self):
56
+ self.data_dir = Path("backend/app/data")
57
+ self.sentinel_dir = self.data_dir / "sentinel"
58
+ self.sentinel_dir.mkdir(parents=True, exist_ok=True)
59
+
60
+ def snapshot(self) -> CapabilitySnapshot:
61
+ """
62
+ Compute current capability snapshot.
63
+
64
+ Returns:
65
+ CapabilitySnapshot with all dimensions and AGI index
66
+ """
67
+ # Compute all 6 dimensions
68
+ scores = {
69
+ "reasoning_depth": self._compute_reasoning_depth(),
70
+ "source_trust_avg": self._compute_source_trust_avg(),
71
+ "skill_coverage": self._compute_skill_coverage(),
72
+ "prompt_win_rate_avg": self._compute_prompt_win_rate_avg(),
73
+ "stability": self._compute_stability(),
74
+ "self_correction_rate": self._compute_self_correction_rate(),
75
+ }
76
+
77
+ # Compute AGI progression index
78
+ agi_index = self._compute_agi_index(scores)
79
+
80
+ # Get delta from last snapshot
81
+ delta_from_last = self._compute_delta(scores, agi_index)
82
+
83
+ # Create snapshot
84
+ snapshot = CapabilitySnapshot(
85
+ scores=scores,
86
+ agi_progression_index=agi_index,
87
+ delta_from_last=delta_from_last
88
+ )
89
+
90
+ # Save to history
91
+ self._save_snapshot(snapshot)
92
+
93
+ return snapshot
94
+
95
+ def trend(self, days: int = 7) -> List[Dict[str, Any]]:
96
+ """
97
+ Get capability trend over specified days.
98
+
99
+ Args:
100
+ days: Number of days to look back
101
+
102
+ Returns:
103
+ List of CapabilitySnapshot dicts
104
+ """
105
+ try:
106
+ history_file = self.sentinel_dir / "capability_history.json"
107
+
108
+ if not history_file.exists():
109
+ return []
110
+
111
+ with open(history_file, 'r') as f:
112
+ history = json.load(f)
113
+
114
+ # Filter to last N days
115
+ cutoff = datetime.utcnow() - timedelta(days=days)
116
+
117
+ filtered = []
118
+ for entry in history:
119
+ try:
120
+ entry_time = datetime.fromisoformat(entry["timestamp"])
121
+ if entry_time >= cutoff:
122
+ filtered.append(entry)
123
+ except:
124
+ continue
125
+
126
+ return filtered
127
+
128
+ except Exception as e:
129
+ logger.error(f"Failed to get capability trend: {e}")
130
+ return []
131
+
132
+ def _compute_reasoning_depth(self) -> float:
133
+ """Compute reasoning depth from case confidence scores."""
134
+ try:
135
+ memory_dir = self.data_dir / "memory"
136
+
137
+ if not memory_dir.exists():
138
+ return 0.5
139
+
140
+ case_files = sorted(memory_dir.glob("*.json"), key=os.path.getmtime, reverse=True)[:30]
141
+
142
+ if not case_files:
143
+ return 0.5
144
+
145
+ all_confidences = []
146
+
147
+ for case_file in case_files:
148
+ try:
149
+ with open(case_file, 'r') as f:
150
+ case_data = json.load(f)
151
+
152
+ outputs = case_data.get("outputs", [])
153
+
154
+ for output in outputs:
155
+ if isinstance(output, dict):
156
+ confidence = output.get("confidence")
157
+ if confidence is not None:
158
+ all_confidences.append(confidence)
159
+ except:
160
+ continue
161
+
162
+ if not all_confidences:
163
+ return 0.5
164
+
165
+ return sum(all_confidences) / len(all_confidences)
166
+
167
+ except Exception as e:
168
+ logger.warning(f"Failed to compute reasoning depth: {e}")
169
+ return 0.5
170
+
171
+ def _compute_source_trust_avg(self) -> float:
172
+ """Compute average source trust score."""
173
+ try:
174
+ config_file = self.sentinel_dir / "sentinel_config.json"
175
+
176
+ if not config_file.exists():
177
+ return 0.5
178
+
179
+ with open(config_file, 'r') as f:
180
+ config = json.load(f)
181
+
182
+ trust_scores = config.get("source_trust_scores", {})
183
+
184
+ if not trust_scores:
185
+ return 0.5
186
+
187
+ values = [v for v in trust_scores.values() if isinstance(v, (int, float))]
188
+
189
+ if not values:
190
+ return 0.5
191
+
192
+ return sum(values) / len(values)
193
+
194
+ except Exception as e:
195
+ logger.warning(f"Failed to compute source trust avg: {e}")
196
+ return 0.5
197
+
198
+ def _compute_skill_coverage(self) -> float:
199
+ """Compute skill coverage metric."""
200
+ try:
201
+ skills_dir = self.data_dir / "skills"
202
+
203
+ if not skills_dir.exists():
204
+ return 0.0
205
+
206
+ skill_count = len(list(skills_dir.glob("*.json")))
207
+
208
+ # Normalize to 0-1 scale (20 skills = 1.0)
209
+ return min(skill_count / 20.0, 1.0)
210
+
211
+ except Exception as e:
212
+ logger.warning(f"Failed to compute skill coverage: {e}")
213
+ return 0.0
214
+
215
+ def _compute_prompt_win_rate_avg(self) -> float:
216
+ """Compute average prompt win rate."""
217
+ try:
218
+ prompt_versions_dir = self.data_dir / "prompt_versions"
219
+
220
+ if not prompt_versions_dir.exists():
221
+ return 0.5
222
+
223
+ win_rates = []
224
+
225
+ for version_file in prompt_versions_dir.glob("*.json"):
226
+ try:
227
+ with open(version_file, 'r') as f:
228
+ version_data = json.load(f)
229
+
230
+ win_rate = version_data.get("win_rate")
231
+ if win_rate is not None:
232
+ win_rates.append(win_rate)
233
+ except:
234
+ continue
235
+
236
+ if not win_rates:
237
+ return 0.5
238
+
239
+ return sum(win_rates) / len(win_rates)
240
+
241
+ except Exception as e:
242
+ logger.warning(f"Failed to compute prompt win rate avg: {e}")
243
+ return 0.5
244
+
245
+ def _compute_stability(self) -> float:
246
+ """Compute stability (1.0 - error_rate)."""
247
+ try:
248
+ memory_dir = self.data_dir / "memory"
249
+
250
+ if not memory_dir.exists():
251
+ return 1.0
252
+
253
+ # Get cases from last 7 days
254
+ cutoff = datetime.utcnow() - timedelta(days=7)
255
+
256
+ total_count = 0
257
+ failed_count = 0
258
+
259
+ for case_file in memory_dir.glob("*.json"):
260
+ try:
261
+ # Check file modification time
262
+ mtime = datetime.fromtimestamp(case_file.stat().st_mtime)
263
+ if mtime < cutoff:
264
+ continue
265
+
266
+ with open(case_file, 'r') as f:
267
+ case_data = json.load(f)
268
+
269
+ total_count += 1
270
+
271
+ # Check if case failed
272
+ final_answer = case_data.get("final_answer", "")
273
+ outputs = case_data.get("outputs", [])
274
+
275
+ if not final_answer or not outputs:
276
+ failed_count += 1
277
+ except:
278
+ continue
279
+
280
+ if total_count == 0:
281
+ return 1.0
282
+
283
+ error_rate = failed_count / total_count
284
+ return 1.0 - error_rate
285
+
286
+ except Exception as e:
287
+ logger.warning(f"Failed to compute stability: {e}")
288
+ return 1.0
289
+
290
+ def _compute_self_correction_rate(self) -> float:
291
+ """Compute self-correction rate."""
292
+ try:
293
+ # Get patches from last 7 days
294
+ patch_history_file = self.sentinel_dir / "patch_history.json"
295
+ alert_history_file = self.sentinel_dir / "alert_history.json"
296
+
297
+ cutoff = datetime.utcnow() - timedelta(days=7)
298
+
299
+ patch_count = 0
300
+ if patch_history_file.exists():
301
+ with open(patch_history_file, 'r') as f:
302
+ patches = json.load(f)
303
+
304
+ for patch in patches:
305
+ try:
306
+ patch_time = datetime.fromisoformat(patch["timestamp"])
307
+ if patch_time >= cutoff:
308
+ patch_count += 1
309
+ except:
310
+ continue
311
+
312
+ alert_count = 0
313
+ if alert_history_file.exists():
314
+ with open(alert_history_file, 'r') as f:
315
+ alerts = json.load(f)
316
+
317
+ for alert in alerts:
318
+ try:
319
+ alert_time = datetime.fromisoformat(alert["timestamp"])
320
+ if alert_time >= cutoff:
321
+ alert_count += 1
322
+ except:
323
+ continue
324
+
325
+ if alert_count == 0:
326
+ return 0.0
327
+
328
+ return min(patch_count / alert_count, 1.0)
329
+
330
+ except Exception as e:
331
+ logger.warning(f"Failed to compute self-correction rate: {e}")
332
+ return 0.0
333
+
334
+ def _compute_agi_index(self, scores: Dict[str, float]) -> float:
335
+ """
336
+ Compute AGI progression index from dimension scores.
337
+
338
+ Formula:
339
+ index = (
340
+ (reasoning_depth * 0.25) +
341
+ (source_trust_avg * 0.15) +
342
+ (skill_coverage * 0.20) +
343
+ (prompt_win_rate_avg * 0.20) +
344
+ (stability * 0.10) +
345
+ (self_correction_rate * 0.10)
346
+ )
347
+ """
348
+ index = (
349
+ (scores.get("reasoning_depth", 0.5) * 0.25) +
350
+ (scores.get("source_trust_avg", 0.5) * 0.15) +
351
+ (scores.get("skill_coverage", 0.0) * 0.20) +
352
+ (scores.get("prompt_win_rate_avg", 0.5) * 0.20) +
353
+ (scores.get("stability", 1.0) * 0.10) +
354
+ (scores.get("self_correction_rate", 0.0) * 0.10)
355
+ )
356
+
357
+ # Clamp to [0.0, 1.0]
358
+ return max(0.0, min(1.0, index))
359
+
360
+ def _compute_delta(self, scores: Dict[str, float], agi_index: float) -> Dict[str, float]:
361
+ """Compute delta from last snapshot."""
362
+ try:
363
+ history_file = self.sentinel_dir / "capability_history.json"
364
+
365
+ if not history_file.exists():
366
+ return {k: 0.0 for k in scores.keys()}
367
+
368
+ with open(history_file, 'r') as f:
369
+ history = json.load(f)
370
+
371
+ if not history:
372
+ return {k: 0.0 for k in scores.keys()}
373
+
374
+ last_snapshot = history[-1]
375
+ last_scores = last_snapshot.get("scores", {})
376
+
377
+ delta = {}
378
+ for key, value in scores.items():
379
+ last_value = last_scores.get(key, value)
380
+ delta[key] = value - last_value
381
+
382
+ delta["agi_progression_index"] = agi_index - last_snapshot.get("agi_progression_index", agi_index)
383
+
384
+ return delta
385
+
386
+ except Exception as e:
387
+ logger.warning(f"Failed to compute delta: {e}")
388
+ return {k: 0.0 for k in scores.keys()}
389
+
390
+ def _save_snapshot(self, snapshot: CapabilitySnapshot):
391
+ """Save snapshot to history."""
392
+ try:
393
+ history_file = self.sentinel_dir / "capability_history.json"
394
+
395
+ # Load existing history
396
+ if history_file.exists():
397
+ with open(history_file, 'r') as f:
398
+ history = json.load(f)
399
+ else:
400
+ history = []
401
+
402
+ # Append new snapshot
403
+ history.append(snapshot.to_dict())
404
+
405
+ # Keep only last 500 entries
406
+ if len(history) > 500:
407
+ history = history[-500:]
408
+
409
+ # Save back
410
+ with open(history_file, 'w') as f:
411
+ json.dump(history, f, indent=2)
412
+
413
+ except Exception as e:
414
+ logger.error(f"Failed to save capability snapshot: {e}")
backend/app/services/sentinel/diagnostician.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Diagnostician - Issue Diagnosis using LLM
3
+
4
+ Takes a SentinelAlert, asks the existing free LLM to diagnose it,
5
+ returns a structured SentinelDiagnosis.
6
+
7
+ CRITICAL: Uses ONLY call_model() from app.agents._model for all LLM calls.
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ import uuid
13
+ from datetime import datetime
14
+ from typing import Dict, Any
15
+
16
+ # MANDATORY IMPORT - use this and only this for LLM calls
17
+ from app.agents._model import call_model
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class SentinelDiagnosis:
    """Represents a diagnosis of a system alert.

    A fresh diagnosis_id (UUID) and a UTC timestamp are assigned at
    construction; all other fields come from the diagnosing caller.
    """

    def __init__(
        self,
        alert_id: str,
        root_cause: str,
        fix_type: str,
        safe_to_auto_apply: bool,
        proposed_fix: str,
        confidence: float,
        reasoning: str
    ):
        self.diagnosis_id = str(uuid.uuid4())
        self.timestamp = datetime.utcnow().isoformat()
        self.alert_id = alert_id
        self.root_cause = root_cause
        self.fix_type = fix_type
        self.safe_to_auto_apply = safe_to_auto_apply
        self.proposed_fix = proposed_fix
        self.confidence = confidence
        self.reasoning = reasoning

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict of this diagnosis."""
        return {
            "diagnosis_id": self.diagnosis_id,
            "alert_id": self.alert_id,
            "root_cause": self.root_cause,
            "fix_type": self.fix_type,
            "safe_to_auto_apply": self.safe_to_auto_apply,
            "proposed_fix": self.proposed_fix,
            "confidence": self.confidence,
            "reasoning": self.reasoning,
            "timestamp": self.timestamp,
        }
57
+
58
+
59
+ class SentinelDiagnostician:
60
+ """Diagnoses system alerts using LLM."""
61
+
62
+ def diagnose(self, alert) -> SentinelDiagnosis:
63
+ """
64
+ Diagnose a system alert using the free LLM.
65
+
66
+ Args:
67
+ alert: SentinelAlert object to diagnose
68
+
69
+ Returns:
70
+ SentinelDiagnosis with root cause and proposed fix
71
+ """
72
+ system_prompt = """You are a system diagnostician for an AI orchestration platform called MiroOrg. Your job is to analyze system alerts and propose safe fixes.
73
+
74
+ You MUST respond with ONLY valid JSON. No markdown. No code fences. No explanation text before or after. Just the raw JSON object.
75
+
76
+ The JSON must have exactly these fields:
77
+ {
78
+ "root_cause": "string describing what caused this issue",
79
+ "fix_type": "one of: config | prompt | logic | dependency | data",
80
+ "safe_to_auto_apply": false,
81
+ "proposed_fix": "string describing the specific fix to apply",
82
+ "confidence": 0.0,
83
+ "reasoning": "string explaining your diagnosis"
84
+ }
85
+
86
+ IMPORTANT RULES for safe_to_auto_apply:
87
+ - Set true ONLY if fix_type is "prompt" (editing a .txt prompt file) or "config" (changing a JSON config value)
88
+ - Set false for fix_type "logic" (Python code changes)
89
+ - Set false for fix_type "dependency" (package changes)
90
+ - Set false for fix_type "data" (data migrations)
91
+ - When in doubt: set false. Human review is always safer.
92
+ - NEVER suggest auto-applying changes to .py files.
93
+
94
+ Set confidence between 0.0 and 1.0 based on how certain you are."""
95
+
96
+ layer_names = {
97
+ 1: "Core Platform",
98
+ 2: "Agents",
99
+ 3: "Domain Packs",
100
+ 4: "Simulation",
101
+ 5: "Knowledge Evolution",
102
+ 6: "Sentinel"
103
+ }
104
+
105
+ prompt = f"""Analyze this system alert and diagnose the root cause:
106
+
107
+ Alert ID: {alert.alert_id}
108
+ Layer: {alert.layer} ({layer_names.get(alert.layer, 'Unknown')})
109
+ Component: {alert.component}
110
+ Issue Type: {alert.issue_type}
111
+ Severity: {alert.severity}
112
+ Evidence: {alert.raw_evidence}
113
+
114
+ What is the most likely root cause and what is the safest fix?
115
+ Remember: respond with ONLY the JSON object, nothing else."""
116
+
117
+ try:
118
+ # Call the existing free LLM
119
+ raw_response = call_model(
120
+ prompt=prompt,
121
+ mode="chat",
122
+ system_prompt=system_prompt
123
+ )
124
+
125
+ # Parse response
126
+ diagnosis_data = self._parse_llm_response(raw_response)
127
+
128
+ if diagnosis_data is None:
129
+ # Parse failed - return fallback diagnosis
130
+ return SentinelDiagnosis(
131
+ alert_id=alert.alert_id,
132
+ root_cause="Could not parse LLM response",
133
+ fix_type="logic",
134
+ safe_to_auto_apply=False,
135
+ proposed_fix="Manual investigation required",
136
+ confidence=0.0,
137
+ reasoning=f"JSON parse failed. Raw response: {raw_response[:200]}"
138
+ )
139
+
140
+ # Create diagnosis from parsed data
141
+ return SentinelDiagnosis(
142
+ alert_id=alert.alert_id,
143
+ root_cause=diagnosis_data.get("root_cause", "Unknown"),
144
+ fix_type=diagnosis_data.get("fix_type", "logic"),
145
+ safe_to_auto_apply=diagnosis_data.get("safe_to_auto_apply", False),
146
+ proposed_fix=diagnosis_data.get("proposed_fix", "No fix proposed"),
147
+ confidence=float(diagnosis_data.get("confidence", 0.0)),
148
+ reasoning=diagnosis_data.get("reasoning", "No reasoning provided")
149
+ )
150
+
151
+ except Exception as e:
152
+ logger.warning(f"Diagnosis failed for alert {alert.alert_id}: {e}")
153
+
154
+ # Return fallback diagnosis
155
+ return SentinelDiagnosis(
156
+ alert_id=alert.alert_id,
157
+ root_cause=f"Diagnosis error: {str(e)}",
158
+ fix_type="logic",
159
+ safe_to_auto_apply=False,
160
+ proposed_fix="Manual investigation required",
161
+ confidence=0.0,
162
+ reasoning=f"Exception during diagnosis: {str(e)}"
163
+ )
164
+
165
+ def _parse_llm_response(self, raw_response: str) -> Dict[str, Any]:
166
+ """
167
+ Parse LLM response, handling markdown and other formatting.
168
+
169
+ Args:
170
+ raw_response: Raw string from LLM
171
+
172
+ Returns:
173
+ Parsed dict or None if parse failed
174
+ """
175
+ try:
176
+ # Strip markdown code fences if present
177
+ cleaned = raw_response.strip()
178
+
179
+ if cleaned.startswith("```json"):
180
+ cleaned = cleaned[7:]
181
+ elif cleaned.startswith("```"):
182
+ cleaned = cleaned[3:]
183
+
184
+ if cleaned.endswith("```"):
185
+ cleaned = cleaned[:-3]
186
+
187
+ cleaned = cleaned.strip()
188
+
189
+ # Parse JSON
190
+ data = json.loads(cleaned)
191
+
192
+ # Validate required fields
193
+ required_fields = ["root_cause", "fix_type", "safe_to_auto_apply", "proposed_fix", "confidence", "reasoning"]
194
+ for field in required_fields:
195
+ if field not in data:
196
+ logger.warning(f"Missing required field in LLM response: {field}")
197
+ return None
198
+
199
+ # Validate fix_type
200
+ valid_fix_types = ["config", "prompt", "logic", "dependency", "data"]
201
+ if data["fix_type"] not in valid_fix_types:
202
+ logger.warning(f"Invalid fix_type: {data['fix_type']}")
203
+ data["fix_type"] = "logic"
204
+
205
+ # Validate confidence
206
+ try:
207
+ data["confidence"] = float(data["confidence"])
208
+ data["confidence"] = max(0.0, min(1.0, data["confidence"]))
209
+ except:
210
+ data["confidence"] = 0.0
211
+
212
+ return data
213
+
214
+ except json.JSONDecodeError as e:
215
+ logger.warning(f"JSON parse error: {e}")
216
+ return None
217
+ except Exception as e:
218
+ logger.warning(f"Unexpected parse error: {e}")
219
+ return None
backend/app/services/sentinel/patcher.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Patcher - Safe Patch Application
3
+
4
+ Applies safe patches automatically or queues unsafe ones for human review.
5
+ Never touches .py files. Never calls any LLM.
6
+
7
+ SAFE AUTO-APPLY targets (the ONLY files this class may write to automatically):
8
+ 1. backend/app/prompts/*.txt - prompt text files only
9
+ 2. backend/app/data/sentinel/sentinel_config.json - sentinel config only
10
+
11
+ EVERYTHING ELSE goes to pending_patches/ for human review.
12
+ """
13
+
14
+ import json
15
+ import logging
16
+ import os
17
+ import uuid
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import Dict, Any, Optional
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class PatchResult:
    """Outcome of a single patch-application attempt.

    Captures whether the patch was written, which file it targeted, and
    whether a human still needs to review it. Creation time is stored as
    an ISO-8601 UTC timestamp.
    """

    # Serialization order for to_dict(); mirrors the constructor fields.
    _FIELDS = (
        "patch_id",
        "applied",
        "target_file",
        "change_summary",
        "requires_human_review",
        "timestamp",
    )

    def __init__(
        self,
        patch_id: str,
        applied: bool,
        target_file: str,
        change_summary: str,
        requires_human_review: bool
    ):
        self.patch_id = patch_id
        self.applied = applied
        self.target_file = target_file
        self.change_summary = change_summary
        self.requires_human_review = requires_human_review
        self.timestamp = datetime.utcnow().isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this result for JSON persistence."""
        return {name: getattr(self, name) for name in self._FIELDS}
52
+
53
+
54
class SentinelPatcher:
    """Applies safe patches automatically or queues them for human review.

    Auto-apply is restricted to two targets: prompt text files under
    backend/app/prompts and the sentinel config JSON. Every other fix
    type is written to pending_patches/ for a human to approve or
    reject. This class never touches .py files and never calls an LLM.
    """

    def __init__(self):
        self.data_dir = Path("backend/app/data")
        self.prompts_dir = Path("backend/app/prompts")
        self.sentinel_dir = self.data_dir / "sentinel"
        self.pending_dir = self.sentinel_dir / "pending_patches"

        # Ensure directories exist
        self.sentinel_dir.mkdir(parents=True, exist_ok=True)
        self.pending_dir.mkdir(parents=True, exist_ok=True)

    def apply(self, diagnosis) -> PatchResult:
        """
        Apply a diagnosis as a patch.

        Only diagnoses flagged safe_to_auto_apply with fix_type "prompt"
        or "config" are applied automatically; everything else is queued
        for human review. The outcome is always recorded in history.

        Args:
            diagnosis: SentinelDiagnosis object

        Returns:
            PatchResult indicating what happened
        """
        patch_id = str(uuid.uuid4())
        timestamp = datetime.utcnow().isoformat()

        # Check if safe to auto-apply
        if diagnosis.safe_to_auto_apply and diagnosis.fix_type == "prompt":
            result = self._apply_prompt_patch(patch_id, diagnosis, timestamp)
        elif diagnosis.safe_to_auto_apply and diagnosis.fix_type == "config":
            result = self._apply_config_patch(patch_id, diagnosis, timestamp)
        else:
            # Queue for human review
            result = self._queue_for_review(patch_id, diagnosis, timestamp)

        # Save to patch history
        self._save_patch_history(result)

        return result

    def approve_pending(self, patch_id: str) -> Optional[PatchResult]:
        """
        Approve and apply a pending patch.

        Logic and dependency fixes are refused even with approval, since
        they require manual code changes; only prompt/config patches are
        re-applied here.

        Args:
            patch_id: ID of pending patch

        Returns:
            PatchResult, or None if the patch is not found or loading fails
        """
        pending_file = self.pending_dir / f"{patch_id}.patch.json"

        if not pending_file.exists():
            logger.warning(f"Pending patch not found: {patch_id}")
            return None

        try:
            # Load pending patch
            with open(pending_file, 'r', encoding='utf-8') as f:
                patch_data = json.load(f)

            diagnosis_data = patch_data.get("diagnosis", {})
            fix_type = diagnosis_data.get("fix_type", "logic")

            # Refuse to auto-apply logic or dependency changes even with approval
            if fix_type in ("logic", "dependency"):
                result = PatchResult(
                    patch_id=patch_id,
                    applied=False,
                    target_file=f"pending_patches/{patch_id}.patch.json",
                    change_summary="Cannot auto-apply logic changes even with approval. Manual code change required.",
                    requires_human_review=True
                )
                self._save_patch_history(result)
                return result

            # Rebuild a lightweight diagnosis object for re-application.
            # safe_to_auto_apply is forced True because approval is explicit.
            from types import SimpleNamespace
            diagnosis = SimpleNamespace(
                diagnosis_id=diagnosis_data.get("diagnosis_id"),
                alert_id=diagnosis_data.get("alert_id"),
                root_cause=diagnosis_data.get("root_cause"),
                fix_type=diagnosis_data.get("fix_type"),
                safe_to_auto_apply=True,
                proposed_fix=diagnosis_data.get("proposed_fix"),
                confidence=diagnosis_data.get("confidence"),
                reasoning=diagnosis_data.get("reasoning"),
                component=patch_data.get("component", "unknown"),
            )

            # Re-run apply logic for the approved fix type
            if fix_type == "prompt":
                result = self._apply_prompt_patch(patch_id, diagnosis, datetime.utcnow().isoformat())
            elif fix_type == "config":
                result = self._apply_config_patch(patch_id, diagnosis, datetime.utcnow().isoformat())
            else:
                result = PatchResult(
                    patch_id=patch_id,
                    applied=False,
                    target_file=f"pending_patches/{patch_id}.patch.json",
                    change_summary=f"Cannot auto-apply {fix_type} changes",
                    requires_human_review=True
                )

            # Delete pending file if applied
            if result.applied:
                pending_file.unlink()

            self._save_patch_history(result)
            return result

        except Exception as e:
            logger.error(f"Failed to approve pending patch {patch_id}: {e}")
            return None

    def reject_pending(self, patch_id: str) -> bool:
        """
        Reject a pending patch.

        Records the rejection in history and removes the pending file.

        Args:
            patch_id: ID of pending patch

        Returns:
            True if rejected, False if not found or on error
        """
        pending_file = self.pending_dir / f"{patch_id}.patch.json"

        if not pending_file.exists():
            logger.warning(f"Pending patch not found: {patch_id}")
            return False

        try:
            # Record rejection in history
            result = PatchResult(
                patch_id=patch_id,
                applied=False,
                target_file=f"pending_patches/{patch_id}.patch.json",
                change_summary="Rejected by user",
                requires_human_review=False
            )
            self._save_patch_history(result)

            # Delete pending file
            pending_file.unlink()

            logger.info(f"Rejected pending patch: {patch_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to reject pending patch {patch_id}: {e}")
            return False

    def _apply_prompt_patch(self, patch_id: str, diagnosis, timestamp: str) -> PatchResult:
        """Append sentinel guidance to the matching prompt .txt file.

        Falls back to queueing for review when no prompt file matches
        the diagnosed component or when the write fails.
        """
        try:
            # Find matching prompt file based on component name
            component = getattr(diagnosis, 'component', diagnosis.alert_id)
            matched_file = self._find_prompt_file(component)

            if matched_file is None:
                # No matching prompt file found - queue for review
                return self._queue_for_review(patch_id, diagnosis, timestamp)

            # Read current content
            with open(matched_file, 'r', encoding='utf-8') as f:
                current_content = f.read()

            # Append sentinel guidance (additive only: never rewrites the prompt)
            patch_comment = f"\n\n# [Sentinel patch {patch_id} at {timestamp}]\n"
            patch_comment += f"# Issue: {diagnosis.root_cause}\n"
            patch_comment += f"# Fix applied: {diagnosis.proposed_fix}\n"

            new_content = current_content + patch_comment

            # Write back
            with open(matched_file, 'w', encoding='utf-8') as f:
                f.write(new_content)

            logger.info(f"Applied prompt patch to {matched_file}")

            return PatchResult(
                patch_id=patch_id,
                applied=True,
                target_file=str(matched_file),
                change_summary=f"Appended sentinel guidance to prompt: {diagnosis.proposed_fix[:100]}",
                requires_human_review=False
            )

        except Exception as e:
            logger.error(f"Failed to apply prompt patch: {e}")
            return self._queue_for_review(patch_id, diagnosis, timestamp)

    def _apply_config_patch(self, patch_id: str, diagnosis, timestamp: str) -> PatchResult:
        """Record the proposed fix in sentinel_config.json under a patch key."""
        try:
            config_file = self.sentinel_dir / "sentinel_config.json"

            # Load existing config
            if config_file.exists():
                with open(config_file, 'r', encoding='utf-8') as f:
                    config = json.load(f)
            else:
                config = {}

            # Add patch
            config[f"patch_{patch_id}"] = diagnosis.proposed_fix

            # Write back
            with open(config_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=2)

            logger.info(f"Applied config patch: {patch_id}")

            # Report the real file path (previously reported a truncated
            # "data/sentinel/..." path that did not match the file written).
            return PatchResult(
                patch_id=patch_id,
                applied=True,
                target_file=str(config_file),
                change_summary=f"Config patch: {diagnosis.proposed_fix[:100]}",
                requires_human_review=False
            )

        except Exception as e:
            logger.error(f"Failed to apply config patch: {e}")
            return self._queue_for_review(patch_id, diagnosis, timestamp)

    def _queue_for_review(self, patch_id: str, diagnosis, timestamp: str) -> PatchResult:
        """Queue a patch for human review."""
        try:
            pending_file = self.pending_dir / f"{patch_id}.patch.json"

            # Serialize the diagnosis defensively: prefer to_dict(), fall
            # back to the instance attribute dict, so json.dump never
            # receives a non-serializable object (e.g. a rebuilt diagnosis
            # coming back through approve_pending).
            if hasattr(diagnosis, 'to_dict'):
                diagnosis_payload = diagnosis.to_dict()
            elif hasattr(diagnosis, '__dict__'):
                diagnosis_payload = dict(vars(diagnosis))
            else:
                diagnosis_payload = diagnosis

            patch_data = {
                "patch_id": patch_id,
                "timestamp": timestamp,
                "diagnosis": diagnosis_payload,
                "component": getattr(diagnosis, 'component', 'unknown'),
            }

            with open(pending_file, 'w', encoding='utf-8') as f:
                json.dump(patch_data, f, indent=2)

            logger.info(f"Queued patch for review: {patch_id}")

            return PatchResult(
                patch_id=patch_id,
                applied=False,
                target_file=f"pending_patches/{patch_id}.patch.json",
                change_summary=f"Queued for review: {diagnosis.root_cause[:100]}",
                requires_human_review=True
            )

        except Exception as e:
            logger.error(f"Failed to queue patch for review: {e}")
            return PatchResult(
                patch_id=patch_id,
                applied=False,
                target_file="error",
                change_summary=f"Failed to queue: {str(e)}",
                requires_human_review=True
            )

    def _find_prompt_file(self, component: str) -> Optional[Path]:
        """Find a prompt file matching the component name.

        Tries a substring match on the file stem first, then a word-level
        fuzzy match. Returns None when no prompt file matches.
        """
        if not self.prompts_dir.exists():
            return None

        component_lower = component.lower()

        # Try exact (substring) match first
        for prompt_file in self.prompts_dir.glob("*.txt"):
            if component_lower in prompt_file.stem.lower():
                return prompt_file

        # Try fuzzy match: any word of the component appears in the stem
        for prompt_file in self.prompts_dir.glob("*.txt"):
            stem_lower = prompt_file.stem.lower()
            if any(word in stem_lower for word in component_lower.split()):
                return prompt_file

        return None

    def _save_patch_history(self, result: PatchResult):
        """Append a patch result to patch_history.json, keeping the last 200."""
        try:
            history_file = self.sentinel_dir / "patch_history.json"

            # Load existing history
            if history_file.exists():
                with open(history_file, 'r', encoding='utf-8') as f:
                    history = json.load(f)
            else:
                history = []

            # Append new result
            history.append(result.to_dict())

            # Keep only last 200 entries to bound file growth
            if len(history) > 200:
                history = history[-200:]

            # Save back
            with open(history_file, 'w', encoding='utf-8') as f:
                json.dump(history, f, indent=2)

        except Exception as e:
            logger.error(f"Failed to save patch history: {e}")
backend/app/services/sentinel/scheduler.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Scheduler - Background Cycle Execution
3
+
4
+ Runs sentinel cycles on a schedule with CPU/battery safeguards.
5
+ Uses asyncio for non-blocking background execution.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import os
11
+ from datetime import datetime, timedelta
12
+ from typing import Optional
13
+
14
+ try:
15
+ import psutil
16
+ except ImportError:
17
+ psutil = None
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Module-level state
22
+ _scheduler_task: Optional[asyncio.Task] = None
23
+ _scheduler_running: bool = False
24
+
25
+
26
async def _sentinel_cycle_loop():
    """Background loop that runs sentinel cycles until stopped.

    Re-checks the SENTINEL_ENABLED flag and CPU/battery safeguards before
    every cycle, and backs off for 5 minutes after a skipped or failed run.
    Blocking work (the cycle itself and the psutil probes) is pushed to a
    worker thread so the event loop stays responsive.
    """
    from app.services.sentinel.sentinel_engine import SentinelEngine

    global _scheduler_running

    engine = SentinelEngine()
    interval_minutes = int(os.getenv("SENTINEL_CYCLE_INTERVAL_MINUTES", "60"))
    loop = asyncio.get_running_loop()

    logger.info(f"Sentinel scheduler started (interval: {interval_minutes} minutes)")

    while _scheduler_running:
        try:
            # Check if sentinel is enabled
            if not os.getenv("SENTINEL_ENABLED", "true").lower() == "true":
                logger.debug("Sentinel disabled, skipping cycle")
                await asyncio.sleep(60)
                continue

            # Check CPU/battery safeguards in a worker thread:
            # psutil.cpu_percent(interval=1) blocks for ~1 second.
            if not await loop.run_in_executor(None, _check_safeguards):
                logger.info("Sentinel cycle skipped due to safeguards (high CPU or low battery)")
                await asyncio.sleep(300)  # Wait 5 minutes before checking again
                continue

            # Run the cycle in a worker thread: run_cycle() is synchronous
            # and can take a long time, so running it inline would block
            # the entire event loop for the duration of the cycle.
            logger.info("Starting scheduled sentinel cycle")
            report = await loop.run_in_executor(None, engine.run_cycle)
            logger.info(f"Sentinel cycle complete: {report.cycle_id}")

            # Wait for next cycle
            await asyncio.sleep(interval_minutes * 60)

        except asyncio.CancelledError:
            logger.info("Sentinel scheduler cancelled")
            break
        except Exception as e:
            logger.error(f"Sentinel cycle error: {e}")
            await asyncio.sleep(300)  # Wait 5 minutes on error
65
+
66
+
67
def _check_safeguards() -> bool:
    """
    Decide whether it is safe to run a sentinel cycle right now.

    A cycle is skipped when CPU usage exceeds SENTINEL_CPU_THRESHOLD,
    or when running on battery below SENTINEL_BATTERY_THRESHOLD.
    Without psutil — or on any probe error — the check fails open.

    Returns:
        True if safe to run, False otherwise
    """
    if psutil is None:
        # psutil unavailable: no way to measure, assume safe.
        return True

    try:
        # CPU probe: blocks ~1s to sample utilization.
        max_cpu = float(os.getenv("SENTINEL_CPU_THRESHOLD", "80.0"))
        current_cpu = psutil.cpu_percent(interval=1)
        if current_cpu > max_cpu:
            logger.debug(f"CPU usage too high: {current_cpu}% > {max_cpu}%")
            return False

        # Battery probe (laptops only; desktops report no battery).
        battery = psutil.sensors_battery()
        if battery is None:
            return True

        min_battery = float(os.getenv("SENTINEL_BATTERY_THRESHOLD", "20.0"))
        if battery.percent < min_battery and not battery.power_plugged:
            logger.debug(f"Battery too low: {battery.percent}% < {min_battery}%")
            return False

        return True

    except Exception as e:
        logger.warning(f"Safeguard check failed: {e}")
        return True  # Fail open
101
+
102
+
103
def start_sentinel_scheduler():
    """
    Start the sentinel scheduler in the background.

    This function is called from main.py on app startup. Calling it again
    while the scheduler is active is a no-op (logged as a warning).

    NOTE(review): asyncio.create_task() requires a running event loop, so
    this must be invoked from async startup context — confirm the caller.
    """
    global _scheduler_task, _scheduler_running

    if _scheduler_running:
        logger.warning("Sentinel scheduler already running")
        return

    # Set the flag before creating the task so the loop's while-condition
    # sees the scheduler as running from its first iteration.
    _scheduler_running = True
    _scheduler_task = asyncio.create_task(_sentinel_cycle_loop())
    logger.info("Sentinel scheduler task created")
118
+
119
+
120
def stop_sentinel_scheduler():
    """
    Stop the sentinel scheduler.

    This function can be called to gracefully shut down the scheduler.
    Clearing the flag lets the loop exit on its own; cancel() is a
    best-effort interrupt of any in-progress sleep (not awaited here).
    """
    global _scheduler_task, _scheduler_running

    if not _scheduler_running:
        logger.warning("Sentinel scheduler not running")
        return

    _scheduler_running = False

    if _scheduler_task:
        _scheduler_task.cancel()
    logger.info("Sentinel scheduler stopped")
137
+
138
+
139
def is_scheduler_running() -> bool:
    """Check if scheduler is running (True while the background loop is active)."""
    return _scheduler_running
backend/app/services/sentinel/sentinel_engine.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Engine - Core Orchestration
3
+
4
+ Coordinates watcher, diagnostician, patcher, and capability tracker
5
+ to perform complete sentinel cycles.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ import uuid
12
+ from datetime import datetime, timedelta
13
+ from pathlib import Path
14
+ from typing import Dict, Any, Optional
15
+
16
+ from app.services.sentinel.watcher import SentinelWatcher
17
+ from app.services.sentinel.diagnostician import SentinelDiagnostician
18
+ from app.services.sentinel.patcher import SentinelPatcher
19
+ from app.services.sentinel.capability_tracker import CapabilityTracker
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Module-level state
24
+ SENTINEL_RUNNING: bool = False
25
+ _last_cycle_at: Optional[str] = None
26
+ _last_cycle_report: Optional[dict] = None
27
+
28
+
29
class SentinelCycleReport:
    """Summary of a single sentinel cycle.

    Records when the cycle ran, how many alerts/diagnoses/patches it
    produced, and the capability snapshot taken at the end of the cycle.
    """

    def __init__(
        self,
        cycle_id: str,
        started_at: str,
        completed_at: str,
        alerts_found: int,
        diagnoses_made: int,
        patches_applied: int,
        patches_pending_review: int,
        capability_snapshot
    ):
        self.cycle_id = cycle_id
        self.started_at = started_at
        self.completed_at = completed_at
        self.alerts_found = alerts_found
        self.diagnoses_made = diagnoses_made
        self.patches_applied = patches_applied
        self.patches_pending_review = patches_pending_review
        self.capability_snapshot = capability_snapshot

    def dict(self) -> Dict[str, Any]:
        """Serialize the report for JSON persistence.

        The capability snapshot may be a tracker object (exposing
        to_dict) or an already-serialized dict (when the report is
        rebuilt from a persisted history entry).
        """
        return {
            "cycle_id": self.cycle_id,
            "started_at": self.started_at,
            "completed_at": self.completed_at,
            "alerts_found": self.alerts_found,
            "diagnoses_made": self.diagnoses_made,
            "patches_applied": self.patches_applied,
            "patches_pending_review": self.patches_pending_review,
            "capability_snapshot": self.capability_snapshot.to_dict() if hasattr(self.capability_snapshot, 'to_dict') else self.capability_snapshot,
        }

    # Backward-compatible alias: the other sentinel records (alerts,
    # diagnoses, patch results) expose to_dict(); dict() is kept for
    # existing callers, to_dict() added for naming consistency.
    to_dict = dict
63
+
64
+
65
class SentinelEngine:
    """Core sentinel orchestration engine.

    Coordinates the watcher (scan), diagnostician (LLM diagnosis),
    patcher (safe apply / queue), and capability tracker into complete
    sentinel cycles, and persists cycle history under data/sentinel.
    """

    def __init__(self):
        self.watcher = SentinelWatcher()
        self.diagnostician = SentinelDiagnostician()
        self.patcher = SentinelPatcher()
        self.tracker = CapabilityTracker()
        # Upper bound on LLM diagnoses per cycle (cost/latency guard).
        self.max_diagnoses = int(os.getenv("SENTINEL_MAX_DIAGNOSES_PER_CYCLE", "5"))
        self.logger = logging.getLogger("sentinel.engine")
        self.data_dir = Path("backend/app/data/sentinel")
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def run_cycle(self) -> SentinelCycleReport:
        """
        Run a complete sentinel cycle.

        Steps: scan for alerts, diagnose up to max_diagnoses high/critical
        alerts, apply or queue patches, snapshot capability, and persist
        the report. Re-entrant calls while a cycle is in flight return the
        previous report (or a stub) instead of starting a second cycle.

        Returns:
            SentinelCycleReport with cycle results
        """
        global SENTINEL_RUNNING, _last_cycle_at, _last_cycle_report

        # Check if already running (scheduler + manual trigger can overlap)
        if SENTINEL_RUNNING:
            self.logger.warning("Sentinel cycle already running, skipping")
            if _last_cycle_report:
                return SentinelCycleReport(**_last_cycle_report)
            else:
                # No previous report available: return a stub report
                return SentinelCycleReport(
                    cycle_id="stub",
                    started_at=datetime.utcnow().isoformat(),
                    completed_at=datetime.utcnow().isoformat(),
                    alerts_found=0,
                    diagnoses_made=0,
                    patches_applied=0,
                    patches_pending_review=0,
                    capability_snapshot=self.tracker.snapshot()
                )

        SENTINEL_RUNNING = True
        cycle_id = str(uuid.uuid4())
        started_at = datetime.utcnow().isoformat()
        patches_applied = 0
        patches_pending = 0
        diagnoses_made = 0

        try:
            # Step 1: Scan for alerts
            self.logger.info(f"Starting sentinel cycle {cycle_id}")
            all_alerts = self.watcher.scan()
            self.logger.info(f"Sentinel scan found {len(all_alerts)} alerts")

            # Step 2: Filter to high/critical only, respect max_diagnoses limit.
            # Critical alerts first, newest first within each severity.
            priority_alerts = [a for a in all_alerts if a.severity in ("high", "critical")]
            priority_alerts = sorted(
                priority_alerts,
                key=lambda a: (a.severity == "critical", a.timestamp),
                reverse=True
            )
            priority_alerts = priority_alerts[:self.max_diagnoses]

            self.logger.info(f"Processing {len(priority_alerts)} priority alerts")

            # Step 3: Diagnose each alert (a failure skips the alert, not the cycle)
            diagnoses = []
            for alert in priority_alerts:
                try:
                    self.logger.info(f"Diagnosing alert {alert.alert_id}")
                    diagnosis = self.diagnostician.diagnose(alert)
                    diagnoses.append(diagnosis)
                    diagnoses_made += 1
                except Exception as e:
                    self.logger.warning(f"Diagnosis failed for alert {alert.alert_id}: {e}")
                    continue

            # Step 4: Apply patches (or queue them for human review)
            for diagnosis in diagnoses:
                try:
                    self.logger.info(f"Applying patch for diagnosis {diagnosis.diagnosis_id}")
                    result = self.patcher.apply(diagnosis)

                    if result.applied:
                        patches_applied += 1
                        self.logger.info(f"Patch applied: {result.patch_id}")
                    elif result.requires_human_review:
                        patches_pending += 1
                        self.logger.info(f"Patch queued for review: {result.patch_id}")
                except Exception as e:
                    self.logger.error(f"Patch application failed: {e}")
                    continue

            # Step 5: Capability snapshot
            self.logger.info("Computing capability snapshot")
            capability_snapshot = self.tracker.snapshot()

            # Step 6: Build report
            completed_at = datetime.utcnow().isoformat()
            report = SentinelCycleReport(
                cycle_id=cycle_id,
                started_at=started_at,
                completed_at=completed_at,
                alerts_found=len(all_alerts),
                diagnoses_made=diagnoses_made,
                patches_applied=patches_applied,
                patches_pending_review=patches_pending,
                capability_snapshot=capability_snapshot
            )

            # Step 7: Save to cycle history
            self._save_cycle_history(report)

            # Update module state so get_status()/re-entrant calls see it
            _last_cycle_at = completed_at
            _last_cycle_report = report.dict()

            self.logger.info(f"Sentinel cycle {cycle_id} complete: {patches_applied} patches applied, {patches_pending} pending review")

            return report

        finally:
            # Always release the running flag, even if the cycle raised.
            SENTINEL_RUNNING = False

    def get_status(self) -> Dict[str, Any]:
        """
        Get current sentinel status.

        Returns:
            Status dict with health flag, cycle info, and weekly metrics.
            On any error a conservative default payload is returned.
        """
        try:
            # Count recent alerts
            alerts_this_week = self._count_recent_items("alert_history.json", days=7)

            # Count recent patches
            patches_this_week = self._count_recent_applied_patches(days=7)

            # Count pending patches
            pending_count = self._count_pending_patches()

            # Get latest AGI index
            agi_index = self._get_latest_agi_index()

            return {
                "sentinel_enabled": os.getenv("SENTINEL_ENABLED", "true").lower() == "true",
                "current_health": self.watcher.is_healthy(),
                "last_cycle_at": _last_cycle_at,
                "sentinel_running": SENTINEL_RUNNING,
                "agi_progression_index": agi_index,
                "alerts_this_week": alerts_this_week,
                "patches_applied_this_week": patches_this_week,
                "pending_review_count": pending_count,
            }

        except Exception as e:
            self.logger.error(f"Failed to get status: {e}")
            return {
                "sentinel_enabled": os.getenv("SENTINEL_ENABLED", "true").lower() == "true",
                "current_health": True,
                "last_cycle_at": _last_cycle_at,
                "sentinel_running": SENTINEL_RUNNING,
                "agi_progression_index": 0.0,
                "alerts_this_week": 0,
                "patches_applied_this_week": 0,
                "pending_review_count": 0,
            }

    def _save_cycle_history(self, report: SentinelCycleReport):
        """Append a cycle report to cycle_history.json, keeping the last 20."""
        try:
            history_file = self.data_dir / "cycle_history.json"

            # Load existing history
            if history_file.exists():
                with open(history_file, 'r', encoding='utf-8') as f:
                    history = json.load(f)
            else:
                history = []

            # Append new report
            history.append(report.dict())

            # Keep only last 20 entries to bound file growth
            if len(history) > 20:
                history = history[-20:]

            # Save back
            with open(history_file, 'w', encoding='utf-8') as f:
                json.dump(history, f, indent=2)

        except Exception as e:
            self.logger.error(f"Failed to save cycle history: {e}")

    def _count_recent_items(self, filename: str, days: int) -> int:
        """Count entries in a JSON history file whose timestamp is within the last N days."""
        try:
            file_path = self.data_dir / filename

            if not file_path.exists():
                return 0

            with open(file_path, 'r', encoding='utf-8') as f:
                items = json.load(f)

            cutoff = datetime.utcnow() - timedelta(days=days)
            count = 0

            for item in items:
                try:
                    item_time = datetime.fromisoformat(item["timestamp"])
                    if item_time >= cutoff:
                        count += 1
                except (KeyError, TypeError, ValueError):
                    # Skip malformed entries instead of failing the whole count.
                    continue

            return count

        except Exception as e:
            # Include the actual filename (the original logged a leftover
            # "(unknown)" placeholder here).
            self.logger.warning(f"Failed to count recent items in {filename}: {e}")
            return 0

    def _count_recent_applied_patches(self, days: int) -> int:
        """Count applied patches from the last N days."""
        try:
            file_path = self.data_dir / "patch_history.json"

            if not file_path.exists():
                return 0

            with open(file_path, 'r', encoding='utf-8') as f:
                patches = json.load(f)

            cutoff = datetime.utcnow() - timedelta(days=days)
            count = 0

            for patch in patches:
                try:
                    if not patch.get("applied", False):
                        continue

                    patch_time = datetime.fromisoformat(patch["timestamp"])
                    if patch_time >= cutoff:
                        count += 1
                except (KeyError, TypeError, ValueError):
                    # Skip malformed entries instead of failing the whole count.
                    continue

            return count

        except Exception as e:
            self.logger.warning(f"Failed to count recent applied patches: {e}")
            return 0

    def _count_pending_patches(self) -> int:
        """Count patches currently awaiting human review."""
        try:
            pending_dir = self.data_dir / "pending_patches"

            if not pending_dir.exists():
                return 0

            return len(list(pending_dir.glob("*.patch.json")))

        except Exception as e:
            self.logger.warning(f"Failed to count pending patches: {e}")
            return 0

    def _get_latest_agi_index(self) -> float:
        """Get the latest AGI progression index from capability history (0.0 if none)."""
        try:
            history_file = self.data_dir / "capability_history.json"

            if not history_file.exists():
                return 0.0

            with open(history_file, 'r', encoding='utf-8') as f:
                history = json.load(f)

            if not history:
                return 0.0

            return history[-1].get("agi_progression_index", 0.0)

        except Exception as e:
            self.logger.warning(f"Failed to get latest AGI index: {e}")
            return 0.0
backend/app/services/sentinel/watcher.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Watcher - System Health Scanner
3
+
4
+ Scans existing data directories for signs of system degradation.
5
+ Reads only - never writes. Never calls any LLM.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ from datetime import datetime, timedelta
12
+ from pathlib import Path
13
+ from typing import List, Dict, Any, Optional
14
+ import uuid
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class SentinelAlert:
    """A single system-health finding produced by a watcher scan.

    Each alert receives a fresh UUID and an ISO-8601 UTC timestamp at
    construction time.
    """

    def __init__(
        self,
        layer: int,
        component: str,
        issue_type: str,
        severity: str,
        raw_evidence: str
    ):
        self.alert_id = str(uuid.uuid4())
        self.layer = layer
        self.component = component
        self.issue_type = issue_type
        self.severity = severity
        self.raw_evidence = raw_evidence
        self.timestamp = datetime.utcnow().isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the alert for JSON persistence."""
        return dict(
            alert_id=self.alert_id,
            layer=self.layer,
            component=self.component,
            issue_type=self.issue_type,
            severity=self.severity,
            raw_evidence=self.raw_evidence,
            timestamp=self.timestamp,
        )
+
49
+
50
+ class SentinelWatcher:
51
+ """Scans system for health issues."""
52
+
53
    def __init__(self):
        # Root of the runtime data tree this watcher inspects (read-only).
        self.data_dir = Path("backend/app/data")
        # Cache for is_healthy(): last scan time/result, valid for 5 minutes.
        self._last_scan_time: Optional[datetime] = None
        self._last_scan_result: Optional[bool] = None
        self._cache_duration = timedelta(minutes=5)
58
+
59
+ def scan(self) -> List[SentinelAlert]:
60
+ """
61
+ Perform comprehensive system health scan.
62
+
63
+ Returns:
64
+ List of SentinelAlert objects found during scan
65
+ """
66
+ alerts = []
67
+
68
+ # Check 1: Log Errors (Layer 1)
69
+ alerts.extend(self._check_log_errors())
70
+
71
+ # Check 2: Failed Cases (Layer 2)
72
+ alerts.extend(self._check_failed_cases())
73
+
74
+ # Check 3: Provider Fallback Rate (Layer 1)
75
+ alerts.extend(self._check_provider_fallbacks())
76
+
77
+ # Check 4: Low Confidence Pattern (Layer 2)
78
+ alerts.extend(self._check_low_confidence())
79
+
80
+ # Check 5: Knowledge Store Health (Layer 5)
81
+ alerts.extend(self._check_knowledge_health())
82
+
83
+ # Save alerts to history
84
+ self._save_alert_history(alerts)
85
+
86
+ logger.info(f"Sentinel scan complete: {len(alerts)} alerts found")
87
+ return alerts
88
+
89
+ def is_healthy(self) -> bool:
90
+ """
91
+ Check if system is healthy (no high or critical alerts).
92
+
93
+ Returns:
94
+ True if healthy, False otherwise
95
+ """
96
+ # Use cached result if recent
97
+ if self._last_scan_time and self._last_scan_result is not None:
98
+ if datetime.utcnow() - self._last_scan_time < self._cache_duration:
99
+ return self._last_scan_result
100
+
101
+ # Perform new scan
102
+ alerts = self.scan()
103
+ high_or_critical = [a for a in alerts if a.severity in ("high", "critical")]
104
+
105
+ result = len(high_or_critical) == 0
106
+ self._last_scan_time = datetime.utcnow()
107
+ self._last_scan_result = result
108
+
109
+ return result
110
+
111
+ def _check_log_errors(self) -> List[SentinelAlert]:
112
+ """Check for errors in log files."""
113
+ alerts = []
114
+ logs_dir = self.data_dir / "logs"
115
+
116
+ if not logs_dir.exists():
117
+ return alerts
118
+
119
+ try:
120
+ error_count = 0
121
+ first_error = None
122
+
123
+ for log_file in logs_dir.glob("*.log"):
124
+ try:
125
+ with open(log_file, 'r') as f:
126
+ lines = f.readlines()
127
+ recent_lines = lines[-200:] if len(lines) > 200 else lines
128
+
129
+ for line in recent_lines:
130
+ if any(keyword in line for keyword in ["ERROR", "CRITICAL", "Exception", "Traceback"]):
131
+ error_count += 1
132
+ if first_error is None:
133
+ first_error = line.strip()[:300]
134
+ except Exception as e:
135
+ logger.warning(f"Failed to read log file {log_file}: {e}")
136
+ continue
137
+
138
+ if error_count > 10:
139
+ alerts.append(SentinelAlert(
140
+ layer=1,
141
+ component="logs",
142
+ issue_type="error",
143
+ severity="high",
144
+ raw_evidence=f"Found {error_count} errors in logs. First: {first_error or 'N/A'}"
145
+ ))
146
+
147
+ except Exception as e:
148
+ logger.warning(f"Failed to check log errors: {e}")
149
+
150
+ return alerts
151
+
152
+ def _check_failed_cases(self) -> List[SentinelAlert]:
153
+ """Check for failed case executions."""
154
+ alerts = []
155
+ memory_dir = self.data_dir / "memory"
156
+
157
+ if not memory_dir.exists() or not any(memory_dir.iterdir()):
158
+ return alerts
159
+
160
+ try:
161
+ case_files = sorted(memory_dir.glob("*.json"), key=os.path.getmtime, reverse=True)[:50]
162
+
163
+ if not case_files:
164
+ return alerts
165
+
166
+ total_count = 0
167
+ failed_count = 0
168
+
169
+ for case_file in case_files:
170
+ try:
171
+ with open(case_file, 'r') as f:
172
+ case_data = json.load(f)
173
+
174
+ total_count += 1
175
+
176
+ # Check if case failed
177
+ final_answer = case_data.get("final_answer", "")
178
+ outputs = case_data.get("outputs", [])
179
+
180
+ is_failed = False
181
+
182
+ if not final_answer or not outputs:
183
+ is_failed = True
184
+ else:
185
+ # Check for low confidence in any agent output
186
+ for output in outputs:
187
+ if isinstance(output, dict):
188
+ confidence = output.get("confidence", 1.0)
189
+ if confidence < 0.4:
190
+ is_failed = True
191
+ break
192
+
193
+ if is_failed:
194
+ failed_count += 1
195
+
196
+ except Exception as e:
197
+ logger.warning(f"Failed to read case file {case_file}: {e}")
198
+ continue
199
+
200
+ if total_count > 0:
201
+ failure_rate = failed_count / total_count
202
+
203
+ if failure_rate > 0.3:
204
+ severity = "critical" if failure_rate > 0.6 else "high"
205
+ alerts.append(SentinelAlert(
206
+ layer=2,
207
+ component="agents",
208
+ issue_type="degradation",
209
+ severity=severity,
210
+ raw_evidence=f"Failed case rate: {failure_rate*100:.1f}% of last {total_count} cases"
211
+ ))
212
+
213
+ except Exception as e:
214
+ logger.warning(f"Failed to check failed cases: {e}")
215
+
216
+ return alerts
217
+
218
+ def _check_provider_fallbacks(self) -> List[SentinelAlert]:
219
+ """Check for excessive provider fallbacks."""
220
+ alerts = []
221
+ logs_dir = self.data_dir / "logs"
222
+
223
+ if not logs_dir.exists():
224
+ return alerts
225
+
226
+ try:
227
+ fallback_count = 0
228
+
229
+ for log_file in logs_dir.glob("*.log"):
230
+ try:
231
+ with open(log_file, 'r') as f:
232
+ lines = f.readlines()
233
+ recent_lines = lines[-200:] if len(lines) > 200 else lines
234
+
235
+ for line in recent_lines:
236
+ if any(keyword in line for keyword in ["fallback", "Fallback", "primary provider failed"]):
237
+ fallback_count += 1
238
+ except Exception as e:
239
+ logger.warning(f"Failed to read log file {log_file}: {e}")
240
+ continue
241
+
242
+ if fallback_count > 20:
243
+ severity = "high" if fallback_count > 50 else "medium"
244
+ alerts.append(SentinelAlert(
245
+ layer=1,
246
+ component="provider",
247
+ issue_type="degradation",
248
+ severity=severity,
249
+ raw_evidence=f"Provider fallback triggered {fallback_count} times in recent logs"
250
+ ))
251
+
252
+ except Exception as e:
253
+ logger.warning(f"Failed to check provider fallbacks: {e}")
254
+
255
+ return alerts
256
+
257
+ def _check_low_confidence(self) -> List[SentinelAlert]:
258
+ """Check for low confidence pattern across cases."""
259
+ alerts = []
260
+ memory_dir = self.data_dir / "memory"
261
+
262
+ if not memory_dir.exists() or not any(memory_dir.iterdir()):
263
+ return alerts
264
+
265
+ try:
266
+ case_files = sorted(memory_dir.glob("*.json"), key=os.path.getmtime, reverse=True)[:50]
267
+
268
+ if not case_files:
269
+ return alerts
270
+
271
+ all_confidences = []
272
+
273
+ for case_file in case_files:
274
+ try:
275
+ with open(case_file, 'r') as f:
276
+ case_data = json.load(f)
277
+
278
+ outputs = case_data.get("outputs", [])
279
+
280
+ for output in outputs:
281
+ if isinstance(output, dict):
282
+ confidence = output.get("confidence")
283
+ if confidence is not None:
284
+ all_confidences.append(confidence)
285
+
286
+ except Exception as e:
287
+ logger.warning(f"Failed to read case file {case_file}: {e}")
288
+ continue
289
+
290
+ if all_confidences:
291
+ avg_confidence = sum(all_confidences) / len(all_confidences)
292
+
293
+ if avg_confidence < 0.4:
294
+ alerts.append(SentinelAlert(
295
+ layer=2,
296
+ component="synthesizer",
297
+ issue_type="degradation",
298
+ severity="medium",
299
+ raw_evidence=f"Average agent confidence: {avg_confidence:.2f}"
300
+ ))
301
+
302
+ except Exception as e:
303
+ logger.warning(f"Failed to check low confidence: {e}")
304
+
305
+ return alerts
306
+
307
+ def _check_knowledge_health(self) -> List[SentinelAlert]:
308
+ """Check knowledge store health."""
309
+ alerts = []
310
+ knowledge_dir = self.data_dir / "knowledge"
311
+
312
+ if not knowledge_dir.exists():
313
+ return alerts
314
+
315
+ try:
316
+ stale_count = 0
317
+ cutoff = datetime.utcnow() - timedelta(days=7)
318
+
319
+ for knowledge_file in knowledge_dir.glob("*.json"):
320
+ try:
321
+ mtime = datetime.fromtimestamp(knowledge_file.stat().st_mtime)
322
+ if mtime < cutoff:
323
+ stale_count += 1
324
+ except Exception as e:
325
+ logger.warning(f"Failed to check knowledge file {knowledge_file}: {e}")
326
+ continue
327
+
328
+ if stale_count > 10:
329
+ alerts.append(SentinelAlert(
330
+ layer=5,
331
+ component="knowledge_store",
332
+ issue_type="anomaly",
333
+ severity="low",
334
+ raw_evidence=f"{stale_count} knowledge items older than 7 days"
335
+ ))
336
+
337
+ except Exception as e:
338
+ logger.warning(f"Failed to check knowledge health: {e}")
339
+
340
+ return alerts
341
+
342
+ def _save_alert_history(self, alerts: List[SentinelAlert]):
343
+ """Save alerts to history file."""
344
+ try:
345
+ sentinel_dir = self.data_dir / "sentinel"
346
+ sentinel_dir.mkdir(parents=True, exist_ok=True)
347
+
348
+ history_file = sentinel_dir / "alert_history.json"
349
+
350
+ # Load existing history
351
+ if history_file.exists():
352
+ with open(history_file, 'r') as f:
353
+ history = json.load(f)
354
+ else:
355
+ history = []
356
+
357
+ # Append new alerts
358
+ for alert in alerts:
359
+ history.append(alert.to_dict())
360
+
361
+ # Keep only last 200 entries
362
+ if len(history) > 200:
363
+ history = history[-200:]
364
+
365
+ # Save back
366
+ with open(history_file, 'w') as f:
367
+ json.dump(history, f, indent=2)
368
+
369
+ except Exception as e:
370
+ logger.error(f"Failed to save alert history: {e}")
backend/app/services/simulation_store.py CHANGED
@@ -1,7 +1,7 @@
1
  import json
2
  from datetime import datetime
3
  from pathlib import Path
4
- from typing import Any, Dict, Optional
5
 
6
  from app.config import SIMULATION_DIR
7
 
@@ -24,4 +24,53 @@ def get_simulation(simulation_id: str) -> Optional[Dict[str, Any]]:
24
  path = _path(simulation_id)
25
  if not path.exists():
26
  return None
27
- return json.loads(path.read_text(encoding="utf-8"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
  from datetime import datetime
3
  from pathlib import Path
4
+ from typing import Any, Dict, Optional, List
5
 
6
  from app.config import SIMULATION_DIR
7
 
 
24
  path = _path(simulation_id)
25
  if not path.exists():
26
  return None
27
+ return json.loads(path.read_text(encoding="utf-8"))
28
+
29
+
30
def list_simulations(limit: Optional[int] = None) -> List[Dict[str, Any]]:
    """Return all stored simulations, newest first.

    Args:
        limit: Maximum number of simulations to return.  ``None`` (the
            default) returns everything.  A limit of ``0`` now correctly
            returns an empty list (the previous falsy check ``if limit:``
            silently returned all results for 0).

    Returns:
        List of simulation dicts sorted by ``saved_at`` descending.
    """
    simulations: List[Dict[str, Any]] = []
    for path in Path(SIMULATION_DIR).glob("*.json"):
        try:
            simulations.append(json.loads(path.read_text(encoding="utf-8")))
        except Exception:
            # Skip unreadable/corrupt files rather than failing the listing.
            continue

    # Sort by saved_at descending (newest first); missing timestamps sort last.
    simulations.sort(key=lambda sim: sim.get("saved_at", ""), reverse=True)

    if limit is not None:
        return simulations[:limit]
    return simulations
46
+
47
+
48
def search_simulations(query: str, status: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Search simulations by title or prediction_goal.
    Optionally filter by status.
    """
    needle = query.lower()

    def _matches(sim: Dict[str, Any]) -> bool:
        # Status filter first, then case-insensitive substring match on
        # either the title or the prediction goal.
        if status and sim.get("status") != status:
            return False
        return (
            needle in sim.get("title", "").lower()
            or needle in sim.get("prediction_goal", "").lower()
        )

    return [sim for sim in list_simulations() if _matches(sim)]
71
+
72
+
73
def filter_simulations_by_status(status: str) -> List[Dict[str, Any]]:
    """Return only the stored simulations whose ``status`` equals *status*."""
    return [
        sim
        for sim in list_simulations()
        if sim.get("status") == status
    ]