DevodG committed on
Commit
b1a271a
·
1 Parent(s): 7a52dca

feat: MiroOrg v1.1 — AI Financial Intelligence System

Browse files

- 6-layer architecture: Core Platform, Agents, Domain Packs, Simulation, Learning, Sentinel
- Finance domain pack: stance detection, event analysis, ticker/entity resolution
- Finance intelligence API: /finance/ticker, /finance/news/analyze, /finance/headlines
- Learning layer: knowledge ingestion, skill distillation, prompt optimization, A/B testing
- Sentinel layer: health monitoring, LLM-based diagnosis, safe auto-patching, capability tracking
- Frontend: JANUS dark UI with Command/Intel Stream/Markets tabs
- Intel Stream: news search with Deep Research (full 5-agent pipeline) on any article
- Markets: ticker search with TradingView chart (NSE/BSE support), AI signal, Deep Research
- Top navigation bar replacing sidebar for full-width layout
- All sensitive data excluded via .gitignore (no .env, no runtime JSON, no API keys)

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitignore +46 -4
  2. .kiro/specs/ai-financial-intelligence-system/requirements.md +85 -0
  3. .kiro/specs/ai-financial-intelligence-system/tasks.md +104 -104
  4. ARCHITECTURE.md +567 -0
  5. README.md +282 -116
  6. backend/.env.example +10 -1
  7. backend/app/agents/planner.py +45 -2
  8. backend/app/agents/research.py +64 -2
  9. backend/app/agents/switchboard.py +26 -3
  10. backend/app/agents/synthesizer.py +69 -2
  11. backend/app/agents/verifier.py +75 -2
  12. backend/app/config.py +58 -5
  13. backend/app/data/sentinel/.gitkeep +8 -0
  14. backend/app/data/sentinel/pending_patches/.gitkeep +3 -0
  15. backend/app/graph.py +69 -11
  16. backend/app/main.py +104 -10
  17. backend/app/prompts/planner.txt +89 -17
  18. backend/app/prompts/research.txt +87 -19
  19. backend/app/prompts/synthesizer.txt +100 -12
  20. backend/app/prompts/verifier.txt +111 -16
  21. backend/app/routers/finance.py +297 -0
  22. backend/app/routers/learning.py +282 -0
  23. backend/app/routers/sentinel.py +330 -0
  24. backend/app/routers/simulation.py +86 -9
  25. backend/app/schemas.py +161 -15
  26. backend/app/services/api_discovery/__init__.py +13 -0
  27. backend/app/services/api_discovery/catalog_loader.py +85 -0
  28. backend/app/services/api_discovery/classifier.py +92 -0
  29. backend/app/services/api_discovery/metadata_store.py +60 -0
  30. backend/app/services/api_discovery/scorer.py +99 -0
  31. backend/app/services/case_store.py +16 -0
  32. backend/app/services/external_sources.py +37 -10
  33. backend/app/services/learning/__init__.py +29 -0
  34. backend/app/services/learning/freshness_manager.py +10 -0
  35. backend/app/services/learning/knowledge_ingestor.py +193 -0
  36. backend/app/services/learning/knowledge_store.py +225 -0
  37. backend/app/services/learning/learning_engine.py +439 -0
  38. backend/app/services/learning/prompt_optimizer.py +274 -0
  39. backend/app/services/learning/scheduler.py +220 -0
  40. backend/app/services/learning/skill_distiller.py +279 -0
  41. backend/app/services/learning/trust_manager.py +245 -0
  42. backend/app/services/mirofish_client.py +237 -53
  43. backend/app/services/sentinel/__init__.py +14 -0
  44. backend/app/services/sentinel/capability_tracker.py +414 -0
  45. backend/app/services/sentinel/diagnostician.py +219 -0
  46. backend/app/services/sentinel/patcher.py +359 -0
  47. backend/app/services/sentinel/scheduler.py +141 -0
  48. backend/app/services/sentinel/sentinel_engine.py +349 -0
  49. backend/app/services/sentinel/watcher.py +370 -0
  50. backend/app/services/simulation_store.py +51 -2
.gitignore CHANGED
@@ -1,10 +1,31 @@
 
1
  backend/.venv/
 
 
 
 
 
 
 
 
 
 
2
  backend/.env
 
 
 
 
 
 
 
 
 
3
  frontend/node_modules/
4
  frontend/.next/
5
- .DS_Store
6
- **/__pycache__/
7
- *.pyc
 
8
  backend/app/data/memory/*.json
9
  backend/app/data/simulations/*.json
10
  backend/app/data/logs/*
@@ -12,5 +33,26 @@ backend/app/data/knowledge/*.json
12
  backend/app/data/skills/*.json
13
  backend/app/data/prompt_versions/*.json
14
  backend/app/data/learning/*.json
15
- .venv/
 
 
 
 
 
 
 
 
 
 
 
 
16
  *.log
 
 
 
 
 
 
 
 
 
 
1
+ # ── Python ────────────────────────────────────────────────────
2
  backend/.venv/
3
+ .venv/
4
+ **/__pycache__/
5
+ *.pyc
6
+ *.pyo
7
+ *.pyd
8
+ *.egg-info/
9
+ dist/
10
+ build/
11
+
12
+ # ── Environment / Secrets ─────────────────────────────────────
13
  backend/.env
14
+ .env
15
+ .env.local
16
+ .env.*.local
17
+ *.key
18
+ *.pem
19
+ *.p12
20
+ *.pfx
21
+
22
+ # ── Frontend ──────────────────────────────────────────────────
23
  frontend/node_modules/
24
  frontend/.next/
25
+ frontend/.turbo/
26
+ frontend/out/
27
+
28
+ # ── Runtime data (never commit) ───────────────────────────────
29
  backend/app/data/memory/*.json
30
  backend/app/data/simulations/*.json
31
  backend/app/data/logs/*
 
33
  backend/app/data/skills/*.json
34
  backend/app/data/prompt_versions/*.json
35
  backend/app/data/learning/*.json
36
+
37
+ # Sentinel runtime data
38
+ backend/app/data/sentinel/*.json
39
+ backend/app/data/sentinel/pending_patches/*.json
40
+ !backend/app/data/sentinel/.gitkeep
41
+ !backend/app/data/sentinel/pending_patches/.gitkeep
42
+
43
+ # ── OS / Editor ───────────────────────────────────────────────
44
+ .DS_Store
45
+ .DS_Store?
46
+ Thumbs.db
47
+ .vscode/settings.json
48
+ .vscode/launch.json
49
  *.log
50
+
51
+ # ── Misc ──────────────────────────────────────────────────────
52
+ *.bak
53
+ *.tmp
54
+ *_new.tsx
55
+ *_old.tsx
56
+
57
+ # Stray nested directory
58
+ backend/backend/
.kiro/specs/ai-financial-intelligence-system/requirements.md CHANGED
@@ -471,3 +471,88 @@ This document specifies requirements for MiroOrg v1.1, a general intelligence op
471
  60. THE System SHALL provide GET /prompts/versions/{name} endpoint for prompt version history
472
  61. THE System SHALL provide POST /prompts/optimize/{name} endpoint for prompt optimization
473
  62. THE System SHALL provide POST /prompts/promote/{name}/{version} endpoint for promoting prompt versions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  60. THE System SHALL provide GET /prompts/versions/{name} endpoint for prompt version history
472
  61. THE System SHALL provide POST /prompts/optimize/{name} endpoint for prompt optimization
473
  62. THE System SHALL provide POST /prompts/promote/{name}/{version} endpoint for promoting prompt versions
474
+
475
+
476
+ ### Requirement 18: Sentinel Layer (Adaptive Maintenance)
477
+
478
+ **User Story:** As a user, I want the system to monitor its own health and propose safe corrections when issues are detected, so that the system can self-heal minor problems without breaking anything or requiring constant manual intervention.
479
+
480
+ #### Acceptance Criteria
481
+
482
+ 1. THE System SHALL implement a Sentinel Layer as Layer 6 in the architecture
483
+ 2. THE System SHALL observe system health metrics: provider success rates, agent output quality, error frequencies, response times, storage usage
484
+ 3. THE System SHALL diagnose common issues: provider failures, degraded prompts, stale knowledge, configuration drift
485
+ 4. THE System SHALL propose limited safe corrections: prompt adjustments, configuration tweaks, knowledge refresh
486
+ 5. THE System SHALL NEVER autonomously edit Python or TypeScript code
487
+ 6. THE System SHALL NEVER make risky changes without explicit human approval
488
+ 7. THE System SHALL maintain a conservative safety-first approach
489
+
490
+ #### Health Monitoring
491
+
492
+ 8. THE System SHALL track provider health: success rate, average latency, error types, fallback frequency
493
+ 9. THE System SHALL track agent performance: output quality, confidence scores, execution time, error rates
494
+ 10. THE System SHALL track storage health: disk usage, cache hit rates, eviction frequency
495
+ 11. THE System SHALL track external API health: rate limit status, error rates, response times
496
+ 12. THE System SHALL detect anomalies: sudden quality drops, repeated errors, resource exhaustion
497
+
498
+ #### Diagnostic Capabilities
499
+
500
+ 13. THE System SHALL diagnose provider issues: API key expiration, rate limiting, service outages
501
+ 14. THE System SHALL diagnose prompt degradation: low confidence scores, repeated failures, user corrections
502
+ 15. THE System SHALL diagnose knowledge staleness: outdated information, expired cache entries
503
+ 16. THE System SHALL diagnose configuration drift: missing environment variables, invalid settings
504
+ 17. THE System SHALL provide clear diagnostic reports with evidence and severity levels
505
+
506
+ #### Safe Correction Proposals
507
+
508
+ 18. THE System SHALL propose prompt improvements when quality degrades
509
+ 19. THE System SHALL propose knowledge refresh when staleness is detected
510
+ 20. THE System SHALL propose configuration updates when drift is detected
511
+ 21. THE System SHALL propose provider fallback adjustments when reliability changes
512
+ 22. EACH proposal SHALL include: issue description, proposed fix, risk level, rollback plan
513
+ 23. THE System SHALL require human approval for MEDIUM and HIGH risk changes
514
+ 24. THE System SHALL auto-apply only LOW risk changes with full audit logging
515
+
516
+ #### Safety Constraints
517
+
518
+ 25. THE System SHALL NEVER modify source code files (.py, .ts, .tsx, .js)
519
+ 26. THE System SHALL NEVER delete data directories or user content
520
+ 27. THE System SHALL NEVER change API keys or credentials
521
+ 28. THE System SHALL NEVER disable critical system components
522
+ 29. THE System SHALL maintain full audit logs of all proposed and applied changes
523
+ 30. THE System SHALL provide rollback capability for all applied changes
524
+
525
+ #### Capability Trend Tracking
526
+
527
+ 31. THE System SHALL track capability trends over time: answer quality, user satisfaction proxies, system reliability
528
+ 32. THE System SHALL detect capability regressions: quality drops, increased errors, slower responses
529
+ 33. THE System SHALL correlate regressions with recent changes: prompt updates, configuration changes, knowledge updates
530
+ 34. THE System SHALL alert when capabilities degrade beyond acceptable thresholds
531
+
532
+ #### Scheduler Integration
533
+
534
+ 35. THE System SHALL run health checks on a configurable schedule (default: hourly)
535
+ 36. THE System SHALL run diagnostics when anomalies are detected
536
+ 37. THE System SHALL use the same lightweight scheduler as the learning layer
537
+ 38. THE System SHALL respect system resource constraints (CPU, battery, idle time)
538
+ 39. THE System SHALL NOT interfere with user operations or learning tasks
539
+
540
+ #### API Endpoints
541
+
542
+ 40. THE System SHALL provide GET /sentinel/status endpoint for sentinel system status
543
+ 41. THE System SHALL provide GET /sentinel/health endpoint for comprehensive health report
544
+ 42. THE System SHALL provide GET /sentinel/diagnostics endpoint for recent diagnostic results
545
+ 43. THE System SHALL provide GET /sentinel/proposals endpoint for pending correction proposals
546
+ 44. THE System SHALL provide POST /sentinel/proposals/{id}/approve endpoint for approving proposals
547
+ 45. THE System SHALL provide POST /sentinel/proposals/{id}/reject endpoint for rejecting proposals
548
+ 46. THE System SHALL provide GET /sentinel/audit endpoint for audit log of applied changes
549
+ 47. THE System SHALL provide POST /sentinel/rollback/{change_id} endpoint for rolling back changes
550
+
551
+ #### Integration with Existing Layers
552
+
553
+ 48. THE System SHALL integrate with learning layer for quality metrics
554
+ 49. THE System SHALL integrate with provider abstraction for health monitoring
555
+ 50. THE System SHALL integrate with prompt management for prompt quality tracking
556
+ 51. THE System SHALL integrate with knowledge store for staleness detection
557
+ 52. THE System SHALL integrate with configuration system for drift detection
558
+
.kiro/specs/ai-financial-intelligence-system/tasks.md CHANGED
@@ -114,72 +114,72 @@ The implementation follows 9 phases, each building on the previous while maintai
114
 
115
  ### Phase 3: Agent Enhancement with Domain Intelligence
116
 
117
- - [ ] 7. Enhance Switchboard with domain detection
118
- - [ ] 7.1 Add domain pack dimension to routing
119
  - Modify `backend/app/agents/switchboard.py` to add domain_pack to routing decision
120
  - Implement domain detection using domain registry
121
  - Update RouteDecision schema in `backend/app/schemas.py`
122
  - _Requirements: 4.1, 5.6_
123
 
124
- - [ ] 7.2 Implement complexity-based routing logic
125
  - Ensure simple queries (≤5 words) route to solo mode
126
  - Ensure medium queries (≤25 words) route to standard mode
127
  - Ensure complex queries (>25 words) route to deep mode
128
  - _Requirements: 4.2, 4.3, 4.4_
129
 
130
- - [ ] 7.3 Implement simulation keyword detection
131
  - Add simulation trigger keyword detection
132
  - Load keywords from environment configuration
133
  - Set task_family="simulation" when keywords detected
134
  - _Requirements: 4.5, 4.6, 4.7_
135
 
136
- - [ ] 8. Enhance Research Agent with domain capabilities
137
- - [ ] 8.1 Integrate domain pack research enhancement
138
  - Modify `backend/app/agents/research.py` to detect domain
139
  - Call domain pack enhance_research() when domain detected
140
  - Add structured entity extraction output
141
  - _Requirements: 5.3, 7.1, 7.2, 7.3, 7.4_
142
 
143
- - [ ] 8.2 Update research agent prompt
144
  - Update `backend/app/prompts/research.txt` with domain intelligence instructions
145
  - Add instructions for entity and ticker extraction
146
  - Add instructions for structured output
147
  - _Requirements: 7.14_
148
 
149
- - [ ] 9. Enhance Verifier Agent with domain capabilities
150
- - [ ] 9.1 Integrate domain pack verification enhancement
151
  - Modify `backend/app/agents/verifier.py` to detect domain
152
  - Call domain pack enhance_verification() when domain detected
153
  - Add structured credibility scoring output
154
  - _Requirements: 5.3, 7.7, 7.8, 7.9_
155
 
156
- - [ ] 9.2 Update verifier agent prompt
157
  - Update `backend/app/prompts/verifier.txt` with domain intelligence instructions
158
  - Add instructions for rumor and scam detection
159
  - Add instructions for uncertainty surfacing
160
  - _Requirements: 7.14_
161
 
162
- - [ ] 10. Enhance Planner and Synthesizer Agents
163
- - [ ] 10.1 Add simulation mode suggestion to Planner
164
  - Modify `backend/app/agents/planner.py` to detect simulation opportunities
165
  - Add simulation_suggested field to output
166
  - Update `backend/app/prompts/planner.txt` with simulation guidance
167
  - _Requirements: 7.6, 7.14_
168
 
169
- - [ ] 10.2 Add uncertainty quantification to Synthesizer
170
  - Modify `backend/app/agents/synthesizer.py` to quantify uncertainty
171
  - Add simulation recommendation logic
172
  - Update `backend/app/prompts/synthesizer.txt` with uncertainty instructions
173
  - _Requirements: 7.11, 7.12, 7.13, 7.14_
174
 
175
- - [ ] 11. Update graph execution with domain context
176
- - [ ] 11.1 Pass domain pack context through pipeline
177
  - Modify `backend/app/graph.py` to detect domain early
178
  - Pass domain context to all agents
179
  - Ensure domain-enhanced execution flows correctly
180
  - _Requirements: 2.5, 5.3, 7.15_
181
 
182
- - [ ] 12. Checkpoint - Verify agent enhancements
183
  - Ensure Switchboard detects finance domain correctly
184
  - Verify Research agent extracts entities and tickers
185
  - Verify Verifier agent scores credibility
@@ -188,32 +188,32 @@ The implementation follows 9 phases, each building on the previous while maintai
188
 
189
  ### Phase 4: Simulation Integration Enhancement
190
 
191
- - [ ] 13. Enhance simulation workflow and case linking
192
- - [ ] 13.1 Add case linking to simulation router
193
  - Modify `backend/app/routers/simulation.py` to link case_id
194
  - Improve error messages for MiroFish failures
195
  - Add better status reporting
196
  - _Requirements: 8.1, 8.11, 8.12_
197
 
198
- - [ ] 13.2 Enhance simulation store with search and filtering
199
  - Modify `backend/app/services/simulation_store.py`
200
  - Add simulation search by title or prediction_goal
201
  - Add simulation filtering by status
202
  - _Requirements: 8.10_
203
 
204
- - [ ] 13.3 Update case storage for simulation linking
205
  - Modify `backend/app/services/case_store.py` to add simulation_id field
206
  - Add case-to-simulation lookup functionality
207
  - Update CaseRecord schema in `backend/app/schemas.py`
208
  - _Requirements: 10.8, 14.9_
209
 
210
- - [ ] 13.4 Add simulation workflow to graph execution
211
  - Modify `backend/app/graph.py` to add simulation handoff logic
212
  - Add simulation result synthesis
213
  - Ensure simulation results flow into final answer
214
  - _Requirements: 3.4, 8.13_
215
 
216
- - [ ] 14. Checkpoint - Verify simulation integration
217
  - Ensure simulation requests create linked cases
218
  - Verify cases with simulations show simulation_id
219
  - Verify simulation results are synthesized correctly
@@ -221,8 +221,8 @@ The implementation follows 9 phases, each building on the previous while maintai
221
 
222
  ### Phase 5: API Discovery Subsystem
223
 
224
- - [ ] 15. Create API discovery infrastructure
225
- - [ ] 15.1 Create API discovery structure
226
  - Create `backend/app/services/api_discovery/__init__.py`
227
  - Create `backend/app/services/api_discovery/catalog_loader.py`
228
  - Create `backend/app/services/api_discovery/classifier.py`
@@ -230,12 +230,12 @@ The implementation follows 9 phases, each building on the previous while maintai
230
  - Create `backend/app/services/api_discovery/metadata_store.py`
231
  - _Requirements: 9.3, 9.4_
232
 
233
- - [ ] 15.2 Implement catalog loader
234
  - Implement load_public_apis_catalog() to fetch from GitHub or local cache
235
  - Parse API entries with name, description, auth, HTTPS, CORS, category, link
236
  - _Requirements: 9.3, 9.6_
237
 
238
- - [ ] 15.3 Implement API classifier and scorer
239
  - Implement classify_api() to categorize APIs by domain
240
  - Implement score_api_usefulness() to prioritize APIs for integration
241
  - Consider auth simplicity, HTTPS, CORS, category relevance
@@ -247,7 +247,7 @@ The implementation follows 9 phases, each building on the previous while maintai
247
  - Add `GET /api-discovery/top-scored` endpoint
248
  - _Requirements: 9.4_
249
 
250
- - [ ] 16. Checkpoint - Verify API discovery
251
  - Ensure catalog loads successfully
252
  - Verify APIs are classified correctly
253
  - Verify scoring produces reasonable priorities
@@ -255,94 +255,94 @@ The implementation follows 9 phases, each building on the previous while maintai
255
 
256
  ### Phase 6: Frontend Enhancement
257
 
258
- - [ ] 17. Create layout and navigation infrastructure
259
- - [ ] 17.1 Create layout components
260
  - Create `frontend/src/components/layout/Header.tsx` with branding and navigation
261
  - Create `frontend/src/components/layout/Navigation.tsx` with tab navigation
262
  - Modify `frontend/src/app/layout.tsx` to use new layout components
263
  - _Requirements: 12.1, 12.2_
264
 
265
- - [ ] 17.2 Create common UI components
266
  - Create `frontend/src/components/common/Badge.tsx` for status indicators
267
  - Create `frontend/src/components/common/Card.tsx` for content containers
268
  - Create `frontend/src/components/common/LoadingSpinner.tsx` for loading states
269
  - Create `frontend/src/components/common/ErrorMessage.tsx` for error display
270
  - _Requirements: 12.10, 12.11, 12.15_
271
 
272
- - [ ] 17.3 Create API client and type definitions
273
  - Create `frontend/src/lib/api.ts` with MiroOrgClient class
274
  - Implement methods for all backend endpoints
275
  - Create `frontend/src/lib/types.ts` with TypeScript interfaces
276
  - _Requirements: 11.21_
277
 
278
- - [ ] 18. Create Main Dashboard page
279
- - [ ] 18.1 Implement dashboard with system overview
280
  - Modify `frontend/src/app/page.tsx` to show quick stats
281
  - Display recent cases summary
282
  - Display system health status
283
  - Add navigation to main features
284
  - _Requirements: 12.2_
285
 
286
- - [ ] 19. Create Analyze page and components
287
- - [ ] 19.1 Create Analyze page structure
288
  - Create `frontend/src/app/analyze/page.tsx` with analysis interface
289
  - Create `frontend/src/components/analyze/TaskInput.tsx` for user input
290
  - Create `frontend/src/components/analyze/ModeSelector.tsx` for mode selection
291
  - _Requirements: 12.3, 12.7, 12.8_
292
 
293
- - [ ] 19.2 Create result display components
294
  - Create `frontend/src/components/analyze/ResultViewer.tsx` for final answers
295
  - Create `frontend/src/components/analyze/AgentOutputPanel.tsx` for agent outputs
296
  - Display route/debug badges and confidence indicators
297
  - _Requirements: 12.9, 12.10, 12.11, 12.14_
298
 
299
- - [ ] 20. Create Cases page and components
300
- - [ ] 20.1 Create Cases history interface
301
  - Create `frontend/src/app/cases/page.tsx` with case list
302
  - Create `frontend/src/components/cases/CaseList.tsx` for listing cases
303
  - Create `frontend/src/components/cases/CaseCard.tsx` for case preview
304
  - _Requirements: 12.4, 12.17_
305
 
306
- - [ ] 20.2 Create Case detail view
307
  - Create `frontend/src/app/cases/[id]/page.tsx` for case details
308
  - Create `frontend/src/components/cases/CaseDetail.tsx` for full case display
309
  - Display case_id, routing decision, agent outputs, timestamps
310
  - _Requirements: 12.17_
311
 
312
- - [ ] 21. Create Simulation page and components
313
- - [ ] 21.1 Create Simulation submission interface
314
  - Create `frontend/src/app/simulation/page.tsx` with simulation form
315
  - Create `frontend/src/components/simulation/SimulationForm.tsx` for input
316
  - Create `frontend/src/components/simulation/SimulationStatus.tsx` for status display
317
  - _Requirements: 12.6, 12.13_
318
 
319
- - [ ] 21.2 Create Simulation detail and chat interface
320
  - Create `frontend/src/app/simulation/[id]/page.tsx` for simulation details
321
  - Create `frontend/src/components/simulation/SimulationReport.tsx` for report display
322
  - Create `frontend/src/components/simulation/SimulationChat.tsx` for post-simulation chat
323
  - _Requirements: 12.13_
324
 
325
- - [ ] 22. Create Prompt Lab and Config pages
326
- - [ ] 22.1 Create Prompt Lab interface
327
  - Create `frontend/src/app/prompts/page.tsx` with prompt management
328
  - Create `frontend/src/components/prompts/PromptList.tsx` for listing prompts
329
  - Create `frontend/src/components/prompts/PromptEditor.tsx` for editing
330
  - _Requirements: 12.5_
331
 
332
- - [ ] 22.2 Create Config page
333
  - Create `frontend/src/app/config/page.tsx` with system configuration view
334
  - Display provider status, feature flags, health checks
335
  - _Requirements: 12.1_
336
 
337
- - [ ] 23. Implement dark theme and styling
338
- - [ ] 23.1 Update global styles with dark theme
339
  - Modify `frontend/src/app/globals.css` with dark color palette
340
  - Implement card-based structure with subtle borders
341
  - Add animations and transitions
342
  - Use Inter font family
343
  - _Requirements: 12.15, 12.16_
344
 
345
- - [ ] 24. Checkpoint - Verify frontend functionality
346
  - Ensure all pages are accessible via navigation
347
  - Verify Analyze workflow works end-to-end
348
  - Verify Case history displays correctly
@@ -353,7 +353,7 @@ The implementation follows 9 phases, each building on the previous while maintai
353
 
354
  ### Phase 7: Testing and Documentation
355
 
356
- - [ ] 25. Write unit tests for core functionality
357
  - [ ]* 25.1 Write provider abstraction tests
358
  - Test OpenRouter, Ollama, OpenAI provider calls
359
  - Test provider fallback behavior
@@ -388,7 +388,7 @@ The implementation follows 9 phases, each building on the previous while maintai
388
  - Test error handling for disabled MiroFish
389
  - _Requirements: 8.1, 8.11, 8.12_
390
 
391
- - [ ] 26. Write property-based tests
392
  - [ ]* 26.1 Write Property 1: Configuration Environment Isolation
393
  - **Property 1: Configuration Environment Isolation**
394
  - **Validates: Requirements 1.8, 6.7**
@@ -479,7 +479,7 @@ The implementation follows 9 phases, each building on the previous while maintai
479
  - Test that new domain packs don't require agent changes
480
  - Create mock domain pack and verify integration
481
 
482
- - [ ] 27. Write integration tests
483
  - [ ]* 27.1 Write end-to-end case execution test
484
  - Test complete workflow from user input to final answer
485
  - Verify all agents execute correctly
@@ -502,8 +502,8 @@ The implementation follows 9 phases, each building on the previous while maintai
502
  - Verify finance pack capabilities are used
503
  - _Requirements: 5.3, 7.1, 7.7_
504
 
505
- - [ ] 28. Create comprehensive documentation
506
- - [ ] 28.1 Update main README
507
  - Update `README.md` with architecture overview
508
  - Document four-layer architecture
509
  - Document agent roles and responsibilities
@@ -511,26 +511,26 @@ The implementation follows 9 phases, each building on the previous while maintai
511
  - Add environment variable reference
512
  - _Requirements: 13.2, 13.3, 13.4, 13.10, 13.11, 13.12_
513
 
514
- - [ ] 28.2 Create architecture documentation
515
  - Create `ARCHITECTURE.md` with detailed architecture description
516
  - Document component interactions
517
  - Document data flow
518
  - _Requirements: 13.10_
519
 
520
- - [ ] 28.3 Create domain pack documentation
521
  - Create `DOMAIN_PACKS.md` with domain pack integration guide
522
  - Document how to create new domain packs
523
  - Document finance pack capabilities
524
  - _Requirements: 13.13_
525
 
526
- - [ ] 28.4 Create testing documentation
527
  - Create `TESTING.md` with testing strategy and guidelines
528
  - Document unit test patterns
529
  - Document property-based test patterns
530
  - Document integration test patterns
531
  - _Requirements: 14.1, 14.2, 14.3_
532
 
533
- - [ ] 28.5 Create deployment documentation
534
  - Create `DEPLOYMENT.md` with deployment instructions
535
  - Document environment setup
536
  - Document dependency installation
@@ -546,27 +546,27 @@ The implementation follows 9 phases, each building on the previous while maintai
546
 
547
  ### Phase 8: Cleanup and Optimization
548
 
549
- - [ ] 30. Remove dead code and optimize performance
550
- - [ ] 30.1 Clean up codebase
551
  - Remove unused imports across all files
552
  - Remove commented code
553
  - Remove duplicate implementations
554
  - _Requirements: 1.5_
555
 
556
- - [ ] 30.2 Optimize external API performance
557
  - Add caching for market quotes with 5 minute TTL
558
  - Verify connection pooling is implemented
559
  - Verify request timeouts are configured
560
  - Add rate limiting for external APIs
561
  - _Requirements: 15.3, 15.4, 15.5, 15.6_
562
 
563
- - [ ] 30.3 Polish error messages and logging
564
  - Review all error messages for clarity and consistency
565
  - Review log levels for appropriateness
566
  - Add missing log entries for key operations
567
  - _Requirements: 9.3, 9.8_
568
 
569
- - [ ] 30.4 Security review
570
  - Verify no API keys in source code
571
  - Verify error messages don't leak internals
572
  - Verify input validation is comprehensive
@@ -579,7 +579,7 @@ The implementation follows 9 phases, each building on the previous while maintai
579
  - Identify and address bottlenecks
580
  - _Requirements: 15.1, 15.2_
581
 
582
- - [ ] 31. Final checkpoint - System verification
583
  - Ensure no dead code remains
584
  - Verify performance meets requirements
585
  - Verify error messages are clear and consistent
@@ -589,15 +589,15 @@ The implementation follows 9 phases, each building on the previous while maintai
589
 
590
  ### Phase 9: Autonomous Knowledge Evolution Layer
591
 
592
- - [ ] 32. Create learning subsystem infrastructure
593
- - [ ] 32.1 Create learning service structure
594
  - Create `backend/app/services/learning/__init__.py`
595
  - Create `backend/app/services/learning/knowledge_ingestor.py`
596
  - Create `backend/app/services/learning/knowledge_store.py`
597
  - Create `backend/app/services/learning/learning_engine.py`
598
  - _Requirements: 17.1, 17.2, 17.3_
599
 
600
- - [ ] 32.2 Create additional learning services
601
  - Create `backend/app/services/learning/prompt_optimizer.py`
602
  - Create `backend/app/services/learning/skill_distiller.py`
603
  - Create `backend/app/services/learning/trust_manager.py`
@@ -605,22 +605,22 @@ The implementation follows 9 phases, each building on the previous while maintai
605
  - Create `backend/app/services/learning/scheduler.py`
606
  - _Requirements: 17.1, 17.17, 17.23, 17.28, 17.40_
607
 
608
- - [ ] 32.3 Create data directories
609
  - Create `backend/app/data/knowledge/` directory
610
  - Create `backend/app/data/skills/` directory
611
  - Create `backend/app/data/prompt_versions/` directory
612
  - Create `backend/app/data/learning/` directory
613
  - _Requirements: 17.33, 17.34, 17.35, 17.36_
614
 
615
- - [ ] 33. Implement knowledge ingestion and storage
616
- - [ ] 33.1 Implement knowledge ingestion
617
  - Implement ingest_from_search() using Tavily API
618
  - Implement ingest_from_url() using Jina Reader
619
  - Implement ingest_from_news() using NewsAPI
620
  - Implement compress_content() for summarization (2-4KB limit)
621
  - _Requirements: 17.8, 17.9, 17.10, 17.11_
622
 
623
- - [ ] 33.2 Implement knowledge store
624
  - Implement save_knowledge() with JSON storage
625
  - Implement get_knowledge() and search_knowledge()
626
  - Implement delete_expired_knowledge() with auto-cleanup
@@ -628,32 +628,32 @@ The implementation follows 9 phases, each building on the previous while maintai
628
  - Implement LRU eviction when limit reached
629
  - _Requirements: 17.4, 17.5, 17.33, 17.38_
630
 
631
- - [ ] 33.3 Add knowledge schemas
632
  - Add KnowledgeItem schema to `backend/app/schemas.py`
633
  - Add validation for summary length (2-4KB)
634
  - Add trust_score and freshness_score fields
635
  - _Requirements: 17.9_
636
 
637
- - [ ] 34. Implement experience learning
638
- - [ ] 34.1 Implement case learning
639
  - Implement learn_from_case() to extract metadata
640
  - Implement detect_patterns() for repeated patterns
641
  - Implement get_route_effectiveness() for routing insights
642
  - Implement get_prompt_performance() for prompt insights
643
  - _Requirements: 17.13, 17.14, 17.15, 17.16_
644
 
645
- - [ ] 34.2 Add case learning schemas
646
  - Add CaseLearning schema to `backend/app/schemas.py`
647
  - Add fields for route_effectiveness, prompt_performance, provider_reliability
648
  - _Requirements: 17.13_
649
 
650
- - [ ] 34.3 Hook learning into case save flow
651
  - Modify `backend/app/services/case_store.py` to call learn_from_case()
652
  - Store case learning metadata separately
653
  - _Requirements: 17.44_
654
 
655
- - [ ] 35. Implement prompt evolution
656
- - [ ] 35.1 Implement prompt versioning
657
  - Implement create_prompt_variant() using provider API
658
  - Implement test_prompt_variant() with quality metrics
659
  - Implement compare_prompts() for A/B testing
@@ -661,66 +661,66 @@ The implementation follows 9 phases, each building on the previous while maintai
661
  - Implement archive_prompt() for old versions
662
  - _Requirements: 17.17, 17.18, 17.19, 17.20, 17.21, 17.22_
663
 
664
- - [ ] 35.2 Add prompt version schemas
665
  - Add PromptVersion schema to `backend/app/schemas.py`
666
  - Add fields for version, status, win_rate, test_count
667
  - _Requirements: 17.17_
668
 
669
- - [ ] 35.3 Integrate with prompt management
670
  - Hook prompt versions into prompt loading
671
  - Store prompt history in prompt_versions directory
672
  - _Requirements: 17.47_
673
 
674
- - [ ] 36. Implement skill distillation
675
- - [ ] 36.1 Implement skill detection and creation
676
  - Implement detect_skill_candidates() from patterns
677
  - Implement distill_skill() to create skill records
678
  - Implement test_skill() for validation
679
  - Implement apply_skill() for skill usage
680
  - _Requirements: 17.23, 17.24, 17.25, 17.26, 17.27_
681
 
682
- - [ ] 36.2 Add skill schemas
683
  - Add Skill schema to `backend/app/schemas.py`
684
  - Add fields for trigger_patterns, recommended_agents, preferred_sources
685
  - _Requirements: 17.24_
686
 
687
- - [ ] 36.3 Integrate skills with agents
688
  - Hook skill application into agent execution
689
  - Store skills in skills directory
690
  - _Requirements: 17.45_
691
 
692
- - [ ] 37. Implement trust and freshness management
693
- - [ ] 37.1 Implement trust management
694
  - Implement get_trust_score() and update_trust()
695
  - Implement list_trusted_sources() and list_untrusted_sources()
696
  - Track verification outcomes
697
  - _Requirements: 17.28, 17.29, 17.30, 17.31, 17.32_
698
 
699
- - [ ] 37.2 Implement freshness management
700
  - Implement calculate_freshness() with domain-specific rules
701
  - Implement update_freshness() and get_stale_items()
702
  - Implement recommend_refresh() for stale items
703
  - _Requirements: 17.28, 17.29, 17.30, 17.31_
704
 
705
- - [ ] 37.3 Add trust and freshness schemas
706
  - Add SourceTrust schema to `backend/app/schemas.py`
707
  - Add FreshnessScore schema to `backend/app/schemas.py`
708
  - _Requirements: 17.28_
709
 
710
- - [ ] 37.4 Integrate with source selection
711
  - Hook trust scores into research agent source selection
712
  - Hook freshness scores into knowledge retrieval
713
  - _Requirements: 17.46_
714
 
715
- - [ ] 38. Implement learning scheduler
716
- - [ ] 38.1 Implement scheduler with safeguards
717
  - Implement schedule_task() with interval configuration
718
  - Implement is_system_idle() to check CPU usage
719
  - Implement is_battery_ok() to check battery level
720
  - Implement run_once() for manual triggers
721
  - _Requirements: 17.40, 17.41, 17.42, 17.43_
722
 
723
- - [ ] 38.2 Add scheduled tasks
724
  - Schedule knowledge ingestion (every 6 hours)
725
  - Schedule expired knowledge cleanup (daily)
726
  - Schedule pattern detection (daily)
@@ -728,7 +728,7 @@ The implementation follows 9 phases, each building on the previous while maintai
728
  - Schedule prompt optimization (weekly)
729
  - _Requirements: 17.6, 17.7, 17.40_
730
 
731
- - [ ] 38.3 Add scheduler configuration
732
  - Add LEARNING_ENABLED flag to config
733
  - Add KNOWLEDGE_MAX_SIZE_MB (default 200)
734
  - Add LEARNING_SCHEDULE_INTERVAL
@@ -736,53 +736,53 @@ The implementation follows 9 phases, each building on the previous while maintai
736
  - Add domain-specific expiration rules
737
  - _Requirements: 17.4, 17.5, 17.6, 17.7, 17.12_
738
 
739
- - [ ] 39. Add learning API endpoints
740
- - [ ] 39.1 Add learning status endpoints
741
  - Add GET /learning/status endpoint
742
  - Add POST /learning/run-once endpoint
743
  - Add GET /learning/insights endpoint
744
  - _Requirements: 17.49, 17.50, 17.51_
745
 
746
- - [ ] 39.2 Add knowledge endpoints
747
  - Add GET /knowledge endpoint for listing
748
  - Add GET /knowledge/{item_id} endpoint for details
749
  - Add GET /knowledge/search endpoint with query parameter
750
  - _Requirements: 17.52, 17.53, 17.54_
751
 
752
- - [ ] 39.3 Add skill endpoints
753
  - Add GET /skills endpoint for listing
754
  - Add GET /skills/{skill_name} endpoint for details
755
  - Add POST /skills/distill endpoint for manual distillation
756
  - _Requirements: 17.55, 17.56, 17.57_
757
 
758
- - [ ] 39.4 Add trust and freshness endpoints
759
  - Add GET /sources/trust endpoint
760
  - Add GET /sources/freshness endpoint
761
  - _Requirements: 17.58, 17.59_
762
 
763
- - [ ] 39.5 Add prompt evolution endpoints
764
  - Add GET /prompts/versions/{name} endpoint
765
  - Add POST /prompts/optimize/{name} endpoint
766
  - Add POST /prompts/promote/{name}/{version} endpoint
767
  - _Requirements: 17.60, 17.61, 17.62_
768
 
769
- - [ ] 40. Integrate learning layer with existing system
770
- - [ ] 40.1 Integrate with case execution
771
  - Hook learn_from_case() into case save flow
772
  - Store case learning metadata
773
  - _Requirements: 17.44_
774
 
775
- - [ ] 40.2 Integrate with research agent
776
  - Hook knowledge search into research agent
777
  - Use trust scores for source selection
778
  - _Requirements: 17.45, 17.46_
779
 
780
- - [ ] 40.3 Integrate with simulation
781
  - Learn from simulation outcomes
782
  - Store simulation insights
783
  - _Requirements: 17.46_
784
 
785
- - [ ] 40.4 Integrate with prompt management
786
  - Hook prompt versions into prompt loading
787
  - Track prompt performance
788
  - _Requirements: 17.47_
@@ -820,7 +820,7 @@ The implementation follows 9 phases, each building on the previous while maintai
820
  - Test scheduler respects rate limits
821
  - _Requirements: 17.6, 17.7, 17.40, 17.41, 17.42_
822
 
823
- - [ ] 42. Final checkpoint - Learning layer verification
824
  - Ensure the learning subsystem runs without stressing the laptop
825
  - Verify knowledge cache stays under 200MB
826
  - Verify scheduler respects battery and CPU constraints
 
114
 
115
  ### Phase 3: Agent Enhancement with Domain Intelligence
116
 
117
+ - [x] 7. Enhance Switchboard with domain detection
118
+ - [x] 7.1 Add domain pack dimension to routing
119
  - Modify `backend/app/agents/switchboard.py` to add domain_pack to routing decision
120
  - Implement domain detection using domain registry
121
  - Update RouteDecision schema in `backend/app/schemas.py`
122
  - _Requirements: 4.1, 5.6_
123
 
124
+ - [x] 7.2 Implement complexity-based routing logic
125
  - Ensure simple queries (≤5 words) route to solo mode
126
  - Ensure medium queries (≤25 words) route to standard mode
127
  - Ensure complex queries (>25 words) route to deep mode
128
  - _Requirements: 4.2, 4.3, 4.4_
129
 
130
+ - [x] 7.3 Implement simulation keyword detection
131
  - Add simulation trigger keyword detection
132
  - Load keywords from environment configuration
133
  - Set task_family="simulation" when keywords detected
134
  - _Requirements: 4.5, 4.6, 4.7_
135
 
136
+ - [x] 8. Enhance Research Agent with domain capabilities
137
+ - [x] 8.1 Integrate domain pack research enhancement
138
  - Modify `backend/app/agents/research.py` to detect domain
139
  - Call domain pack enhance_research() when domain detected
140
  - Add structured entity extraction output
141
  - _Requirements: 5.3, 7.1, 7.2, 7.3, 7.4_
142
 
143
+ - [x] 8.2 Update research agent prompt
144
  - Update `backend/app/prompts/research.txt` with domain intelligence instructions
145
  - Add instructions for entity and ticker extraction
146
  - Add instructions for structured output
147
  - _Requirements: 7.14_
148
 
149
+ - [x] 9. Enhance Verifier Agent with domain capabilities
150
+ - [x] 9.1 Integrate domain pack verification enhancement
151
  - Modify `backend/app/agents/verifier.py` to detect domain
152
  - Call domain pack enhance_verification() when domain detected
153
  - Add structured credibility scoring output
154
  - _Requirements: 5.3, 7.7, 7.8, 7.9_
155
 
156
+ - [x] 9.2 Update verifier agent prompt
157
  - Update `backend/app/prompts/verifier.txt` with domain intelligence instructions
158
  - Add instructions for rumor and scam detection
159
  - Add instructions for uncertainty surfacing
160
  - _Requirements: 7.14_
161
 
162
+ - [x] 10. Enhance Planner and Synthesizer Agents
163
+ - [x] 10.1 Add simulation mode suggestion to Planner
164
  - Modify `backend/app/agents/planner.py` to detect simulation opportunities
165
  - Add simulation_suggested field to output
166
  - Update `backend/app/prompts/planner.txt` with simulation guidance
167
  - _Requirements: 7.6, 7.14_
168
 
169
+ - [x] 10.2 Add uncertainty quantification to Synthesizer
170
  - Modify `backend/app/agents/synthesizer.py` to quantify uncertainty
171
  - Add simulation recommendation logic
172
  - Update `backend/app/prompts/synthesizer.txt` with uncertainty instructions
173
  - _Requirements: 7.11, 7.12, 7.13, 7.14_
174
 
175
+ - [x] 11. Update graph execution with domain context
176
+ - [x] 11.1 Pass domain pack context through pipeline
177
  - Modify `backend/app/graph.py` to detect domain early
178
  - Pass domain context to all agents
179
  - Ensure domain-enhanced execution flows correctly
180
  - _Requirements: 2.5, 5.3, 7.15_
181
 
182
+ - [x] 12. Checkpoint - Verify agent enhancements
183
  - Ensure Switchboard detects finance domain correctly
184
  - Verify Research agent extracts entities and tickers
185
  - Verify Verifier agent scores credibility
 
188
 
189
  ### Phase 4: Simulation Integration Enhancement
190
 
191
+ - [x] 13. Enhance simulation workflow and case linking
192
+ - [x] 13.1 Add case linking to simulation router
193
  - Modify `backend/app/routers/simulation.py` to link case_id
194
  - Improve error messages for MiroFish failures
195
  - Add better status reporting
196
  - _Requirements: 8.1, 8.11, 8.12_
197
 
198
+ - [x] 13.2 Enhance simulation store with search and filtering
199
  - Modify `backend/app/services/simulation_store.py`
200
  - Add simulation search by title or prediction_goal
201
  - Add simulation filtering by status
202
  - _Requirements: 8.10_
203
 
204
+ - [x] 13.3 Update case storage for simulation linking
205
  - Modify `backend/app/services/case_store.py` to add simulation_id field
206
  - Add case-to-simulation lookup functionality
207
  - Update CaseRecord schema in `backend/app/schemas.py`
208
  - _Requirements: 10.8, 14.9_
209
 
210
+ - [x] 13.4 Add simulation workflow to graph execution
211
  - Modify `backend/app/graph.py` to add simulation handoff logic
212
  - Add simulation result synthesis
213
  - Ensure simulation results flow into final answer
214
  - _Requirements: 3.4, 8.13_
215
 
216
+ - [x] 14. Checkpoint - Verify simulation integration
217
  - Ensure simulation requests create linked cases
218
  - Verify cases with simulations show simulation_id
219
  - Verify simulation results are synthesized correctly
 
221
 
222
  ### Phase 5: API Discovery Subsystem
223
 
224
+ - [x] 15. Create API discovery infrastructure
225
+ - [x] 15.1 Create API discovery structure
226
  - Create `backend/app/services/api_discovery/__init__.py`
227
  - Create `backend/app/services/api_discovery/catalog_loader.py`
228
  - Create `backend/app/services/api_discovery/classifier.py`
 
230
  - Create `backend/app/services/api_discovery/metadata_store.py`
231
  - _Requirements: 9.3, 9.4_
232
 
233
+ - [x] 15.2 Implement catalog loader
234
  - Implement load_public_apis_catalog() to fetch from GitHub or local cache
235
  - Parse API entries with name, description, auth, HTTPS, CORS, category, link
236
  - _Requirements: 9.3, 9.6_
237
 
238
+ - [x] 15.3 Implement API classifier and scorer
239
  - Implement classify_api() to categorize APIs by domain
240
  - Implement score_api_usefulness() to prioritize APIs for integration
241
  - Consider auth simplicity, HTTPS, CORS, category relevance
 
247
  - Add `GET /api-discovery/top-scored` endpoint
248
  - _Requirements: 9.4_
249
 
250
+ - [x] 16. Checkpoint - Verify API discovery
251
  - Ensure catalog loads successfully
252
  - Verify APIs are classified correctly
253
  - Verify scoring produces reasonable priorities
 
255
 
256
  ### Phase 6: Frontend Enhancement
257
 
258
+ - [x] 17. Create layout and navigation infrastructure
259
+ - [x] 17.1 Create layout components
260
  - Create `frontend/src/components/layout/Header.tsx` with branding and navigation
261
  - Create `frontend/src/components/layout/Navigation.tsx` with tab navigation
262
  - Modify `frontend/src/app/layout.tsx` to use new layout components
263
  - _Requirements: 12.1, 12.2_
264
 
265
+ - [x] 17.2 Create common UI components
266
  - Create `frontend/src/components/common/Badge.tsx` for status indicators
267
  - Create `frontend/src/components/common/Card.tsx` for content containers
268
  - Create `frontend/src/components/common/LoadingSpinner.tsx` for loading states
269
  - Create `frontend/src/components/common/ErrorMessage.tsx` for error display
270
  - _Requirements: 12.10, 12.11, 12.15_
271
 
272
+ - [x] 17.3 Create API client and type definitions
273
  - Create `frontend/src/lib/api.ts` with MiroOrgClient class
274
  - Implement methods for all backend endpoints
275
  - Create `frontend/src/lib/types.ts` with TypeScript interfaces
276
  - _Requirements: 11.21_
277
 
278
+ - [x] 18. Create Main Dashboard page
279
+ - [x] 18.1 Implement dashboard with system overview
280
  - Modify `frontend/src/app/page.tsx` to show quick stats
281
  - Display recent cases summary
282
  - Display system health status
283
  - Add navigation to main features
284
  - _Requirements: 12.2_
285
 
286
+ - [x] 19. Create Analyze page and components
287
+ - [x] 19.1 Create Analyze page structure
288
  - Create `frontend/src/app/analyze/page.tsx` with analysis interface
289
  - Create `frontend/src/components/analyze/TaskInput.tsx` for user input
290
  - Create `frontend/src/components/analyze/ModeSelector.tsx` for mode selection
291
  - _Requirements: 12.3, 12.7, 12.8_
292
 
293
+ - [x] 19.2 Create result display components
294
  - Create `frontend/src/components/analyze/ResultViewer.tsx` for final answers
295
  - Create `frontend/src/components/analyze/AgentOutputPanel.tsx` for agent outputs
296
  - Display route/debug badges and confidence indicators
297
  - _Requirements: 12.9, 12.10, 12.11, 12.14_
298
 
299
+ - [x] 20. Create Cases page and components
300
+ - [x] 20.1 Create Cases history interface
301
  - Create `frontend/src/app/cases/page.tsx` with case list
302
  - Create `frontend/src/components/cases/CaseList.tsx` for listing cases
303
  - Create `frontend/src/components/cases/CaseCard.tsx` for case preview
304
  - _Requirements: 12.4, 12.17_
305
 
306
+ - [x] 20.2 Create Case detail view
307
  - Create `frontend/src/app/cases/[id]/page.tsx` for case details
308
  - Create `frontend/src/components/cases/CaseDetail.tsx` for full case display
309
  - Display case_id, routing decision, agent outputs, timestamps
310
  - _Requirements: 12.17_
311
 
312
+ - [x] 21. Create Simulation page and components
313
+ - [x] 21.1 Create Simulation submission interface
314
  - Create `frontend/src/app/simulation/page.tsx` with simulation form
315
  - Create `frontend/src/components/simulation/SimulationForm.tsx` for input
316
  - Create `frontend/src/components/simulation/SimulationStatus.tsx` for status display
317
  - _Requirements: 12.6, 12.13_
318
 
319
+ - [x] 21.2 Create Simulation detail and chat interface
320
  - Create `frontend/src/app/simulation/[id]/page.tsx` for simulation details
321
  - Create `frontend/src/components/simulation/SimulationReport.tsx` for report display
322
  - Create `frontend/src/components/simulation/SimulationChat.tsx` for post-simulation chat
323
  - _Requirements: 12.13_
324
 
325
+ - [x] 22. Create Prompt Lab and Config pages
326
+ - [x] 22.1 Create Prompt Lab interface
327
  - Create `frontend/src/app/prompts/page.tsx` with prompt management
328
  - Create `frontend/src/components/prompts/PromptList.tsx` for listing prompts
329
  - Create `frontend/src/components/prompts/PromptEditor.tsx` for editing
330
  - _Requirements: 12.5_
331
 
332
+ - [x] 22.2 Create Config page
333
  - Create `frontend/src/app/config/page.tsx` with system configuration view
334
  - Display provider status, feature flags, health checks
335
  - _Requirements: 12.1_
336
 
337
+ - [x] 23. Implement dark theme and styling
338
+ - [x] 23.1 Update global styles with dark theme
339
  - Modify `frontend/src/app/globals.css` with dark color palette
340
  - Implement card-based structure with subtle borders
341
  - Add animations and transitions
342
  - Use Inter font family
343
  - _Requirements: 12.15, 12.16_
344
 
345
+ - [x] 24. Checkpoint - Verify frontend functionality
346
  - Ensure all pages are accessible via navigation
347
  - Verify Analyze workflow works end-to-end
348
  - Verify Case history displays correctly
 
353
 
354
  ### Phase 7: Testing and Documentation
355
 
356
+ - [~] 25. Write unit tests for core functionality
357
  - [ ]* 25.1 Write provider abstraction tests
358
  - Test OpenRouter, Ollama, OpenAI provider calls
359
  - Test provider fallback behavior
 
388
  - Test error handling for disabled MiroFish
389
  - _Requirements: 8.1, 8.11, 8.12_
390
 
391
+ - [~] 26. Write property-based tests
392
  - [ ]* 26.1 Write Property 1: Configuration Environment Isolation
393
  - **Property 1: Configuration Environment Isolation**
394
  - **Validates: Requirements 1.8, 6.7**
 
479
  - Test that new domain packs don't require agent changes
480
  - Create mock domain pack and verify integration
481
 
482
+ - [~] 27. Write integration tests
483
  - [ ]* 27.1 Write end-to-end case execution test
484
  - Test complete workflow from user input to final answer
485
  - Verify all agents execute correctly
 
502
  - Verify finance pack capabilities are used
503
  - _Requirements: 5.3, 7.1, 7.7_
504
 
505
+ - [x] 28. Create comprehensive documentation
506
+ - [x] 28.1 Update main README
507
  - Update `README.md` with architecture overview
508
  - Document four-layer architecture
509
  - Document agent roles and responsibilities
 
511
  - Add environment variable reference
512
  - _Requirements: 13.2, 13.3, 13.4, 13.10, 13.11, 13.12_
513
 
514
+ - [x] 28.2 Create architecture documentation
515
  - Create `ARCHITECTURE.md` with detailed architecture description
516
  - Document component interactions
517
  - Document data flow
518
  - _Requirements: 13.10_
519
 
520
+ - [x] 28.3 Create domain pack documentation
521
  - Create `DOMAIN_PACKS.md` with domain pack integration guide
522
  - Document how to create new domain packs
523
  - Document finance pack capabilities
524
  - _Requirements: 13.13_
525
 
526
+ - [x] 28.4 Create testing documentation
527
  - Create `TESTING.md` with testing strategy and guidelines
528
  - Document unit test patterns
529
  - Document property-based test patterns
530
  - Document integration test patterns
531
  - _Requirements: 14.1, 14.2, 14.3_
532
 
533
+ - [x] 28.5 Create deployment documentation
534
  - Create `DEPLOYMENT.md` with deployment instructions
535
  - Document environment setup
536
  - Document dependency installation
 
546
 
547
  ### Phase 8: Cleanup and Optimization
548
 
549
+ - [x] 30. Remove dead code and optimize performance
550
+ - [x] 30.1 Clean up codebase
551
  - Remove unused imports across all files
552
  - Remove commented code
553
  - Remove duplicate implementations
554
  - _Requirements: 1.5_
555
 
556
+ - [x] 30.2 Optimize external API performance
557
  - Add caching for market quotes with 5 minute TTL
558
  - Verify connection pooling is implemented
559
  - Verify request timeouts are configured
560
  - Add rate limiting for external APIs
561
  - _Requirements: 15.3, 15.4, 15.5, 15.6_
562
 
563
+ - [x] 30.3 Polish error messages and logging
564
  - Review all error messages for clarity and consistency
565
  - Review log levels for appropriateness
566
  - Add missing log entries for key operations
567
  - _Requirements: 9.3, 9.8_
568
 
569
+ - [x] 30.4 Security review
570
  - Verify no API keys in source code
571
  - Verify error messages don't leak internals
572
  - Verify input validation is comprehensive
 
579
  - Identify and address bottlenecks
580
  - _Requirements: 15.1, 15.2_
581
 
582
+ - [x] 31. Final checkpoint - System verification
583
  - Ensure no dead code remains
584
  - Verify performance meets requirements
585
  - Verify error messages are clear and consistent
 
589
 
590
  ### Phase 9: Autonomous Knowledge Evolution Layer
591
 
592
+ - [x] 32. Create learning subsystem infrastructure
593
+ - [x] 32.1 Create learning service structure
594
  - Create `backend/app/services/learning/__init__.py`
595
  - Create `backend/app/services/learning/knowledge_ingestor.py`
596
  - Create `backend/app/services/learning/knowledge_store.py`
597
  - Create `backend/app/services/learning/learning_engine.py`
598
  - _Requirements: 17.1, 17.2, 17.3_
599
 
600
+ - [x] 32.2 Create additional learning services
601
  - Create `backend/app/services/learning/prompt_optimizer.py`
602
  - Create `backend/app/services/learning/skill_distiller.py`
603
  - Create `backend/app/services/learning/trust_manager.py`
 
605
  - Create `backend/app/services/learning/scheduler.py`
606
  - _Requirements: 17.1, 17.17, 17.23, 17.28, 17.40_
607
 
608
+ - [x] 32.3 Create data directories
609
  - Create `backend/app/data/knowledge/` directory
610
  - Create `backend/app/data/skills/` directory
611
  - Create `backend/app/data/prompt_versions/` directory
612
  - Create `backend/app/data/learning/` directory
613
  - _Requirements: 17.33, 17.34, 17.35, 17.36_
614
 
615
+ - [x] 33. Implement knowledge ingestion and storage
616
+ - [x] 33.1 Implement knowledge ingestion
617
  - Implement ingest_from_search() using Tavily API
618
  - Implement ingest_from_url() using Jina Reader
619
  - Implement ingest_from_news() using NewsAPI
620
  - Implement compress_content() for summarization (2-4KB limit)
621
  - _Requirements: 17.8, 17.9, 17.10, 17.11_
622
 
623
+ - [x] 33.2 Implement knowledge store
624
  - Implement save_knowledge() with JSON storage
625
  - Implement get_knowledge() and search_knowledge()
626
  - Implement delete_expired_knowledge() with auto-cleanup
 
628
  - Implement LRU eviction when limit reached
629
  - _Requirements: 17.4, 17.5, 17.33, 17.38_
630
 
631
+ - [x] 33.3 Add knowledge schemas
632
  - Add KnowledgeItem schema to `backend/app/schemas.py`
633
  - Add validation for summary length (2-4KB)
634
  - Add trust_score and freshness_score fields
635
  - _Requirements: 17.9_
636
 
637
+ - [x] 34. Implement experience learning
638
+ - [x] 34.1 Implement case learning
639
  - Implement learn_from_case() to extract metadata
640
  - Implement detect_patterns() for repeated patterns
641
  - Implement get_route_effectiveness() for routing insights
642
  - Implement get_prompt_performance() for prompt insights
643
  - _Requirements: 17.13, 17.14, 17.15, 17.16_
644
 
645
+ - [x] 34.2 Add case learning schemas
646
  - Add CaseLearning schema to `backend/app/schemas.py`
647
  - Add fields for route_effectiveness, prompt_performance, provider_reliability
648
  - _Requirements: 17.13_
649
 
650
+ - [x] 34.3 Hook learning into case save flow
651
  - Modify `backend/app/services/case_store.py` to call learn_from_case()
652
  - Store case learning metadata separately
653
  - _Requirements: 17.44_
654
 
655
+ - [x] 35. Implement prompt evolution
656
+ - [x] 35.1 Implement prompt versioning
657
  - Implement create_prompt_variant() using provider API
658
  - Implement test_prompt_variant() with quality metrics
659
  - Implement compare_prompts() for A/B testing
 
661
  - Implement archive_prompt() for old versions
662
  - _Requirements: 17.17, 17.18, 17.19, 17.20, 17.21, 17.22_
663
 
664
+ - [x] 35.2 Add prompt version schemas
665
  - Add PromptVersion schema to `backend/app/schemas.py`
666
  - Add fields for version, status, win_rate, test_count
667
  - _Requirements: 17.17_
668
 
669
+ - [x] 35.3 Integrate with prompt management
670
  - Hook prompt versions into prompt loading
671
  - Store prompt history in prompt_versions directory
672
  - _Requirements: 17.47_
673
 
674
+ - [x] 36. Implement skill distillation
675
+ - [x] 36.1 Implement skill detection and creation
676
  - Implement detect_skill_candidates() from patterns
677
  - Implement distill_skill() to create skill records
678
  - Implement test_skill() for validation
679
  - Implement apply_skill() for skill usage
680
  - _Requirements: 17.23, 17.24, 17.25, 17.26, 17.27_
681
 
682
+ - [x] 36.2 Add skill schemas
683
  - Add Skill schema to `backend/app/schemas.py`
684
  - Add fields for trigger_patterns, recommended_agents, preferred_sources
685
  - _Requirements: 17.24_
686
 
687
+ - [x] 36.3 Integrate skills with agents
688
  - Hook skill application into agent execution
689
  - Store skills in skills directory
690
  - _Requirements: 17.45_
691
 
692
+ - [x] 37. Implement trust and freshness management
693
+ - [x] 37.1 Implement trust management
694
  - Implement get_trust_score() and update_trust()
695
  - Implement list_trusted_sources() and list_untrusted_sources()
696
  - Track verification outcomes
697
  - _Requirements: 17.28, 17.29, 17.30, 17.31, 17.32_
698
 
699
+ - [x] 37.2 Implement freshness management
700
  - Implement calculate_freshness() with domain-specific rules
701
  - Implement update_freshness() and get_stale_items()
702
  - Implement recommend_refresh() for stale items
703
  - _Requirements: 17.28, 17.29, 17.30, 17.31_
704
 
705
+ - [x] 37.3 Add trust and freshness schemas
706
  - Add SourceTrust schema to `backend/app/schemas.py`
707
  - Add FreshnessScore schema to `backend/app/schemas.py`
708
  - _Requirements: 17.28_
709
 
710
+ - [x] 37.4 Integrate with source selection
711
  - Hook trust scores into research agent source selection
712
  - Hook freshness scores into knowledge retrieval
713
  - _Requirements: 17.46_
714
 
715
+ - [x] 38. Implement learning scheduler
716
+ - [x] 38.1 Implement scheduler with safeguards
717
  - Implement schedule_task() with interval configuration
718
  - Implement is_system_idle() to check CPU usage
719
  - Implement is_battery_ok() to check battery level
720
  - Implement run_once() for manual triggers
721
  - _Requirements: 17.40, 17.41, 17.42, 17.43_
722
 
723
+ - [x] 38.2 Add scheduled tasks
724
  - Schedule knowledge ingestion (every 6 hours)
725
  - Schedule expired knowledge cleanup (daily)
726
  - Schedule pattern detection (daily)
 
728
  - Schedule prompt optimization (weekly)
729
  - _Requirements: 17.6, 17.7, 17.40_
730
 
731
+ - [x] 38.3 Add scheduler configuration
732
  - Add LEARNING_ENABLED flag to config
733
  - Add KNOWLEDGE_MAX_SIZE_MB (default 200)
734
  - Add LEARNING_SCHEDULE_INTERVAL
 
736
  - Add domain-specific expiration rules
737
  - _Requirements: 17.4, 17.5, 17.6, 17.7, 17.12_
738
 
739
+ - [x] 39. Add learning API endpoints
740
+ - [x] 39.1 Add learning status endpoints
741
  - Add GET /learning/status endpoint
742
  - Add POST /learning/run-once endpoint
743
  - Add GET /learning/insights endpoint
744
  - _Requirements: 17.49, 17.50, 17.51_
745
 
746
+ - [x] 39.2 Add knowledge endpoints
747
  - Add GET /knowledge endpoint for listing
748
  - Add GET /knowledge/{item_id} endpoint for details
749
  - Add GET /knowledge/search endpoint with query parameter
750
  - _Requirements: 17.52, 17.53, 17.54_
751
 
752
+ - [x] 39.3 Add skill endpoints
753
  - Add GET /skills endpoint for listing
754
  - Add GET /skills/{skill_name} endpoint for details
755
  - Add POST /skills/distill endpoint for manual distillation
756
  - _Requirements: 17.55, 17.56, 17.57_
757
 
758
+ - [x] 39.4 Add trust and freshness endpoints
759
  - Add GET /sources/trust endpoint
760
  - Add GET /sources/freshness endpoint
761
  - _Requirements: 17.58, 17.59_
762
 
763
+ - [x] 39.5 Add prompt evolution endpoints
764
  - Add GET /prompts/versions/{name} endpoint
765
  - Add POST /prompts/optimize/{name} endpoint
766
  - Add POST /prompts/promote/{name}/{version} endpoint
767
  - _Requirements: 17.60, 17.61, 17.62_
768
 
769
+ - [x] 40. Integrate learning layer with existing system
770
+ - [x] 40.1 Integrate with case execution
771
  - Hook learn_from_case() into case save flow
772
  - Store case learning metadata
773
  - _Requirements: 17.44_
774
 
775
+ - [x] 40.2 Integrate with research agent
776
  - Hook knowledge search into research agent
777
  - Use trust scores for source selection
778
  - _Requirements: 17.45, 17.46_
779
 
780
+ - [x] 40.3 Integrate with simulation
781
  - Learn from simulation outcomes
782
  - Store simulation insights
783
  - _Requirements: 17.46_
784
 
785
+ - [x] 40.4 Integrate with prompt management
786
  - Hook prompt versions into prompt loading
787
  - Track prompt performance
788
  - _Requirements: 17.47_
 
820
  - Test scheduler respects rate limits
821
  - _Requirements: 17.6, 17.7, 17.40, 17.41, 17.42_
822
 
823
+ - [x] 42. Final checkpoint - Learning layer verification
824
  - Ensure the learning subsystem runs without stressing the laptop
825
  - Verify knowledge cache stays under 200MB
826
  - Verify scheduler respects battery and CPU constraints
ARCHITECTURE.md ADDED
@@ -0,0 +1,567 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MiroOrg v1.1 Architecture
2
+
3
+ ## System Overview
4
+
5
+ MiroOrg v1.1 is a 5-layer AI intelligence system designed for single-user local deployment, with autonomous learning capabilities built in.
6
+
7
+ ## Five-Layer Architecture
8
+
9
+ ```
10
+ ┌─────────────────────────────────────────────────────────────┐
11
+ │ Layer 5: Autonomous Knowledge Evolution │
12
+ │ - Knowledge ingestion from web/news │
13
+ │ - Experience learning from cases │
14
+ │ - Prompt evolution via A/B testing │
15
+ │ - Skill distillation from patterns │
16
+ │ - Trust & freshness management │
17
+ └─────────────────────────────────────────────────────────────┘
18
+
19
+ ┌─────────────────────────────────────────────────────────────┐
20
+ │ Layer 4: Simulation Lab Integration │
21
+ │ - MiroFish adapter for scenario modeling │
22
+ │ - Case-simulation linking │
23
+ │ - Simulation result synthesis │
24
+ └─────────────────────────────────────────────────────────────┘
25
+
26
+ ┌─────────────────────────────────────────────────────────────┐
27
+ │ Layer 3: Agent Orchestration │
28
+ │ - Switchboard (4D routing) │
29
+ │ - Research (domain-enhanced) │
30
+ │ - Verifier (credibility scoring) │
31
+ │ - Planner (simulation-aware) │
32
+ │ - Synthesizer (uncertainty quantification) │
33
+ └─────────────────────────────────────────────────────────────┘
34
+
35
+ ┌─────────────────────────────────────────────────────────────┐
36
+ │ Layer 2: Domain Intelligence │
37
+ │ - Pluggable domain packs │
38
+ │ - Finance pack (market data, news, analysis) │
39
+ │ - Domain registry & detection │
40
+ └─────────────────────────────────────────────────────────────┘
41
+
42
+ ┌─────────────────────────────────────────────────────────────┐
43
+ │ Layer 1: Core Platform │
44
+ │ - Multi-provider LLM (OpenRouter/Ollama/OpenAI) │
45
+ │ - Automatic fallback │
46
+ │ - Configuration management │
47
+ │ - Data persistence │
48
+ └─────────────────────────────────────────────────────────────┘
49
+ ```
50
+
51
+ ## Layer 1: Core Platform
52
+
53
+ ### Multi-Provider LLM Abstraction
54
+
55
+ The system supports three LLM providers with automatic fallback:
56
+
57
+ 1. **OpenRouter** - Recommended for production
58
+ - Access to multiple models
59
+ - Pay-per-use pricing
60
+ - High availability
61
+
62
+ 2. **Ollama** - Local deployment
63
+ - Privacy-focused
64
+ - No API costs
65
+ - Requires local GPU/CPU
66
+
67
+ 3. **OpenAI** - High quality
68
+ - GPT-4 and GPT-3.5
69
+ - Reliable performance
70
+ - Higher cost
71
+
72
+ **Fallback Chain:**
73
+ ```
74
+ Primary Provider → Fallback Provider → Error
75
+ ```
76
+
77
+ ### Configuration Management
78
+
79
+ All configuration via environment variables:
80
+ - Provider selection
81
+ - API keys
82
+ - Feature flags
83
+ - Domain pack enablement
84
+ - Learning layer settings
85
+
86
+ ### Data Persistence
87
+
88
+ JSON-based storage for single-user deployment:
89
+ - Cases: `backend/app/data/memory/`
90
+ - Simulations: `backend/app/data/simulations/`
91
+ - Knowledge: `backend/app/data/knowledge/`
92
+ - Skills: `backend/app/data/skills/`
93
+ - Prompt versions: `backend/app/data/prompt_versions/`
94
+
95
+ ## Layer 2: Domain Intelligence
96
+
97
+ ### Domain Pack System
98
+
99
+ Domain packs are pluggable modules that enhance agent capabilities:
100
+
101
+ ```python
102
+ class DomainPack(ABC):
103
+ @property
104
+ @abstractmethod
105
+ def name(self) -> str:
106
+ """Domain pack name (e.g., 'finance')"""
107
+ pass
108
+
109
+ @property
110
+ @abstractmethod
111
+ def keywords(self) -> List[str]:
112
+ """Keywords for domain detection"""
113
+ pass
114
+
115
+ @abstractmethod
116
+ async def enhance_research(self, query: str, context: Dict) -> Dict:
117
+ """Enhance research with domain-specific capabilities"""
118
+ pass
119
+
120
+ @abstractmethod
121
+ async def enhance_verification(self, claims: List[str], context: Dict) -> Dict:
122
+ """Enhance verification with domain-specific checks"""
123
+ pass
124
+
125
+ @abstractmethod
126
+ def get_capabilities(self) -> List[str]:
127
+ """List domain-specific capabilities"""
128
+ pass
129
+ ```
130
+
131
+ ### Finance Domain Pack
132
+
133
+ Included capabilities:
134
+ - **Market Data**: Real-time quotes via Alpha Vantage
135
+ - **News**: Financial news via NewsAPI
136
+ - **Entity Resolution**: Extract and normalize company names
137
+ - **Ticker Resolution**: Map companies to stock tickers
138
+ - **Source Checking**: Credibility scoring for financial sources
139
+ - **Rumor Detection**: Identify unverified claims
140
+ - **Scam Detection**: Flag potential scams
141
+ - **Stance Detection**: Analyze sentiment and stance
142
+ - **Event Analysis**: Analyze market-moving events
143
+ - **Prediction**: Generate market predictions
144
+
145
+ ### Domain Detection
146
+
147
+ Automatic domain detection based on keywords:
148
+ ```python
149
+ # Finance keywords
150
+ ["stock", "market", "trading", "investment", "portfolio",
151
+ "earnings", "dividend", "IPO", "merger", "acquisition"]
152
+ ```
153
+
154
+ ## Layer 3: Agent Orchestration
155
+
156
+ ### Agent Pipeline
157
+
158
+ ```
159
+ User Input
160
+
161
+ Switchboard (4D Routing)
162
+
163
+ Research (Domain-Enhanced)
164
+
165
+ Planner (Simulation-Aware)
166
+
167
+ Verifier (Credibility Scoring)
168
+
169
+ Synthesizer (Final Answer)
170
+
171
+ Case Storage
172
+ ```
173
+
174
+ ### Switchboard Agent
175
+
176
+ **Four-Dimensional Classification:**
177
+
178
+ 1. **Task Family**
179
+ - `normal`: Standard analysis
180
+ - `simulation`: Scenario modeling
181
+
182
+ 2. **Domain Pack**
183
+ - `finance`: Financial domain
184
+ - `general`: General knowledge
185
+ - `policy`: Policy analysis
186
+ - `custom`: Custom domains
187
+
188
+ 3. **Complexity**
189
+ - `simple`: ≤5 words
190
+ - `medium`: 6-25 words
191
+ - `complex`: >25 words
192
+
193
+ 4. **Execution Mode**
194
+ - `solo`: Synthesizer only (simple queries)
195
+ - `standard`: Research → Synthesizer (medium queries)
196
+ - `deep`: Full pipeline (complex queries)
197
+
198
+ ### Research Agent
199
+
200
+ **Capabilities:**
201
+ - Web search via Tavily
202
+ - News search via NewsAPI
203
+ - Domain-specific data sources
204
+ - Entity extraction
205
+ - Source credibility assessment
206
+
207
+ **Domain Enhancement:**
208
+ ```python
209
+ if domain == "finance":
210
+ # Extract tickers and entities
211
+ # Fetch market data
212
+ # Get financial news
213
+ # Score source credibility
214
+ ```
215
+
216
+ ### Verifier Agent
217
+
218
+ **Verification Process:**
219
+ 1. Extract claims from research
220
+ 2. Check source reliability
221
+ 3. Detect rumors and scams
222
+ 4. Quantify uncertainty
223
+ 5. Flag high-risk claims
224
+
225
+ **Domain Enhancement:**
226
+ ```python
227
+ if domain == "finance":
228
+ # Check financial source credibility
229
+ # Detect market manipulation
230
+ # Verify regulatory compliance
231
+ ```
232
+
233
+ ### Planner Agent
234
+
235
+ **Planning Process:**
236
+ 1. Analyze research findings
237
+ 2. Identify action items
238
+ 3. Detect simulation opportunities
239
+ 4. Create structured plan
240
+
241
+ **Simulation Detection:**
242
+ - Keywords: "predict", "what if", "scenario", "impact"
243
+ - Uncertainty level: High
244
+ - Recommendation: Suggest simulation mode
245
+
246
+ ### Synthesizer Agent
247
+
248
+ **Synthesis Process:**
249
+ 1. Combine all agent outputs
250
+ 2. Quantify uncertainty
251
+ 3. Generate final answer
252
+ 4. Add metadata (confidence, sources, etc.)
253
+
254
+ **Output Structure:**
255
+ ```json
256
+ {
257
+ "summary": "Final answer",
258
+ "confidence": 0.85,
259
+ "uncertainty_factors": [...],
260
+ "sources": [...],
261
+ "simulation_recommended": false
262
+ }
263
+ ```
264
+
265
+ ## Layer 4: Simulation Lab Integration
266
+
267
+ ### MiroFish Adapter
268
+
269
+ Adapter pattern for simulation integration:
270
+
271
+ ```python
272
+ class MiroFishClient:
273
+ async def submit_simulation(self, title, seed_text, prediction_goal):
274
+ """Submit simulation to MiroFish"""
275
+
276
+ async def get_status(self, simulation_id):
277
+ """Get simulation status"""
278
+
279
+ async def get_report(self, simulation_id):
280
+ """Get simulation report"""
281
+
282
+ async def chat(self, simulation_id, message):
283
+ """Chat with simulation"""
284
+ ```
285
+
286
+ ### Case-Simulation Linking
287
+
288
+ Cases and simulations are linked:
289
+ ```json
290
+ {
291
+ "case_id": "uuid",
292
+ "simulation_id": "uuid",
293
+ "user_input": "...",
294
+ "route": {...},
295
+ "outputs": {...},
296
+ "final_answer": "..."
297
+ }
298
+ ```
299
+
300
+ ### Simulation Workflow
301
+
302
+ ```
303
+ User Input with Simulation Keywords
304
+
305
+ Switchboard (task_family="simulation")
306
+
307
+ Research (gather context)
308
+
309
+ Submit to MiroFish
310
+
311
+ Poll for completion
312
+
313
+ Synthesize results
314
+
315
+ Return to user
316
+ ```
317
+
318
+ ## Layer 5: Autonomous Knowledge Evolution
319
+
320
+ ### Knowledge Ingestion
321
+
322
+ **Sources:**
323
+ - Web search (Tavily)
324
+ - News (NewsAPI)
325
+ - URLs (Jina Reader)
326
+
327
+ **Process:**
328
+ 1. Fetch content from source
329
+ 2. Compress to 2-4KB summary using LLM
330
+ 3. Store with metadata
331
+ 4. Enforce 200MB storage limit
332
+ 5. LRU eviction when limit reached
333
+
334
+ **Scheduling:**
335
+ - Every 6 hours (configurable)
336
+ - Only when system idle (CPU <50%)
337
+ - Only when battery OK (>30%)
338
+
339
+ ### Experience Learning
340
+
341
+ **Learning from Cases:**
342
+ 1. Extract metadata from case execution
343
+ 2. Detect patterns (domain, sources, agents)
344
+ 3. Track route effectiveness
345
+ 4. Track prompt performance
346
+ 5. Track provider reliability
347
+
348
+ **Pattern Detection:**
349
+ - Domain expertise patterns
350
+ - Preferred source patterns
351
+ - Agent workflow patterns
352
+ - Minimum frequency: 3 occurrences
353
+
354
+ ### Prompt Evolution
355
+
356
+ **A/B Testing Process:**
357
+ 1. Create prompt variant using LLM
358
+ 2. Test variant with sample inputs
359
+ 3. Measure quality metrics
360
+ 4. Compare win rates
361
+ 5. Promote if criteria met (>70% win rate, >10 tests)
362
+
363
+ **Versioning:**
364
+ ```json
365
+ {
366
+ "id": "uuid",
367
+ "prompt_name": "research",
368
+ "version": 2,
369
+ "status": "testing",
370
+ "test_count": 15,
371
+ "win_count": 12,
372
+ "win_rate": 0.80
373
+ }
374
+ ```
375
+
376
+ ### Skill Distillation
377
+
378
+ **Skill Creation:**
379
+ 1. Detect patterns in successful cases
380
+ 2. Distill into reusable skills
381
+ 3. Test skills with validation cases
382
+ 4. Track usage and success rate
383
+
384
+ **Skill Structure:**
385
+ ```json
386
+ {
387
+ "id": "domain_expertise_finance",
388
+ "type": "domain_expertise",
389
+ "trigger_patterns": ["stock", "market"],
390
+ "recommended_agents": ["research", "verifier"],
391
+ "preferred_sources": ["bloomberg", "reuters"],
392
+ "success_rate": 0.85
393
+ }
394
+ ```
395
+
396
+ ### Trust Management
397
+
398
+ **Source Trust Scoring:**
399
+ - Initial score: 0.5 (neutral)
400
+ - Updated based on verification outcomes
401
+ - Exponential moving average
402
+ - Minimum verifications: 3
403
+
404
+ **Trust Categories:**
405
+ - Trusted: score ≥0.7, verifications ≥3
406
+ - Untrusted: score ≤0.3, verifications ≥3
407
+ - Unknown: verifications <3
408
+
409
+ ### Freshness Management
410
+
411
+ **Domain-Specific Expiration:**
412
+ - Finance: 7-day half-life
413
+ - General: 30-day half-life
414
+ - Exponential decay: `freshness = 2^(-age_days / half_life_days)`
415
+
416
+ **Refresh Recommendations:**
417
+ - Freshness <0.3: Stale
418
+ - Prioritize by staleness
419
+ - Automatic refresh during scheduled ingestion
420
+
421
+ ### Learning Scheduler
422
+
423
+ **Safeguards:**
424
+ - CPU usage check: <50%
425
+ - Battery level check: >30%
426
+ - System idle detection
427
+ - Error handling with backoff
428
+
429
+ **Scheduled Tasks:**
430
+ - Knowledge ingestion: Every 6 hours
431
+ - Expired cleanup: Daily
432
+ - Pattern detection: Daily
433
+ - Skill distillation: Weekly
434
+ - Prompt optimization: Weekly
435
+
436
+ ## Data Flow
437
+
438
+ ### Normal Request Flow
439
+
440
+ ```
441
+ 1. User submits request
442
+ 2. Switchboard classifies (4D)
443
+ 3. Domain pack detected
444
+ 4. Research gathers info (domain-enhanced)
445
+ 5. Planner creates plan
446
+ 6. Verifier validates (domain-enhanced)
447
+ 7. Synthesizer produces answer
448
+ 8. Case saved
449
+ 9. Learning extracts metadata
450
+ ```
451
+
452
+ ### Simulation Request Flow
453
+
454
+ ```
455
+ 1. User submits request with simulation keywords
456
+ 2. Switchboard detects simulation
457
+ 3. Research gathers context
458
+ 4. Submit to MiroFish
459
+ 5. Poll for completion
460
+ 6. Synthesize results
461
+ 7. Case and simulation saved
462
+ 8. Learning extracts metadata
463
+ ```
464
+
465
+ ### Learning Flow
466
+
467
+ ```
468
+ 1. Scheduler checks system conditions
469
+ 2. If idle and battery OK:
470
+ a. Ingest knowledge from sources
471
+ b. Compress to 2-4KB summaries
472
+ c. Store with metadata
473
+ d. Enforce storage limits
474
+ 3. Detect patterns in recent cases
475
+ 4. Distill skills from patterns
476
+ 5. Optimize prompts via A/B testing
477
+ 6. Update trust scores
478
+ 7. Calculate freshness scores
479
+ ```
480
+
481
+ ## Deployment Considerations
482
+
483
+ ### Single-User Local (Recommended)
484
+
485
+ **Optimizations:**
486
+ - JSON storage (no database)
487
+ - Learning scheduler with safeguards
488
+ - 200MB knowledge cache limit
489
+ - CPU/battery awareness
490
+ - Automatic LRU eviction
491
+
492
+ **Hardware Requirements:**
493
+ - 8GB RAM minimum
494
+ - 256GB storage minimum
495
+ - M2 Air or equivalent
496
+
497
+ ### Multi-User Production
498
+
499
+ **Required Changes:**
500
+ - PostgreSQL for storage
501
+ - Redis for caching
502
+ - Authentication/authorization
503
+ - Rate limiting
504
+ - Request queuing
505
+ - Monitoring/alerting
506
+ - Horizontal scaling
507
+
508
+ ## Security
509
+
510
+ ### API Key Management
511
+
512
+ - All keys in environment variables
513
+ - Never committed to source control
514
+ - Validated on startup
515
+
516
+ ### Error Handling
517
+
518
+ - Sanitized error messages
519
+ - No internal details leaked
520
+ - Comprehensive logging
521
+
522
+ ### Input Validation
523
+
524
+ - Pydantic schemas for all inputs
525
+ - Type checking
526
+ - Length limits
527
+ - Sanitization
528
+
529
+ ## Performance
530
+
531
+ ### Response Times
532
+
533
+ - Simple queries: <5s
534
+ - Medium queries: <15s
535
+ - Complex queries: <30s
536
+ - Simulations: 60-120s
537
+
538
+ ### Optimization Strategies
539
+
540
+ - Connection pooling for external APIs
541
+ - Request timeouts (30s)
542
+ - Caching for market quotes (5min TTL)
543
+ - Rate limiting for external APIs
544
+ - Async/await throughout
545
+
546
+ ## Monitoring
547
+
548
+ ### Health Checks
549
+
550
+ - `/health` - Basic health
551
+ - `/health/deep` - Detailed health with provider status
552
+
553
+ ### Metrics
554
+
555
+ - Case execution time
556
+ - Provider success/failure rates
557
+ - Domain pack usage
558
+ - Learning task completion
559
+ - Storage usage
560
+
561
+ ### Logging
562
+
563
+ - Structured logging
564
+ - Log levels: DEBUG, INFO, WARNING, ERROR
565
+ - Rotation and retention
566
+ - Provider fallback events
567
+ - Learning task events
README.md CHANGED
@@ -1,182 +1,348 @@
1
- # MiroOrg Basic v2
2
 
3
- A multi-agent AI organization system built with FastAPI, LangGraph, and Google Gemini.
4
 
5
  ## Overview
6
 
7
- MiroOrg Basic v2 is an intelligent system that processes user requests through a pipeline of specialized AI agents:
8
- - **Switchboard**: Routes and categorizes incoming requests
9
- - **Research**: Gathers relevant information and context
10
- - **Planner**: Creates actionable plans based on research
11
- - **Verifier**: Validates plans and identifies potential issues
12
- - **Synthesizer**: Produces final, comprehensive responses
 
13
 
14
  ## Architecture
15
 
16
- - **Backend**: FastAPI application with REST endpoints
17
- - **Orchestration**: LangGraph for agent workflow management
18
- - **AI Model**: Google Gemini for agent responses
19
- - **Memory**: JSON-based case persistence
20
- - **Configuration**: Environment variables for API keys
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  ## Quick Start
23
 
24
  ### Prerequisites
25
 
26
- - Python 3.14+
27
- - Google Gemini API key
 
 
 
 
28
 
29
- ### Installation
30
 
31
- 1. Clone the repository:
32
  ```bash
33
- git clone <repository-url>
34
- cd miroorg-basic-v2
 
 
35
  ```
36
 
37
- 2. Set up virtual environment:
38
  ```bash
39
- cd backend
40
- python3 -m venv venv
41
- source venv/bin/activate # On Windows: venv\Scripts\activate
42
  ```
43
 
44
- 3. Install dependencies:
45
  ```bash
46
- pip install -r requirements.txt
47
  ```
48
 
49
- 4. Configure environment:
 
 
 
 
50
  ```bash
51
- cp .env.example .env
52
- # Edit .env and add your GEMINI_API_KEY
53
  ```
54
 
55
- 5. Run the application:
56
  ```bash
57
- uvicorn app.main:app --reload
 
58
  ```
59
 
60
- The API will be available at `http://localhost:8000`
 
 
 
61
 
62
- ## API Endpoints
 
 
63
 
64
- ### GET /health
65
- Health check endpoint.
66
 
67
- **Response:**
68
- ```json
69
- {
70
- "status": "healthy"
71
- }
 
 
 
72
  ```
73
 
74
- ### POST /run
75
- Process a user request through the agent pipeline.
76
 
77
- **Request:**
78
- ```json
79
- {
80
- "user_input": "Your request here"
81
- }
82
- ```
83
 
84
- **Response:**
85
- ```json
86
- {
87
- "case_id": "uuid-string",
88
- "route": {...},
89
- "research": {...},
90
- "planner": {...},
91
- "verifier": {...},
92
- "final": {...}
93
- }
94
- ```
95
 
96
- ## Configuration
 
97
 
98
- Create a `.env` file in the backend directory:
 
 
99
 
100
- ```env
101
- GEMINI_API_KEY=your_api_key_here
102
- MODEL_NAME=gemini-1.5-flash
 
103
  ```
104
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  ## Project Structure
106
 
107
  ```
108
  backend/
109
  ├── app/
110
- │ ├── __init__.py
111
- │ ├── config.py # Configuration and paths
112
- │ ├── main.py # FastAPI application
113
- │ ├── schemas.py # Pydantic models
114
- │ ├── memory.py # Case persistence
115
- │ ├── graph.py # LangGraph orchestration
116
- ── agents/ # Agent implementations
117
- ├── __init__.py
118
- │ │ ├── _model.py # Gemini API wrapper
119
- │ │ ├── switchboard.py
120
- │ │ ── research.py
121
- ├── planner.py
122
- │ │ ├── verifier.py
123
- │ │ ── synthesizer.py
124
- │ ├── prompts/ # Agent instruction files
125
- │ │ ├── research.txt
126
- │ │ ├── planner.txt
127
- │ │ ├── verifier.txt
128
- │ │ ── synthesizer.txt
129
- │ └── data/
130
- ├── logs/ # Application logs
131
- ── memory/ # Saved cases
132
- ├── .env # Environment variables
133
- ── requirements.txt # Python dependencies
134
- ── README.md
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  ```
136
 
137
  ## Development
138
 
139
- ### Running Tests
140
 
141
- ```bash
142
- pytest
143
- ```
 
 
144
 
145
- ### Adding New Agents
146
 
147
- 1. Create agent file in `app/agents/`
148
- 2. Add prompt file in `app/prompts/`
149
- 3. Update `graph.py` to include new node
150
- 4. Add to `schemas.py` if needed
151
 
152
- ### Logging
153
 
154
- Logs are written to `app/data/logs/` with rotation. Check logs for debugging agent execution and API errors.
 
 
 
 
155
 
156
- ## Deployment
 
 
 
 
 
157
 
158
- ### Docker
 
 
 
 
159
 
160
- ```dockerfile
161
- FROM python:3.14-slim
162
 
163
- WORKDIR /app
164
- COPY requirements.txt .
165
- RUN pip install -r requirements.txt
166
 
167
- COPY . .
168
- EXPOSE 8000
169
 
170
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
171
- ```
 
 
172
 
173
- ### Production Considerations
174
 
175
- - Set `GEMINI_API_KEY` securely (environment variables, secrets management)
176
- - Configure logging for your deployment environment
177
- - Add rate limiting and authentication as needed
178
- - Monitor agent performance and API usage
 
 
 
 
179
 
180
  ## License
181
 
182
- [Add your license here]
 
 
 
 
 
1
+ # MiroOrg v1.1 - AI Financial Intelligence System
2
 
3
+ A general intelligence operating system that orchestrates multiple specialist agents, runs simulations, and supports pluggable domain packs with autonomous knowledge evolution.
4
 
5
  ## Overview
6
 
7
+ MiroOrg v1.1 is a 6-layer AI system that processes user requests through specialized agents, integrates domain-specific intelligence, runs simulations, continuously improves itself over time, and monitors its own health:
8
+
9
+ - **Layer 1: Core Platform** - Multi-provider LLM abstraction (OpenRouter, Ollama, OpenAI) with automatic fallback
10
+ - **Layer 2: Domain Intelligence** - Pluggable domain packs (starting with Finance) that enhance agent capabilities
11
+ - **Layer 3: Agent Orchestration** - Five specialized agents working in concert
12
+ - **Layer 4: Simulation Lab** - Integration with MiroFish for scenario modeling
13
+ - **Layer 5: Autonomous Learning** - Self-improvement through knowledge ingestion, prompt evolution, and skill distillation
+ - **Layer 6: Sentinel** - Health monitoring, LLM-based diagnosis, and safe auto-patching
14
 
15
  ## Architecture
16
 
17
+ ### Five Specialized Agents
18
+
19
+ 1. **Switchboard** - Routes requests using 4-dimensional classification:
20
+ - Task family (normal/simulation)
21
+ - Domain pack (finance/general/policy/custom)
22
+ - Complexity (simple/medium/complex)
23
+ - Execution mode (solo/standard/deep)
24
+
25
+ 2. **Research** - Gathers information with domain-enhanced capabilities:
26
+ - Web search via Tavily
27
+ - News via NewsAPI
28
+ - Financial data via Alpha Vantage
29
+ - Entity and ticker extraction for finance domain
30
+
31
+ 3. **Verifier** - Validates information with credibility scoring:
32
+ - Source reliability checking
33
+ - Rumor and scam detection
34
+ - Uncertainty quantification
35
+
36
+ 4. **Planner** - Creates actionable plans with simulation awareness:
37
+ - Detects opportunities for scenario modeling
38
+ - Suggests simulation mode when appropriate
39
+
40
+ 5. **Synthesizer** - Produces final answers with confidence metrics:
41
+ - Uncertainty quantification
42
+ - Simulation recommendations
43
+ - Structured output with metadata
44
+
45
+ ### Domain Pack System
46
+
47
+ Domain packs are pluggable modules that enhance agent capabilities for specific domains:
48
+
49
+ - **Finance Domain Pack** (included):
50
+ - Market data integration
51
+ - News analysis
52
+ - Entity/ticker resolution
53
+ - Credibility scoring
54
+ - Stance detection
55
+ - Event analysis
56
+ - Prediction capabilities
57
+
58
+ - **Custom Domain Packs** (extensible):
59
+ - Implement `DomainPack` base class
60
+ - Register in domain registry
61
+ - Agents automatically detect and use capabilities
62
+
63
+ ### Autonomous Learning Layer
64
+
65
+ The system improves itself over time without local model training:
66
+
67
+ - **Knowledge Ingestion**: Automatically ingests knowledge from web search, news, and URLs
68
+ - **Experience Learning**: Learns from case execution patterns
69
+ - **Prompt Evolution**: A/B tests and evolves agent prompts
70
+ - **Skill Distillation**: Extracts reusable skills from repeated patterns
71
+ - **Trust Management**: Tracks source reliability over time
72
+ - **Freshness Management**: Manages knowledge expiration with domain-specific rules
73
+ - **Scheduler**: Runs learning tasks with CPU/battery safeguards for laptop deployment
74
+
75
+ Storage limits: 200MB max for knowledge cache, 2-4KB per summary.
76
 
77
  ## Quick Start
78
 
79
  ### Prerequisites
80
 
81
+ - Python 3.10+
82
+ - Node.js 18+ (for frontend)
83
+ - At least one LLM provider:
84
+ - OpenRouter API key (recommended), or
85
+ - Ollama running locally, or
86
+ - OpenAI API key
87
 
88
+ ### Backend Setup
89
 
90
+ 1. Clone and navigate to backend:
91
  ```bash
92
+ cd backend
93
+ python3 -m venv .venv
94
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
95
+ pip install -r requirements.txt
96
  ```
97
 
98
+ 2. Configure environment:
99
  ```bash
100
+ cp .env.example .env
101
+ # Edit .env and add your API keys
 
102
  ```
103
 
104
+ 3. Run backend:
105
  ```bash
106
+ python -m uvicorn app.main:app --reload --host 0.0.0.0 --port 8000
107
  ```
108
 
109
+ Backend available at `http://localhost:8000`
110
+
111
+ ### Frontend Setup
112
+
113
+ 1. Navigate to frontend:
114
  ```bash
115
+ cd frontend
116
+ npm install
117
  ```
118
 
119
+ 2. Configure environment:
120
  ```bash
121
+ cp .env.local.example .env.local
122
+ # Edit if needed (defaults to localhost:8000)
123
  ```
124
 
125
+ 3. Run frontend:
126
+ ```bash
127
+ npm run dev
128
+ ```
129
 
130
+ Frontend available at `http://localhost:3000`
131
+
132
+ ## Environment Variables
133
 
134
+ ### Required
 
135
 
136
+ ```env
137
+ # Primary LLM Provider (choose one)
138
+ PRIMARY_PROVIDER=openrouter # or ollama, openai
139
+ OPENROUTER_API_KEY=your_key_here
140
+ # OR
141
+ OLLAMA_ENABLED=true
142
+ # OR
143
+ OPENAI_API_KEY=your_key_here
144
  ```
145
 
146
+ ### Optional Services
 
147
 
148
+ ```env
149
+ # Web Search
150
+ TAVILY_API_KEY=your_key_here
 
 
 
151
 
152
+ # News
153
+ NEWSAPI_KEY=your_key_here
 
 
 
 
 
 
 
 
 
154
 
155
+ # Financial Data
156
+ ALPHAVANTAGE_API_KEY=your_key_here
157
 
158
+ # Simulation Lab
159
+ MIROFISH_ENABLED=true
160
+ MIROFISH_API_BASE=http://127.0.0.1:5001
161
 
162
+ # Learning Layer
163
+ LEARNING_ENABLED=true
164
+ KNOWLEDGE_MAX_SIZE_MB=200
165
+ LEARNING_TOPICS=finance,markets,technology,policy
166
  ```
167
 
168
+ See `.env.example` for complete configuration options.
169
+
170
+ ## API Endpoints
171
+
172
+ ### Core Endpoints
173
+
174
+ - `GET /health` - Health check
175
+ - `GET /health/deep` - Detailed health with provider status
176
+ - `GET /config/status` - System configuration
177
+ - `POST /run` - Process user request through agent pipeline
178
+ - `GET /cases` - List case history
179
+ - `GET /cases/{id}` - Get case details
180
+
181
+ ### Agent Endpoints
182
+
183
+ - `GET /agents` - List all agents
184
+ - `GET /agents/{name}` - Get agent details
185
+ - `POST /run/agent` - Run single agent
186
+
187
+ ### Prompt Management
188
+
189
+ - `GET /prompts` - List all prompts
190
+ - `GET /prompts/{name}` - Get prompt content
191
+ - `PUT /prompts/{name}` - Update prompt
192
+
193
+ ### Simulation Endpoints
194
+
195
+ - `POST /simulation/run` - Submit simulation request
196
+ - `GET /simulation/{id}` - Get simulation status
197
+ - `GET /simulation/{id}/report` - Get simulation report
198
+ - `POST /simulation/{id}/chat` - Chat with simulation
199
+
200
+ ### Learning Endpoints
201
+
202
+ - `GET /learning/status` - Learning engine status
203
+ - `POST /learning/run-once` - Manually trigger learning task
204
+ - `GET /learning/insights` - Learning insights
205
+ - `GET /learning/knowledge` - List knowledge items
206
+ - `GET /learning/knowledge/search` - Search knowledge
207
+ - `POST /learning/knowledge/ingest` - Ingest knowledge
208
+ - `GET /learning/skills` - List distilled skills
209
+ - `POST /learning/skills/distill` - Distill new skills
210
+ - `GET /learning/sources/trust` - Get trusted sources
211
+ - `GET /learning/sources/freshness` - Get stale items
212
+ - `GET /learning/prompts/versions/{name}` - Get prompt versions
213
+ - `POST /learning/prompts/optimize/{name}` - Optimize prompt
214
+ - `POST /learning/prompts/promote/{name}/{version}` - Promote prompt
215
+
216
  ## Project Structure
217
 
218
  ```
219
  backend/
220
  ├── app/
221
+ │ ├── agents/ # Five specialized agents
222
+ │ │ ├── _model.py # Multi-provider LLM abstraction
223
+ │ │ ├── switchboard.py # 4D routing
224
+ │ │ ├── research.py # Domain-enhanced research
225
+ │ │ ├── verifier.py # Credibility scoring
226
+ │ │ ├── planner.py # Simulation-aware planning
227
+ │ │ └── synthesizer.py # Final answer generation
228
+ │ ├── domain_packs/ # Pluggable domain intelligence
229
+ │ │ ├── base.py # DomainPack base class
230
+ │ │ ├── registry.py # Domain pack registry
231
+ │ │ └── finance/ # Finance domain pack
232
+ │ ├── services/ # Core services
233
+ │ │ ├── learning/ # Autonomous learning layer
234
+ │ │ │ ├── knowledge_ingestor.py
235
+ │ │ │ ├── knowledge_store.py
236
+ │ │ │ ├── learning_engine.py
237
+ │ │ │ ├── prompt_optimizer.py
238
+ │ │ │ ├── skill_distiller.py
239
+ │ │ │ ├── trust_manager.py
240
+ │ │ │ └── scheduler.py
241
+ │ │ ├── api_discovery/ # API discovery subsystem
242
+ │ │ ├── case_store.py # Case persistence
243
+ │ │ ├── simulation_store.py
244
+ │ │ └── external_sources.py
245
+ │ ├── routers/ # API routers
246
+ │ │ ├── simulation.py
247
+ │ │ └── learning.py
248
+ │ ├── prompts/ # Agent prompts
249
+ │ ├── data/ # Data storage
250
+ │ │ ├── memory/ # Case records
251
+ │ │ ├── simulations/ # Simulation records
252
+ │ │ ├── knowledge/ # Knowledge cache
253
+ │ │ ├── skills/ # Distilled skills
254
+ │ │ ├── prompt_versions/ # Prompt versions
255
+ │ │ └── learning/ # Learning metadata
256
+ │ ├── config.py # Configuration
257
+ │ ├── main.py # FastAPI app
258
+ │ ├── schemas.py # Pydantic models
259
+ │ └── graph.py # Agent orchestration
260
+
261
+ frontend/
262
+ ├── src/
263
+ │ ├── app/ # Next.js pages
264
+ │ │ ├── page.tsx # Dashboard
265
+ │ │ ├── analyze/ # Analysis interface
266
+ │ │ ├── cases/ # Case history
267
+ │ │ ├── simulation/ # Simulation interface
268
+ │ │ ├── prompts/ # Prompt lab
269
+ │ │ └── config/ # System config
270
+ │ ├── components/ # React components
271
+ │ │ ├── layout/
272
+ │ │ ├── common/
273
+ │ │ ├── analyze/
274
+ │ │ ├── cases/
275
+ │ │ ├── simulation/
276
+ │ │ └── prompts/
277
+ │ └── lib/
278
+ │ ├── api.ts # API client
279
+ │ └── types.ts # TypeScript types
280
  ```
281
 
282
  ## Development
283
 
284
+ ### Adding a New Domain Pack
285
 
286
+ 1. Create domain pack directory: `backend/app/domain_packs/your_domain/`
287
+ 2. Implement `DomainPack` base class in `pack.py`
288
+ 3. Add domain-specific modules (data sources, analyzers, etc.)
289
+ 4. Register in `backend/app/domain_packs/init_packs.py`
290
+ 5. Agents will automatically detect and use capabilities
291
 
292
+ ### Customizing Agent Prompts
293
 
294
+ 1. Navigate to `backend/app/prompts/`
295
+ 2. Edit prompt files (research.txt, planner.txt, etc.)
296
+ 3. Changes take effect immediately (no restart needed)
297
+ 4. Use Prompt Lab UI for live editing
298
 
299
+ ### Running with Different Providers
300
 
301
+ **OpenRouter (recommended for production):**
302
+ ```env
303
+ PRIMARY_PROVIDER=openrouter
304
+ OPENROUTER_API_KEY=your_key
305
+ ```
306
 
307
+ **Ollama (local, privacy-focused):**
308
+ ```env
309
+ PRIMARY_PROVIDER=ollama
310
+ OLLAMA_ENABLED=true
311
+ OLLAMA_BASE_URL=http://127.0.0.1:11434/api
312
+ ```
313
 
314
+ **OpenAI (high quality):**
315
+ ```env
316
+ PRIMARY_PROVIDER=openai
317
+ OPENAI_API_KEY=your_key
318
+ ```
319
 
320
+ ## Deployment
 
321
 
322
+ ### Single-User Local Deployment (Recommended)
 
 
323
 
324
+ This system is optimized for single-user local deployment on laptops (tested on 8GB/256GB M2 Air):
 
325
 
326
+ - Learning scheduler respects CPU usage (<50%) and battery level (>30%)
327
+ - Knowledge cache limited to 200MB with LRU eviction
328
+ - All data stored locally in `backend/app/data/`
329
+ - No cloud dependencies required
330
 
331
+ ### Production Deployment
332
 
333
+ For multi-user production deployment:
334
+
335
+ 1. Add authentication and authorization
336
+ 2. Use PostgreSQL instead of JSON storage
337
+ 3. Add rate limiting and request queuing
338
+ 4. Deploy with Docker/Kubernetes
339
+ 5. Use managed LLM services
340
+ 6. Add monitoring and alerting
341
 
342
  ## License
343
 
344
+ [Add your license here]
345
+
346
+ ## Contributing
347
+
348
+ [Add contribution guidelines here]
backend/.env.example CHANGED
@@ -66,4 +66,13 @@ SIMULATION_TRIGGER_KEYWORDS=simulate,predict,what if,reaction,scenario,public op
66
 
67
  # ---------- Domain Packs ----------
68
  # Enable/disable domain packs (future feature)
69
- FINANCE_DOMAIN_PACK_ENABLED=true
 
 
 
 
 
 
 
 
 
 
66
 
67
  # ---------- Domain Packs ----------
68
  # Enable/disable domain packs (future feature)
69
+ FINANCE_DOMAIN_PACK_ENABLED=true
70
+
71
+
72
+ # ---------- Sentinel Layer ----------
73
+ # Sentinel provides adaptive maintenance and self-healing
74
+ SENTINEL_ENABLED=true
75
+ SENTINEL_CYCLE_INTERVAL_MINUTES=60
76
+ SENTINEL_MAX_DIAGNOSES_PER_CYCLE=5
77
+ SENTINEL_CPU_THRESHOLD=80.0
78
+ SENTINEL_BATTERY_THRESHOLD=20.0
backend/app/agents/planner.py CHANGED
@@ -1,7 +1,45 @@
 
1
  from app.agents._model import call_model, LLMProviderError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  def run_planner(user_input: str, research_output: str, prompt_template: str) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  prompt = (
6
  f"{prompt_template}\n\n"
7
  f"User Request:\n{user_input}\n\n"
@@ -10,11 +48,16 @@ def run_planner(user_input: str, research_output: str, prompt_template: str) ->
10
 
11
  try:
12
  text = call_model(prompt, mode="chat")
 
 
13
  return {
14
  "agent": "planner",
15
  "summary": text,
16
- "details": {"model_mode": "chat"},
17
- "confidence": 0.75,
 
 
 
18
  }
19
  except LLMProviderError as e:
20
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
+ from app.config import SIMULATION_TRIGGER_KEYWORDS
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
9
+
10
+
11
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
12
+ """Extract confidence score from structured LLM output."""
13
+ match = _CONFIDENCE_PATTERN.search(text)
14
+ if match:
15
+ try:
16
+ score = float(match.group(1))
17
+ return max(0.0, min(1.0, score))
18
+ except ValueError:
19
+ pass
20
+ return default
21
 
22
 
23
  def run_planner(user_input: str, research_output: str, prompt_template: str) -> dict:
24
+ # Detect if simulation mode would be appropriate
25
+ user_lower = user_input.lower()
26
+ simulation_suggested = any(keyword in user_lower for keyword in SIMULATION_TRIGGER_KEYWORDS)
27
+
28
+ # Check for scenario/prediction patterns in research
29
+ research_lower = research_output.lower()
30
+ scenario_patterns = ["scenario", "what if", "predict", "forecast", "impact", "reaction",
31
+ "what would", "how would", "could affect", "might happen"]
32
+ has_scenario_context = any(pattern in research_lower for pattern in scenario_patterns)
33
+
34
+ # Also check user input for scenario patterns
35
+ user_scenario_patterns = ["what would", "what if", "how would", "what happens",
36
+ "what could", "imagine", "suppose", "hypothetical"]
37
+ has_user_scenario = any(pattern in user_lower for pattern in user_scenario_patterns)
38
+
39
+ if (has_scenario_context or has_user_scenario) and not simulation_suggested:
40
+ simulation_suggested = True
41
+ logger.info("Planner detected scenario analysis opportunity - suggesting simulation mode")
42
+
43
  prompt = (
44
  f"{prompt_template}\n\n"
45
  f"User Request:\n{user_input}\n\n"
 
48
 
49
  try:
50
  text = call_model(prompt, mode="chat")
51
+ confidence = _extract_confidence(text, default=0.70)
52
+
53
  return {
54
  "agent": "planner",
55
  "summary": text,
56
+ "details": {
57
+ "model_mode": "chat",
58
+ "simulation_suggested": simulation_suggested
59
+ },
60
+ "confidence": confidence,
61
  }
62
  except LLMProviderError as e:
63
  return {
backend/app/agents/research.py CHANGED
@@ -1,23 +1,85 @@
 
1
  from app.agents._model import call_model, LLMProviderError
2
  from app.services.external_sources import build_external_context
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
 
5
  def run_research(user_input: str, prompt_template: str) -> dict:
6
  external_context = build_external_context(user_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  prompt = (
9
  f"{prompt_template}\n\n"
10
  f"User Request:\n{user_input}\n\n"
11
  f"External Context:\n{external_context}"
 
12
  )
13
 
14
  try:
15
  text = call_model(prompt, mode="chat")
 
 
 
 
 
 
 
 
16
  return {
17
  "agent": "research",
18
  "summary": text,
19
- "details": {"external_context_used": external_context != "No external API context available."},
20
- "confidence": 0.72,
 
 
 
 
 
 
21
  }
22
  except LLMProviderError as e:
23
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
  from app.services.external_sources import build_external_context
4
+ from app.domain_packs.registry import get_registry
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
10
+
11
+
12
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
13
+ """Extract confidence score from structured LLM output."""
14
+ match = _CONFIDENCE_PATTERN.search(text)
15
+ if match:
16
+ try:
17
+ score = float(match.group(1))
18
+ return max(0.0, min(1.0, score))
19
+ except ValueError:
20
+ pass
21
+ return default
22
 
23
 
24
  def run_research(user_input: str, prompt_template: str) -> dict:
25
  external_context = build_external_context(user_input)
26
+
27
+ # Detect domain and enhance research with domain pack capabilities
28
+ registry = get_registry()
29
+ detected_domain = registry.detect_domain(user_input)
30
+
31
+ domain_enhanced_context = {}
32
+ if detected_domain:
33
+ logger.info(f"Enhancing research with domain pack: {detected_domain}")
34
+ pack = registry.get_pack(detected_domain)
35
+ if pack:
36
+ try:
37
+ base_context = {
38
+ "user_input": user_input,
39
+ "external_context": external_context
40
+ }
41
+ domain_enhanced_context = pack.enhance_research(user_input, base_context)
42
+ logger.info(f"Domain enhancement successful: {detected_domain}")
43
+ except Exception as e:
44
+ logger.warning(f"Domain enhancement failed for {detected_domain}: {e}")
45
+ domain_enhanced_context = {}
46
+
47
+ # Build enhanced prompt with domain context
48
+ domain_context_str = ""
49
+ if domain_enhanced_context:
50
+ domain_context_str = "\n\nDomain-Specific Context:\n"
51
+ for key, value in domain_enhanced_context.items():
52
+ if value:
53
+ domain_context_str += f"{key}: {value}\n"
54
 
55
  prompt = (
56
  f"{prompt_template}\n\n"
57
  f"User Request:\n{user_input}\n\n"
58
  f"External Context:\n{external_context}"
59
+ f"{domain_context_str}"
60
  )
61
 
62
  try:
63
  text = call_model(prompt, mode="chat")
64
+
65
+ # Extract structured entities from domain enhancement
66
+ entities = domain_enhanced_context.get("entities", []) if domain_enhanced_context else []
67
+ tickers = domain_enhanced_context.get("tickers", []) if domain_enhanced_context else []
68
+
69
+ # Extract confidence from LLM output (our prompt asks for it)
70
+ confidence = _extract_confidence(text, default=0.65)
71
+
72
  return {
73
  "agent": "research",
74
  "summary": text,
75
+ "details": {
76
+ "external_context_used": external_context != "No external API context available.",
77
+ "domain_pack": detected_domain or "general",
78
+ "entities": entities,
79
+ "tickers": tickers,
80
+ "domain_enhanced": bool(domain_enhanced_context)
81
+ },
82
+ "confidence": confidence,
83
  }
84
  except LLMProviderError as e:
85
  return {
backend/app/agents/switchboard.py CHANGED
@@ -23,16 +23,28 @@ def decide_route(user_input: str) -> dict:
23
  words = len(text.split())
24
 
25
  # Dimension 1: Task family (simulation detection)
 
26
  task_family = "simulation" if any(k in lower for k in SIMULATION_TRIGGER_KEYWORDS) else "normal"
27
 
 
 
 
 
 
 
 
 
28
  # Dimension 2: Domain pack detection
29
  registry = get_registry()
30
  detected_domain = registry.detect_domain(user_input)
31
  domain_pack = detected_domain if detected_domain else "general"
32
 
33
- # Dimension 3: Complexity based on word count
34
  if task_family == "simulation":
35
  complexity = "complex"
 
 
 
36
  elif words <= 5:
37
  complexity = "simple"
38
  elif words <= 25:
@@ -40,9 +52,12 @@ def decide_route(user_input: str) -> dict:
40
  else:
41
  complexity = "complex"
42
 
43
- # Dimension 4: Execution mode based on complexity
44
  if task_family == "simulation":
45
  execution_mode = "deep"
 
 
 
46
  elif complexity == "simple":
47
  execution_mode = "solo"
48
  elif complexity == "medium":
@@ -50,10 +65,18 @@ def decide_route(user_input: str) -> dict:
50
  else:
51
  execution_mode = "deep"
52
 
 
 
 
 
 
 
 
 
53
  return {
54
  "task_family": task_family,
55
  "domain_pack": domain_pack,
56
  "complexity": complexity,
57
  "execution_mode": execution_mode,
58
- "risk_level": "medium" if execution_mode == "deep" else "low",
59
  }
 
23
  words = len(text.split())
24
 
25
  # Dimension 1: Task family (simulation detection)
26
+ # Check configured keywords
27
  task_family = "simulation" if any(k in lower for k in SIMULATION_TRIGGER_KEYWORDS) else "normal"
28
 
29
+ # Additional scenario patterns that should also trigger deep analysis
30
+ scenario_patterns = [
31
+ "what would", "what if", "how would", "what happens if",
32
+ "what could", "imagine if", "suppose", "hypothetical",
33
+ "could affect", "might impact", "would react",
34
+ ]
35
+ is_speculative = any(p in lower for p in scenario_patterns)
36
+
37
  # Dimension 2: Domain pack detection
38
  registry = get_registry()
39
  detected_domain = registry.detect_domain(user_input)
40
  domain_pack = detected_domain if detected_domain else "general"
41
 
42
+ # Dimension 3: Complexity based on word count and nature
43
  if task_family == "simulation":
44
  complexity = "complex"
45
+ elif is_speculative:
46
+ # Speculative questions always get at least medium complexity
47
+ complexity = "complex" if words > 15 else "medium"
48
  elif words <= 5:
49
  complexity = "simple"
50
  elif words <= 25:
 
52
  else:
53
  complexity = "complex"
54
 
55
+ # Dimension 4: Execution mode based on complexity and nature
56
  if task_family == "simulation":
57
  execution_mode = "deep"
58
+ elif is_speculative:
59
+ # Speculative questions always get deep mode (verifier should check uncertainty)
60
+ execution_mode = "deep"
61
  elif complexity == "simple":
62
  execution_mode = "solo"
63
  elif complexity == "medium":
 
65
  else:
66
  execution_mode = "deep"
67
 
68
+ # Risk level
69
+ if execution_mode == "deep":
70
+ risk_level = "medium"
71
+ elif is_speculative:
72
+ risk_level = "medium"
73
+ else:
74
+ risk_level = "low"
75
+
76
  return {
77
  "task_family": task_family,
78
  "domain_pack": domain_pack,
79
  "complexity": complexity,
80
  "execution_mode": execution_mode,
81
+ "risk_level": risk_level,
82
  }
backend/app/agents/synthesizer.py CHANGED
@@ -1,4 +1,43 @@
 
1
  from app.agents._model import call_model, LLMProviderError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  def run_synthesizer(
@@ -8,6 +47,18 @@ def run_synthesizer(
8
  verifier_output: str,
9
  prompt_template: str
10
  ) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
11
  prompt = (
12
  f"{prompt_template}\n\n"
13
  f"User Request:\n{user_input}\n\n"
@@ -18,11 +69,27 @@ def run_synthesizer(
18
 
19
  try:
20
  text = call_model(prompt, mode="chat")
 
 
 
 
 
 
 
 
 
 
 
 
21
  return {
22
  "agent": "synthesizer",
23
  "summary": text,
24
- "details": {"model_mode": "chat"},
25
- "confidence": 0.82,
 
 
 
 
26
  }
27
  except LLMProviderError as e:
28
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
8
+ _UNCERTAINTY_PATTERN = re.compile(r'Uncertainty\s*Level:\s*(HIGH|MEDIUM|LOW)', re.IGNORECASE)
9
+
10
+
11
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
12
+ """Extract confidence score from structured LLM output."""
13
+ match = _CONFIDENCE_PATTERN.search(text)
14
+ if match:
15
+ try:
16
+ score = float(match.group(1))
17
+ return max(0.0, min(1.0, score))
18
+ except ValueError:
19
+ pass
20
+ return default
21
+
22
+
23
+ def _extract_uncertainty(text: str) -> str:
24
+ """Extract uncertainty level from structured LLM output."""
25
+ match = _UNCERTAINTY_PATTERN.search(text)
26
+ if match:
27
+ return match.group(1).upper()
28
+
29
+ # Fallback heuristic
30
+ text_lower = text.lower()
31
+ uncertainty_indicators = ["uncertain", "unclear", "missing", "unverified",
32
+ "assumption", "unknown", "speculative", "conflicting",
33
+ "limited evidence", "cannot confirm"]
34
+ count = sum(1 for indicator in uncertainty_indicators if indicator in text_lower)
35
+
36
+ if count >= 4:
37
+ return "HIGH"
38
+ elif count >= 2:
39
+ return "MEDIUM"
40
+ return "LOW"
41
 
42
 
43
  def run_synthesizer(
 
47
  verifier_output: str,
48
  prompt_template: str
49
  ) -> dict:
50
+ # Extract uncertainty level from verifier output (or synthesizer will self-assess)
51
+ uncertainty_level = _extract_uncertainty(verifier_output)
52
+
53
+ # Check if simulation was recommended by planner or verifier
54
+ planner_lower = planner_output.lower()
55
+ simulation_recommended = (
56
+ ("simulation recommended: yes" in planner_lower) or
57
+ ("simulation" in planner_lower and "recommend" in planner_lower)
58
+ )
59
+
60
+ logger.info(f"Synthesizer: uncertainty_level={uncertainty_level}, simulation_recommended={simulation_recommended}")
61
+
62
  prompt = (
63
  f"{prompt_template}\n\n"
64
  f"User Request:\n{user_input}\n\n"
 
69
 
70
  try:
71
  text = call_model(prompt, mode="chat")
72
+ confidence = _extract_confidence(text, default=0.60)
73
+
74
+ # Also try to extract uncertainty from synthesizer's own output
75
+ synth_uncertainty = _extract_uncertainty(text)
76
+ # Use the higher uncertainty between verifier and synthesizer
77
+ if synth_uncertainty == "HIGH" or uncertainty_level == "HIGH":
78
+ final_uncertainty = "HIGH"
79
+ elif synth_uncertainty == "MEDIUM" or uncertainty_level == "MEDIUM":
80
+ final_uncertainty = "MEDIUM"
81
+ else:
82
+ final_uncertainty = "LOW"
83
+
84
  return {
85
  "agent": "synthesizer",
86
  "summary": text,
87
+ "details": {
88
+ "model_mode": "chat",
89
+ "uncertainty_level": final_uncertainty,
90
+ "simulation_recommended": simulation_recommended
91
+ },
92
+ "confidence": confidence,
93
  }
94
  except LLMProviderError as e:
95
  return {
backend/app/agents/verifier.py CHANGED
@@ -1,21 +1,94 @@
 
1
  from app.agents._model import call_model, LLMProviderError
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
 
4
  def run_verifier(user_input: str, research_output: str, planner_output: str, prompt_template: str) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  prompt = (
6
  f"{prompt_template}\n\n"
7
  f"User Request:\n{user_input}\n\n"
8
  f"Research Packet:\n{research_output}\n\n"
9
  f"Planner Output:\n{planner_output}"
 
10
  )
11
 
12
  try:
13
  text = call_model(prompt, mode="reasoner")
 
 
 
 
 
 
 
 
 
14
  return {
15
  "agent": "verifier",
16
  "summary": text,
17
- "details": {"model_mode": "reasoner"},
18
- "confidence": 0.79,
 
 
 
 
 
 
 
19
  }
20
  except LLMProviderError as e:
21
  return {
 
1
+ import re
2
  from app.agents._model import call_model, LLMProviderError
3
+ from app.domain_packs.registry import get_registry
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+ _CONFIDENCE_PATTERN = re.compile(r'Confidence:\s*([\d.]+)', re.IGNORECASE)
9
+
10
+
11
+ def _extract_confidence(text: str, default: float = 0.5) -> float:
12
+ """Extract confidence score from structured LLM output."""
13
+ match = _CONFIDENCE_PATTERN.search(text)
14
+ if match:
15
+ try:
16
+ score = float(match.group(1))
17
+ return max(0.0, min(1.0, score))
18
+ except ValueError:
19
+ pass
20
+ return default
21
 
22
 
23
  def run_verifier(user_input: str, research_output: str, planner_output: str, prompt_template: str) -> dict:
24
+ # Detect domain and enhance verification with domain pack capabilities
25
+ registry = get_registry()
26
+ detected_domain = registry.detect_domain(user_input)
27
+
28
+ domain_verification = {}
29
+ if detected_domain:
30
+ logger.info(f"Enhancing verification with domain pack: {detected_domain}")
31
+ pack = registry.get_pack(detected_domain)
32
+ if pack:
33
+ try:
34
+ # Extract claims from research and planner outputs for verification
35
+ claims = []
36
+ for line in (research_output + "\n" + planner_output).split("\n"):
37
+ stripped = line.strip()
38
+ if stripped and len(stripped) > 20 and not stripped.startswith(("Facts:", "Assumptions:", "Open Questions:", "Key Facts:", "Plan:", "Objective:")):
39
+ claims.append(stripped)
40
+
41
+ context = {
42
+ "user_input": user_input,
43
+ "research_output": research_output,
44
+ "planner_output": planner_output,
45
+ "claims": claims[:30] # Limit claims to avoid token overflow
46
+ }
47
+ domain_verification = pack.enhance_verification(claims[:30], context)
48
+ logger.info(f"Domain verification successful: {detected_domain}")
49
+ except Exception as e:
50
+ logger.warning(f"Domain verification failed for {detected_domain}: {e}")
51
+ domain_verification = {}
52
+
53
+ # Build enhanced prompt with domain verification
54
+ domain_verification_str = ""
55
+ if domain_verification:
56
+ domain_verification_str = "\n\nDomain-Specific Verification:\n"
57
+ for key, value in domain_verification.items():
58
+ if value:
59
+ domain_verification_str += f"{key}: {value}\n"
60
+
61
  prompt = (
62
  f"{prompt_template}\n\n"
63
  f"User Request:\n{user_input}\n\n"
64
  f"Research Packet:\n{research_output}\n\n"
65
  f"Planner Output:\n{planner_output}"
66
+ f"{domain_verification_str}"
67
  )
68
 
69
  try:
70
  text = call_model(prompt, mode="reasoner")
71
+
72
+ # Extract confidence from LLM output
73
+ confidence = _extract_confidence(text, default=0.70)
74
+
75
+ # Extract structured verification results
76
+ credibility_score = domain_verification.get("credibility_score", 0.5) if domain_verification else 0.5
77
+ rumors_detected = domain_verification.get("rumors_detected", []) if domain_verification else []
78
+ scams_detected = domain_verification.get("scams_detected", []) if domain_verification else []
79
+
80
  return {
81
  "agent": "verifier",
82
  "summary": text,
83
+ "details": {
84
+ "model_mode": "reasoner",
85
+ "domain_pack": detected_domain or "general",
86
+ "credibility_score": credibility_score,
87
+ "rumors_detected": rumors_detected,
88
+ "scams_detected": scams_detected,
89
+ "domain_verified": bool(domain_verification)
90
+ },
91
+ "confidence": confidence,
92
  }
93
  except LLMProviderError as e:
94
  return {
backend/app/config.py CHANGED
@@ -45,11 +45,6 @@ JINA_READER_BASE = os.getenv("JINA_READER_BASE", "https://r.jina.ai/http://")
45
  MIROFISH_ENABLED = os.getenv("MIROFISH_ENABLED", "false").lower() == "true"
46
  MIROFISH_API_BASE = os.getenv("MIROFISH_API_BASE", "http://127.0.0.1:5001")
47
  MIROFISH_TIMEOUT_SECONDS = int(os.getenv("MIROFISH_TIMEOUT_SECONDS", "120"))
48
- MIROFISH_HEALTH_PATH = os.getenv("MIROFISH_HEALTH_PATH", "/health")
49
- MIROFISH_RUN_PATH = os.getenv("MIROFISH_RUN_PATH", "/simulation/run")
50
- MIROFISH_STATUS_PATH = os.getenv("MIROFISH_STATUS_PATH", "/simulation/{id}")
51
- MIROFISH_REPORT_PATH = os.getenv("MIROFISH_REPORT_PATH", "/simulation/{id}/report")
52
- MIROFISH_CHAT_PATH = os.getenv("MIROFISH_CHAT_PATH", "/simulation/{id}/chat")
53
 
54
  SIMULATION_TRIGGER_KEYWORDS = [
55
  item.strip().lower()
@@ -143,3 +138,61 @@ def validate_config():
143
 
144
  # Run validation on import (startup)
145
  validate_config()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  MIROFISH_ENABLED = os.getenv("MIROFISH_ENABLED", "false").lower() == "true"
46
  MIROFISH_API_BASE = os.getenv("MIROFISH_API_BASE", "http://127.0.0.1:5001")
47
  MIROFISH_TIMEOUT_SECONDS = int(os.getenv("MIROFISH_TIMEOUT_SECONDS", "120"))
 
 
 
 
 
48
 
49
  SIMULATION_TRIGGER_KEYWORDS = [
50
  item.strip().lower()
 
138
 
139
  # Run validation on import (startup)
140
  validate_config()
141
+
142
+
143
+ # Learning layer configuration
144
+ LEARNING_ENABLED = os.getenv("LEARNING_ENABLED", "true").lower() == "true"
145
+ KNOWLEDGE_MAX_SIZE_MB = int(os.getenv("KNOWLEDGE_MAX_SIZE_MB", "200"))
146
+ LEARNING_SCHEDULE_INTERVAL = int(os.getenv("LEARNING_SCHEDULE_INTERVAL", "6")) # hours
147
+ LEARNING_BATCH_SIZE = int(os.getenv("LEARNING_BATCH_SIZE", "10"))
148
+ LEARNING_TOPICS = [
149
+ item.strip()
150
+ for item in os.getenv(
151
+ "LEARNING_TOPICS",
152
+ "finance,markets,technology,policy",
153
+ ).split(",")
154
+ if item.strip()
155
+ ]
156
+
157
+
158
def get_config():
    """Return a configuration snapshot object for dependency injection.

    Values are read from the module-level settings at call time, so each
    call reflects the current configuration. Uses ``types.SimpleNamespace``
    instead of declaring a throwaway class on every invocation (the
    original re-created a class object per call); callers still access
    the same attribute names.
    """
    from types import SimpleNamespace  # stdlib; local import keeps module import light

    return SimpleNamespace(
        app_version=APP_VERSION,
        primary_provider=PRIMARY_PROVIDER,
        fallback_provider=FALLBACK_PROVIDER,
        openrouter_api_key=OPENROUTER_API_KEY,
        openrouter_base_url=OPENROUTER_BASE_URL,
        openrouter_chat_model=OPENROUTER_CHAT_MODEL,
        openrouter_reasoner_model=OPENROUTER_REASONER_MODEL,
        ollama_enabled=OLLAMA_ENABLED,
        ollama_base_url=OLLAMA_BASE_URL,
        ollama_chat_model=OLLAMA_CHAT_MODEL,
        ollama_reasoner_model=OLLAMA_REASONER_MODEL,
        openai_api_key=OPENAI_API_KEY,
        openai_base_url=OPENAI_BASE_URL,
        openai_chat_model=OPENAI_CHAT_MODEL,
        openai_reasoner_model=OPENAI_REASONER_MODEL,
        tavily_api_key=TAVILY_API_KEY,
        newsapi_key=NEWSAPI_KEY,
        alphavantage_api_key=ALPHAVANTAGE_API_KEY,
        mirofish_enabled=MIROFISH_ENABLED,
        mirofish_api_base=MIROFISH_API_BASE,
        data_dir=str(DATA_DIR),
        memory_dir=str(MEMORY_DIR),
        simulation_dir=str(SIMULATION_DIR),
        prompts_dir=str(PROMPTS_DIR),
        learning_enabled=LEARNING_ENABLED,
        knowledge_max_size_mb=KNOWLEDGE_MAX_SIZE_MB,
        learning_schedule_interval=LEARNING_SCHEDULE_INTERVAL,
        learning_batch_size=LEARNING_BATCH_SIZE,
        learning_topics=LEARNING_TOPICS,
    )
backend/app/data/sentinel/.gitkeep ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Sentinel data directory
2
+ # This directory stores sentinel runtime data including:
3
+ # - alert_history.json
4
+ # - patch_history.json
5
+ # - cycle_history.json
6
+ # - capability_history.json
7
+ # - sentinel_config.json
8
+ # - pending_patches/
backend/app/data/sentinel/pending_patches/.gitkeep ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Pending patches directory
2
+ # This directory stores patches awaiting human review
3
+ # Format: {patch_id}.patch.json
backend/app/graph.py CHANGED
@@ -1,4 +1,5 @@
1
  import uuid
 
2
  import logging
3
  from typing import TypedDict, Dict, Any
4
 
@@ -14,9 +15,35 @@ from app.agents.synthesizer import run_synthesizer
14
  logger = logging.getLogger(__name__)
15
 
16
 
 
 
 
 
 
17
  def load_prompt(filename: str) -> str:
18
- path = PROMPTS_DIR / filename
19
- return path.read_text(encoding="utf-8")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  RESEARCH_PROMPT = load_prompt("research.txt")
@@ -44,51 +71,80 @@ def empty_output(agent_name: str) -> Dict[str, Any]:
44
  }
45
 
46
 
 
 
47
  def switchboard_node(state: OrgState):
48
- return {"route": decide_route(state["user_input"])}
 
 
 
 
49
 
50
 
51
  def research_node(state: OrgState):
52
  if state["route"].get("execution_mode") == "solo":
53
  return {"research": empty_output("research")}
54
- return {"research": run_research(state["user_input"], RESEARCH_PROMPT)}
 
 
 
 
 
 
55
 
56
 
57
  def planner_node(state: OrgState):
58
  if state["route"].get("execution_mode") == "solo":
59
  return {"planner": empty_output("planner")}
60
- return {
 
 
 
61
  "planner": run_planner(
62
  state["user_input"],
63
  state["research"]["summary"],
64
- PLANNER_PROMPT,
65
  )
66
  }
 
 
 
67
 
68
 
69
  def verifier_node(state: OrgState):
70
  if state["route"].get("execution_mode") != "deep":
71
  return {"verifier": empty_output("verifier")}
72
- return {
 
 
 
73
  "verifier": run_verifier(
74
  state["user_input"],
75
  state["research"]["summary"],
76
  state["planner"]["summary"],
77
- VERIFIER_PROMPT,
78
  )
79
  }
 
 
 
80
 
81
 
82
  def synthesizer_node(state: OrgState):
83
- return {
 
 
84
  "final": run_synthesizer(
85
  state["user_input"],
86
  state["research"]["summary"],
87
  state["planner"]["summary"],
88
  state["verifier"]["summary"],
89
- SYNTHESIZER_PROMPT,
90
  )
91
  }
 
 
 
92
 
93
 
94
  graph = StateGraph(OrgState)
@@ -110,6 +166,7 @@ compiled_graph = graph.compile()
110
 
111
  def run_case(user_input: str):
112
  case_id = str(uuid.uuid4())
 
113
  logger.info("Starting case %s", case_id)
114
 
115
  result = compiled_graph.invoke(
@@ -124,5 +181,6 @@ def run_case(user_input: str):
124
  }
125
  )
126
 
127
- logger.info("Case %s completed", case_id)
 
128
  return result
 
1
  import uuid
2
+ import time
3
  import logging
4
  from typing import TypedDict, Dict, Any
5
 
 
15
  logger = logging.getLogger(__name__)
16
 
17
 
18
+ # ── Prompt Loading with Production Version Support ────────────────────────────
19
+
20
+ _prompt_cache: Dict[str, str] = {}
21
+
22
+
23
  def load_prompt(filename: str) -> str:
24
+ """Load prompt from file, with caching."""
25
+ if filename not in _prompt_cache:
26
+ path = PROMPTS_DIR / filename
27
+ _prompt_cache[filename] = path.read_text(encoding="utf-8")
28
+ return _prompt_cache[filename]
29
+
30
+
31
+ def get_active_prompt(prompt_name: str, filename: str) -> str:
32
+ """
33
+ Get the active prompt, preferring a promoted production version.
34
+ Falls back to the file-based prompt if none is promoted.
35
+ """
36
+ try:
37
+ from app.routers.learning import learning_engine
38
+ if learning_engine:
39
+ production = learning_engine.get_active_prompt(prompt_name)
40
+ if production:
41
+ logger.debug(f"Using production prompt version for {prompt_name}")
42
+ return production
43
+ except Exception:
44
+ pass
45
+
46
+ return load_prompt(filename)
47
 
48
 
49
  RESEARCH_PROMPT = load_prompt("research.txt")
 
71
  }
72
 
73
 
74
+ # ── Node Functions with Timing ───────────────────────────────────────────────
75
+
76
def switchboard_node(state: OrgState):
    """Route the incoming request and log how long routing took."""
    started = time.perf_counter()
    route = decide_route(state["user_input"])
    elapsed = time.perf_counter() - started
    logger.info(f"[{state['case_id'][:8]}] switchboard: {elapsed:.2f}s — mode={route.get('execution_mode')}")
    return {"route": route}
82
 
83
 
84
def research_node(state: OrgState):
    """Run the research agent unless the case is routed solo."""
    if state["route"].get("execution_mode") == "solo":
        return {"research": empty_output("research")}

    started = time.perf_counter()
    research = run_research(state["user_input"], get_active_prompt("research", "research.txt"))
    logger.info(f"[{state['case_id'][:8]}] research: {time.perf_counter() - started:.2f}s")
    return {"research": research}
94
 
95
 
96
def planner_node(state: OrgState):
    """Run the planner agent on the research summary (skipped in solo mode)."""
    if state["route"].get("execution_mode") == "solo":
        return {"planner": empty_output("planner")}

    started = time.perf_counter()
    planner = run_planner(
        state["user_input"],
        state["research"]["summary"],
        get_active_prompt("planner", "planner.txt"),
    )
    logger.info(f"[{state['case_id'][:8]}] planner: {time.perf_counter() - started:.2f}s")
    return {"planner": planner}
112
 
113
 
114
def verifier_node(state: OrgState):
    """Run the verifier agent; only deep-mode cases are verified."""
    if state["route"].get("execution_mode") != "deep":
        return {"verifier": empty_output("verifier")}

    started = time.perf_counter()
    verifier = run_verifier(
        state["user_input"],
        state["research"]["summary"],
        state["planner"]["summary"],
        get_active_prompt("verifier", "verifier.txt"),
    )
    logger.info(f"[{state['case_id'][:8]}] verifier: {time.perf_counter() - started:.2f}s")
    return {"verifier": verifier}
131
 
132
 
133
def synthesizer_node(state: OrgState):
    """Produce the final answer from all prior agent outputs."""
    started = time.perf_counter()
    final = run_synthesizer(
        state["user_input"],
        state["research"]["summary"],
        state["planner"]["summary"],
        state["verifier"]["summary"],
        get_active_prompt("synthesizer", "synthesizer.txt"),
    )
    logger.info(f"[{state['case_id'][:8]}] synthesizer: {time.perf_counter() - started:.2f}s")
    return {"final": final}
148
 
149
 
150
  graph = StateGraph(OrgState)
 
166
 
167
  def run_case(user_input: str):
168
  case_id = str(uuid.uuid4())
169
+ t0 = time.perf_counter()
170
  logger.info("Starting case %s", case_id)
171
 
172
  result = compiled_graph.invoke(
 
181
  }
182
  )
183
 
184
+ elapsed = time.perf_counter() - t0
185
+ logger.info("Case %s completed in %.2fs", case_id, elapsed)
186
  return result
backend/app/main.py CHANGED
@@ -1,7 +1,11 @@
 
 
1
  import logging
 
2
 
3
- from fastapi import FastAPI, HTTPException, Query
4
  from fastapi.middleware.cors import CORSMiddleware
 
5
 
6
  from app.schemas import UserTask, AgentRunRequest, PromptUpdateRequest
7
  from app.graph import run_case
@@ -18,22 +22,35 @@ from app.config import (
18
  MIROFISH_ENABLED,
19
  MEMORY_DIR,
20
  PROMPTS_DIR,
 
21
  )
22
  from app.services.case_store import list_cases, get_case, delete_case, memory_stats
23
  from app.services.prompt_store import list_prompts, get_prompt, update_prompt
24
  from app.services.health_service import deep_health
25
  from app.services.agent_registry import list_agents, get_agent, run_single_agent
26
  from app.routers.simulation import router as simulation_router
 
 
 
27
 
28
  logging.basicConfig(level=logging.INFO)
29
  logger = logging.getLogger(__name__)
30
 
31
- app = FastAPI(title="MiroOrg Basic", version=APP_VERSION)
32
 
33
- # Initialize domain packs on startup
34
  from app.domain_packs.init_packs import init_domain_packs
35
  init_domain_packs()
36
 
 
 
 
 
 
 
 
 
 
37
  app.add_middleware(
38
  CORSMiddleware,
39
  allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"],
@@ -43,8 +60,64 @@ app.add_middleware(
43
  )
44
 
45
  app.include_router(simulation_router)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
 
 
 
48
  @app.get("/health")
49
  def health():
50
  return {"status": "ok", "version": APP_VERSION}
@@ -72,6 +145,8 @@ def config_status():
72
  }
73
 
74
 
 
 
75
  @app.get("/agents")
76
  def agents():
77
  return list_agents()
@@ -85,12 +160,23 @@ def agent_detail(agent_name: str):
85
  return agent
86
 
87
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  @app.post("/run")
89
  def run_org(task: UserTask):
90
  try:
91
  logger.info("Processing /run: %s", task.user_input[:100])
92
  result = run_case(task.user_input)
93
-
94
  payload = {
95
  "case_id": result["case_id"],
96
  "user_input": result["user_input"],
@@ -103,24 +189,29 @@ def run_org(task: UserTask):
103
  ],
104
  "final_answer": result["final"]["summary"],
105
  }
106
-
107
  save_case(result["case_id"], payload)
 
 
 
 
108
  return payload
109
  except Exception as e:
110
  logger.exception("Error in /run")
111
- raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
112
 
113
 
114
  @app.post("/run/debug")
115
  def run_org_debug(task: UserTask):
116
  try:
117
- logger.info("Processing /run/debug: %s", task.user_input[:100])
118
  result = run_case(task.user_input)
119
  save_case(result["case_id"], result)
 
 
 
120
  return result
121
  except Exception as e:
122
  logger.exception("Error in /run/debug")
123
- raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
124
 
125
 
126
  @app.post("/run/agent")
@@ -137,9 +228,11 @@ def run_one_agent(request: AgentRunRequest):
137
  raise HTTPException(status_code=400, detail=str(e))
138
  except Exception as e:
139
  logger.exception("Error in /run/agent")
140
- raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
141
 
142
 
 
 
143
  @app.get("/cases")
144
  def cases(limit: int | None = Query(default=None, ge=1, le=200)):
145
  return list_cases(limit=limit)
@@ -174,6 +267,8 @@ def memory_stats_endpoint():
174
  return memory_stats()
175
 
176
 
 
 
177
  @app.get("/prompts")
178
  def prompts():
179
  return list_prompts()
@@ -185,7 +280,6 @@ def prompt_detail(name: str):
185
  prompt = get_prompt(name)
186
  except ValueError as e:
187
  raise HTTPException(status_code=400, detail=str(e))
188
-
189
  if not prompt:
190
  raise HTTPException(status_code=404, detail="Prompt not found")
191
  return prompt
 
1
+ import asyncio
2
+ import time
3
  import logging
4
+ import os
5
 
6
+ from fastapi import FastAPI, HTTPException, Query, Request
7
  from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.responses import JSONResponse
9
 
10
  from app.schemas import UserTask, AgentRunRequest, PromptUpdateRequest
11
  from app.graph import run_case
 
22
  MIROFISH_ENABLED,
23
  MEMORY_DIR,
24
  PROMPTS_DIR,
25
+ get_config,
26
  )
27
  from app.services.case_store import list_cases, get_case, delete_case, memory_stats
28
  from app.services.prompt_store import list_prompts, get_prompt, update_prompt
29
  from app.services.health_service import deep_health
30
  from app.services.agent_registry import list_agents, get_agent, run_single_agent
31
  from app.routers.simulation import router as simulation_router
32
+ from app.routers.learning import router as learning_router, init_learning_services, start_scheduler_background
33
+ from app.routers.sentinel import router as sentinel_router
34
+ from app.routers.finance import router as finance_router
35
 
36
  logging.basicConfig(level=logging.INFO)
37
  logger = logging.getLogger(__name__)
38
 
39
+ app = FastAPI(title="MiroOrg v1.1", version=APP_VERSION)
40
 
41
+ # Initialize domain packs
42
  from app.domain_packs.init_packs import init_domain_packs
43
  init_domain_packs()
44
 
45
+ # Initialize learning layer
46
+ config = get_config()
47
+ if config.learning_enabled:
48
+ try:
49
+ init_learning_services(config)
50
+ logger.info("Learning layer initialized")
51
+ except Exception as e:
52
+ logger.error(f"Failed to initialize learning layer: {e}")
53
+
54
  app.add_middleware(
55
  CORSMiddleware,
56
  allow_origins=["http://localhost:3000", "http://127.0.0.1:3000"],
 
60
  )
61
 
62
  app.include_router(simulation_router)
63
+ app.include_router(learning_router)
64
+ app.include_router(sentinel_router)
65
+ app.include_router(finance_router)
66
+
67
+
68
+ # ── Request Timing Middleware ─────────────────────────────────────────────────
69
+
70
+ @app.middleware("http")
71
+ async def timing_middleware(request: Request, call_next):
72
+ """Add X-Process-Time header and log slow requests."""
73
+ start = time.perf_counter()
74
+ response = await call_next(request)
75
+ elapsed = time.perf_counter() - start
76
+ response.headers["X-Process-Time"] = f"{elapsed:.3f}"
77
+
78
+ if elapsed > 10.0:
79
+ logger.warning(f"Slow request: {request.method} {request.url.path} took {elapsed:.1f}s")
80
+
81
+ return response
82
+
83
+
84
+ # ── Error Handler ─────────────────────────────────────────────────────────────
85
+
86
+ @app.exception_handler(Exception)
87
+ async def global_exception_handler(request: Request, exc: Exception):
88
+ """Sanitize error responses — don't leak internal stack traces."""
89
+ logger.exception(f"Unhandled error on {request.method} {request.url.path}")
90
+ return JSONResponse(
91
+ status_code=500,
92
+ content={"detail": "An internal error occurred. Please try again."},
93
+ )
94
+
95
+
96
+ # ── Startup Event ─────────────────────────────────────────────────────────────
97
+
98
+ @app.on_event("startup")
99
+ async def on_startup():
100
+ """Start background tasks on app startup."""
101
+ if config.learning_enabled:
102
+ try:
103
+ start_scheduler_background()
104
+ logger.info("Background learning scheduler started")
105
+ except Exception as e:
106
+ logger.error(f"Failed to start learning scheduler: {e}")
107
+
108
+ # Start sentinel scheduler
109
+ sentinel_enabled = os.getenv("SENTINEL_ENABLED", "true").lower() == "true"
110
+ if sentinel_enabled:
111
+ try:
112
+ from app.services.sentinel.scheduler import start_sentinel_scheduler
113
+ start_sentinel_scheduler()
114
+ logger.info("Sentinel scheduler started")
115
+ except Exception as e:
116
+ logger.error(f"Failed to start sentinel scheduler: {e}")
117
 
118
 
119
+ # ── Health ────────────────────────────────────────────────────────────────────
120
+
121
  @app.get("/health")
122
  def health():
123
  return {"status": "ok", "version": APP_VERSION}
 
145
  }
146
 
147
 
148
+ # ── Agents ────────────────────────────────────────────────────────────────────
149
+
150
  @app.get("/agents")
151
  def agents():
152
  return list_agents()
 
160
  return agent
161
 
162
 
163
+ # ── Case Execution ────────────────────────────────────────────────────────────
164
+
165
+ def _fire_and_forget_learning(payload: dict):
166
+ """Fire-and-forget learning from a completed case."""
167
+ from app.routers.learning import learning_engine as _le
168
+ if _le:
169
+ try:
170
+ _le.learn_from_case(payload)
171
+ except Exception as e:
172
+ logger.error(f"Learning from case failed (non-blocking): {e}")
173
+
174
+
175
  @app.post("/run")
176
  def run_org(task: UserTask):
177
  try:
178
  logger.info("Processing /run: %s", task.user_input[:100])
179
  result = run_case(task.user_input)
 
180
  payload = {
181
  "case_id": result["case_id"],
182
  "user_input": result["user_input"],
 
189
  ],
190
  "final_answer": result["final"]["summary"],
191
  }
 
192
  save_case(result["case_id"], payload)
193
+
194
+ # Fire-and-forget: learn from this case
195
+ _fire_and_forget_learning(payload)
196
+
197
  return payload
198
  except Exception as e:
199
  logger.exception("Error in /run")
200
+ raise HTTPException(status_code=500, detail="Failed to process request. Please try again.")
201
 
202
 
203
  @app.post("/run/debug")
204
  def run_org_debug(task: UserTask):
205
  try:
 
206
  result = run_case(task.user_input)
207
  save_case(result["case_id"], result)
208
+
209
+ _fire_and_forget_learning(result)
210
+
211
  return result
212
  except Exception as e:
213
  logger.exception("Error in /run/debug")
214
+ raise HTTPException(status_code=500, detail="Failed to process request. Please try again.")
215
 
216
 
217
  @app.post("/run/agent")
 
228
  raise HTTPException(status_code=400, detail=str(e))
229
  except Exception as e:
230
  logger.exception("Error in /run/agent")
231
+ raise HTTPException(status_code=500, detail="Failed to run agent. Please try again.")
232
 
233
 
234
+ # ── Cases ─────────────────────────────────────────────────────────────────────
235
+
236
  @app.get("/cases")
237
  def cases(limit: int | None = Query(default=None, ge=1, le=200)):
238
  return list_cases(limit=limit)
 
267
  return memory_stats()
268
 
269
 
270
+ # ── Prompts ───────────────────────────────────────────────────────────────────
271
+
272
  @app.get("/prompts")
273
  def prompts():
274
  return list_prompts()
 
280
  prompt = get_prompt(name)
281
  except ValueError as e:
282
  raise HTTPException(status_code=400, detail=str(e))
 
283
  if not prompt:
284
  raise HTTPException(status_code=404, detail="Prompt not found")
285
  return prompt
backend/app/prompts/planner.txt CHANGED
@@ -1,18 +1,90 @@
1
- You are the Planner Agent in an AI organization.
2
-
3
- Your job:
4
- - take the research packet
5
- - produce a clear answer or step-by-step plan
6
- - be practical and structured
7
- - do not invent evidence
8
- - make the output actionable
9
-
10
- Output format:
11
- Plan:
12
- 1. ...
13
- 2. ...
14
- Risks:
15
- - ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  Dependencies:
17
- - ...
18
- Confidence: <0 to 1>
 
 
 
 
 
 
 
 
 
1
+ You are the Planner Agent in MiroOrg — a 5-layer AI intelligence organization.
2
+
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You receive the Research Agent's research packet and transform it into an actionable strategy. You are the strategic thinker — you decide WHAT should be done, in WHAT order, and with what resources. You also serve as the gateway to MiroFish simulation mode.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ CAPABILITIES YOU HAVE ACCESS TO
10
+ ═══════════════════════════════════════════════════════════════
11
+ • Full research packet with facts, entities, market data, domain insights
12
+ • Simulation mode via MiroFish — a digital twin engine that can model multi-agent scenarios, stakeholder reactions, market impacts, and policy cascades
13
+ • Knowledge from past cases — the system learns from previous analyses and can apply distilled skills
14
+
15
+ ═══════════════════════════════════════════════════════════════
16
+ INSTRUCTIONS
17
+ ═══════════════════════════════════════════════════════════════
18
+
19
+ 1. ANALYZE THE RESEARCH PACKET
20
+ - Identify the user's core objective (what do they actually need?)
21
+ - Distinguish between informational needs and decision-making needs
22
+ - Note the complexity level: Is this a simple lookup, a multi-step analysis, or a strategic decision?
23
+
24
+ 2. CREATE A STRUCTURED PLAN
25
+ - Break the response into clear, numbered steps
26
+ - Each step should be actionable and specific
27
+ - Include conditional branches where outcomes are uncertain ("If X, then Y; otherwise Z")
28
+ - Prioritize steps by impact and urgency
29
+
30
+ 3. RISK ASSESSMENT
31
+ - What could go wrong with this plan?
32
+ - What are the key assumptions that could invalidate the strategy?
33
+ - What external factors could change the situation?
34
+ - Rate each risk: HIGH / MEDIUM / LOW impact and probability
35
+
36
+ 4. RESOURCE & DEPENDENCY MAPPING
37
+ - What information is needed that we don't have?
38
+ - What actions depend on other actions completing first?
39
+ - What external factors or decisions are blocking?
40
+
41
+ 5. TIMELINE ESTIMATION
42
+ - If the plan involves actions: suggest reasonable timeframes
43
+ - If the plan involves monitoring: suggest check-in intervals
44
+ - Note time-sensitive elements that could expire
45
+
46
+ 6. SIMULATION MODE DETECTION (CRITICAL)
47
+ The system has MiroFish — a powerful simulation engine that creates digital twins of scenarios and models multi-agent interactions. You MUST recommend simulation mode when ANY of these apply:
48
+
49
+ ALWAYS recommend simulation when the user:
50
+ → Asks "what if" or "what would happen if"
51
+ → Wants to predict outcomes or forecast trends
52
+ → Needs to model stakeholder reactions (market, public, government, competitors)
53
+ → Is evaluating policy impact or regulatory changes
54
+ → Needs scenario comparison (option A vs option B)
55
+ → Is dealing with complex multi-party dynamics
56
+ → Asks about public opinion, sentiment shifts, or social reactions
57
+ → Wants to stress-test a strategy or business decision
58
+
59
+ When recommending simulation, explain:
60
+ → WHY simulation adds value over static analysis
61
+ → WHAT kind of simulation would be most useful
62
+ → WHAT stakeholders/agents should be modeled
63
+
64
+ ═══════════════════════════════════════════════════════════════
65
+ OUTPUT FORMAT (follow strictly)
66
+ ═══════════════════════════════════════════════════════════════
67
+
68
+ Objective:
69
+ <What the user actually needs, in one clear sentence>
70
+
71
+ Strategic Plan:
72
+ 1. <Step> — <Why this step matters>
73
+ 2. <Step> — <Why this step matters>
74
+ 3. <Step> — <Why this step matters>
75
+ → Contingency: <If X fails, then...>
76
+
77
+ Risk Assessment:
78
+ - [HIGH/MEDIUM/LOW] <risk> — <probability> — <mitigation>
79
+
80
  Dependencies:
81
+ - <What depends on what>
82
+
83
+ Timeline:
84
+ - <Step X>: <estimated timeframe or urgency>
85
+
86
+ Simulation Recommended: YES / NO
87
+ Simulation Rationale: <If YES: explain what kind of simulation, which stakeholders to model, and what insights it would generate. If NO: explain why static analysis is sufficient.>
88
+
89
+ Confidence: <0.0 to 1.0>
90
+ Reasoning: <one sentence explaining your confidence level>
backend/app/prompts/research.txt CHANGED
@@ -1,19 +1,87 @@
1
- You are the Research Agent in an AI organization.
2
-
3
- Your job:
4
- - extract the key facts from the user request
5
- - identify assumptions
6
- - identify missing information
7
- - use external API context when provided
8
- - return a concise research packet
9
-
10
- Output format:
11
- Facts:
12
- - ...
13
- Assumptions:
14
- - ...
15
- Open Questions:
16
- - ...
17
- Useful Signals:
18
- - ...
19
- Confidence: <0 to 1>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are the Research Agent in MiroOrg — a 5-layer AI intelligence organization.
2
+
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You are the first analyst in the pipeline. Your research packet becomes the foundation for every downstream agent (Planner, Verifier, Synthesizer). Thoroughness and structure here directly determine the quality of the final answer.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ CAPABILITIES YOU HAVE ACCESS TO
10
+ ═══════════════════════════════════════════════════════════════
11
+ The system automatically provides you with:
12
+ • Web search results (Tavily) — real-time web intelligence
13
+ • News articles (NewsAPI) — recent headlines and reporting
14
+ • Market data (Alpha Vantage) — live stock quotes for detected tickers
15
+ • URL content (Jina Reader) — full-text extraction from any links the user provides
16
+ • Domain pack insights — when a specialized domain is detected (e.g., finance), you receive entity resolution, ticker mapping, sentiment signals, event analysis, and credibility scores
17
+ • Knowledge base — the learning layer may supply distilled knowledge from past research
18
+
19
+ Use ALL of this context. Never ignore provided data. If external context is minimal, state what is missing and why it limits your analysis.
20
+
21
+ ═══════════════════════════════════════════════════════════════
22
+ INSTRUCTIONS
23
+ ═══════════════════════════════════════════════════════════════
24
+
25
+ 1. EXTRACT & STRUCTURE KEY FACTS
26
+ - Separate hard facts (verified, sourced) from soft signals (opinions, projections)
27
+ - Attribute each fact to its source when possible
28
+ - Note temporal relevance (how recent is each data point?)
29
+
30
+ 2. ENTITY RESOLUTION
31
+ - Identify all entities: companies, people, organizations, countries, products, concepts
32
+ - For companies: map to official names and stock tickers (e.g., "Apple" → Apple Inc. ($AAPL))
33
+ - For people: note their role and relevance
34
+ - For events: note dates and impact scope
35
+
36
+ 3. DOMAIN-SPECIFIC DEEP DIVE
37
+ When domain context is detected (finance, policy, technology, etc.):
38
+ - Use market data to ground quantitative claims (price, volume, market cap)
39
+ - Use news context to identify trending narratives and sentiment shifts
40
+ - Note which sources are high-credibility vs. low-credibility
41
+ - Extract stance signals: bullish/bearish, supportive/opposing, optimistic/pessimistic
42
+ - Identify event catalysts: earnings, regulatory actions, mergers, policy changes
43
+
44
+ 4. GAP ANALYSIS
45
+ - What critical information is NOT available?
46
+ - What assumptions must the user be aware of?
47
+ - What would change the analysis if verified differently?
48
+
49
+ 5. SIGNAL DETECTION
50
+ - Contradictions between sources
51
+ - Unusual patterns or anomalies
52
+ - Emerging narratives that haven't been confirmed
53
+ - Risks or red flags that need the Verifier's attention
54
+
55
+ ═══════════════════════════════════════════════════════════════
56
+ OUTPUT FORMAT (follow strictly)
57
+ ═══════════════════════════════════════════════════════════════
58
+
59
+ Key Facts:
60
+ - [FACT] <fact> (Source: <source name>)
61
+ - [FACT] <fact> (Source: <source name>)
62
+
63
+ Entities Detected:
64
+ - <entity name> — <type> — <relevance to query>
65
+ → Ticker: $XXX (if applicable)
66
+
67
+ Market & Quantitative Data:
68
+ - <data point with attribution>
69
+
70
+ Domain Insights:
71
+ - <domain-specific finding or signal>
72
+
73
+ Sentiment & Stance:
74
+ - <source/entity>: <bullish/bearish/neutral/mixed> — <brief reasoning>
75
+
76
+ Source Assessment:
77
+ - <source name>: <high/medium/low credibility> — <why>
78
+
79
+ Gaps & Assumptions:
80
+ - [GAP] <what's missing and why it matters>
81
+ - [ASSUMPTION] <assumption being made>
82
+
83
+ Red Flags for Verifier:
84
+ - <anything that needs skeptical examination>
85
+
86
+ Confidence: <0.0 to 1.0>
87
+ Reasoning: <one sentence explaining your confidence level>
backend/app/prompts/synthesizer.txt CHANGED
@@ -1,15 +1,103 @@
1
- You are the Synthesizer Agent in an AI organization.
2
 
3
- Your job:
4
- - combine the research, plan, and verification review
5
- - produce one final answer
6
- - be clear, grounded, and honest about uncertainty
7
- - if the request sounds like a simulation/prediction task, suggest using simulation mode too
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
- Output format:
10
  Final Answer:
11
- ...
12
- Uncertainty:
13
- ...
14
- Next Best Step:
15
- ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are the Synthesizer Agent in MiroOrg — a 5-layer AI intelligence organization.
2
 
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You are the FINAL voice. You receive everything — the Research packet, the Planner's strategy, and the Verifier's assessment — and you produce the definitive answer that the user sees. Your output IS the product. It must be clear, honest, actionable, and carry appropriate certainty levels.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ SYSTEM CAPABILITIES TO REFERENCE
10
+ ═══════════════════════════════════════════════════════════════
11
+ You can recommend these capabilities to the user:
12
+ • Simulation Mode (MiroFish) — for scenario modeling, "what if" analysis, stakeholder reaction modeling, multi-agent simulations, and digital twin creation
13
+ • Domain Intelligence — specialized analysis packs (finance: market data, entity resolution, event analysis, predictions)
14
+ • Learning Layer — the system continuously improves from past cases, tracks source trust, and evolves its prompts
15
+
16
+ ═══════════════════════════════════════════════════════════════
17
+ INSTRUCTIONS
18
+ ═══════════════════════════════════════════════════════════════
19
+
20
+ 1. INTEGRATE ALL INPUTS
21
+ - Research provides the RAW FACTS and SIGNALS
22
+ - Planner provides the STRATEGIC FRAMEWORK
23
+ - Verifier provides the QUALITY ASSESSMENT and UNCERTAINTY MAP
24
+
25
+ Resolve conflicts between them:
26
+ → If Research says X but Verifier flagged X as unverified → mention X with caveat
27
+ → If Planner recommended action A but Verifier found it risky → recommend A with risk mitigation
28
+ → If sources disagree → present the disagreement honestly, don't pick sides without evidence
29
+
30
+ 2. WRITE THE FINAL ANSWER
31
+ The final answer must be:
32
+ - DIRECT: Lead with the answer, not the process. What does the user need to know FIRST?
33
+ - GROUNDED: Every claim should reference the evidence that supports it
34
+ - HONEST: State what you know, what you don't, and how confident you are
35
+ - ACTIONABLE: End with what the user should DO next
36
+ - READABLE: Use clear paragraphs, not walls of text. Use structure where helpful.
37
+
38
+ 3. UNCERTAINTY COMMUNICATION (CRITICAL)
39
+ Never hide uncertainty. The user trusts this system because it's honest about what it doesn't know.
40
+
41
+ Use these guidelines:
42
+ - HIGH uncertainty: Lead with a prominent caveat. "Based on limited/conflicting information..."
43
+ - MEDIUM uncertainty: Weave caveats naturally. "While X suggests..., there is uncertainty around..."
44
+ - LOW uncertainty: State with confidence but note the basis. "Based on multiple verified sources..."
45
+
46
+ Always specify:
47
+ → What would change your answer if new information emerged
48
+ → What the user should validate independently
49
+
50
+ 4. SIMULATION RECOMMENDATION (WHEN APPROPRIATE)
51
+ If the Planner recommended simulation mode, OR if you detect the user would benefit from it, actively recommend the MiroFish Simulation Lab.
52
+
53
+ Frame it as:
54
+ "💡 This question would benefit from simulation mode. MiroFish can create a digital twin of this scenario and model [specific stakeholders/dynamics]. To run a simulation, use the Simulation Lab with your scenario details."
55
+
56
+ Recommend simulation when:
57
+ → The answer involves too many unknowns to give a confident static analysis
58
+ → Multiple stakeholders would react differently to the same event
59
+ → The user is making a decision that could go multiple ways
60
+ → Temporal dynamics matter (how things evolve over time)
61
+
62
+ 5. CONFIDENCE CALIBRATION
63
+ Your confidence score must be CALIBRATED — don't default to generic values.
64
+
65
+ 0.9–1.0: Multiple verified sources agree, well-established facts
66
+ 0.7–0.89: Strong evidence with minor gaps, reliable sources
67
+ 0.5–0.69: Mixed evidence, some uncertainty, qualified conclusions
68
+ 0.3–0.49: Significant uncertainty, limited evidence, speculative elements
69
+ 0.0–0.29: Very little evidence, highly speculative, contradictory sources
70
+
71
+ ═══════════════════════════════════════════════════════════════
72
+ OUTPUT FORMAT (follow strictly)
73
+ ═══════════════════════════════════════════════════════════════
74
 
 
75
  Final Answer:
76
+ <Your comprehensive, direct answer. Lead with the most important insight. Use paragraphs for readability. Ground claims in evidence. Be honest about limitations.>
77
+
78
+ Key Findings:
79
+ - <Most important finding with evidence basis>
80
+ - <Second most important finding>
81
+ - <Third most important finding>
82
+
83
+ Uncertainty Level: HIGH / MEDIUM / LOW
84
+ Uncertainty Details:
85
+ - <What we're uncertain about and why>
86
+ - <What could change this answer>
87
+
88
+ Caveats:
89
+ - <Important limitations the user should be aware of>
90
+
91
+ Next Actions:
92
+ 1. <Most important thing the user should do>
93
+ 2. <Second priority action>
94
+ 3. <Optional: additional recommended steps>
95
+
96
+ Simulation Recommended: YES / NO
97
+ Simulation Details: <If YES: what scenario to simulate, what stakeholders to model, what insights to expect. If NO: why static analysis is sufficient.>
98
+
99
+ Sources Used:
100
+ - <Key sources that informed this answer>
101
+
102
+ Confidence: <0.0 to 1.0>
103
+ Reasoning: <one sentence explaining exactly why this confidence level>
backend/app/prompts/verifier.txt CHANGED
@@ -1,16 +1,111 @@
1
- You are the Verifier Agent in an AI organization.
2
-
3
- Your job:
4
- - challenge the planner
5
- - find unsupported claims
6
- - identify missing evidence or weak logic
7
- - suggest corrections
8
- - be skeptical but constructive
9
-
10
- Output format:
11
- Issues Found:
12
- - ...
13
- Corrections:
14
- - ...
15
- Approved: yes/no
16
- Confidence: <0 to 1>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are the Verifier Agent in MiroOrg — a 5-layer AI intelligence organization.
2
+
3
+ ═══════════════════════════════════════════════════════════════
4
+ ROLE
5
+ ═══════════════════════════════════════════════════════════════
6
+ You are the system's critical thinker and quality gatekeeper. You receive the Research packet AND the Planner's strategy, and your job is to STRESS-TEST everything before it reaches the Synthesizer. You are skeptical, thorough, and constructive. Nothing gets past you unchecked.
7
+
8
+ ═══════════════════════════════════════════════════════════════
9
+ CAPABILITIES YOU HAVE ACCESS TO
10
+ ═══════════════════════════════════════════════════════════════
11
+ • Domain-specific verification tools — when a specialized domain is detected (e.g., finance), the system provides:
12
+ - Source credibility scores for each information source
13
+ - Rumor detection results (flagged unverified claims)
14
+ - Scam detection results (flagged fraudulent patterns)
15
+ - Stance analysis (who is saying what, and why)
16
+ - Event impact assessment
17
+ • Cross-reference capabilities from external APIs
18
+ • Trust scores from the learning layer — historical source reliability data
19
+
20
+ ═══════════════════════════════════════════════════════════════
21
+ INSTRUCTIONS
22
+ ═══════════════════════════════════════════════════════════════
23
+
24
+ 1. CLAIM EXTRACTION & VERIFICATION
25
+ - Extract every factual claim from both the Research packet and the Planner output
26
+ - For each claim, assess:
27
+ → Is this sourced? From where?
28
+ → Is the source reliable? (check credibility scores if provided)
29
+ → Is this a fact, an opinion, or a projection?
30
+ → Are there contradicting sources?
31
+ → How current is this information?
32
+
33
+ 2. LOGIC & REASONING CHECK
34
+ - Does the Planner's strategy logically follow from the Research?
35
+ - Are there logical fallacies or unsupported leaps?
36
+ - Are conditional statements properly structured?
37
+ - Are cause-effect relationships validated?
38
+
39
+ 3. BIAS & MANIPULATION DETECTION
40
+ - Check for confirmation bias (only supporting evidence cited)
41
+ - Check for selection bias (cherry-picked data)
42
+ - Check for framing effects (how information is presented)
43
+ - Check for astroturfing or coordinated narrative campaigns
44
+ - If financial domain: check for pump-and-dump patterns, misleading projections
45
+
46
+ 4. RUMOR & SCAM DETECTION (USE DOMAIN TOOLS)
47
+ When domain verification data is provided:
48
+ - Review all flagged rumors and rate their risk
49
+ - Review all flagged scams and rate their severity
50
+ - Note any sources that appear in known unreliable source lists
51
+ - Identify patterns consistent with market manipulation or misinformation
52
+
53
+ 5. UNCERTAINTY QUANTIFICATION
54
+ This is your MOST IMPORTANT output. The Synthesizer depends on your uncertainty assessment.
55
+
56
+ Rate uncertainty on THREE dimensions:
57
+ a) DATA COMPLETENESS: How much of the needed information do we actually have?
58
+ → Complete / Mostly Complete / Partial / Sparse / Missing Critical Data
59
+ b) SOURCE RELIABILITY: How trustworthy are the sources collectively?
60
+ → Highly Reliable / Generally Reliable / Mixed / Questionable / Unreliable
61
+ c) TEMPORAL VALIDITY: How current and still-relevant is the information?
62
+ → Current / Mostly Current / Aging / Stale / Outdated
63
+
64
+ 6. CORRECTION & IMPROVEMENT
65
+ - Don't just criticize — suggest specific corrections
66
+ - If a claim is wrong, state what the correct information is (if known)
67
+ - If a plan step is risky, suggest a safer alternative
68
+ - If information is missing, specify exactly what's needed
69
+
70
+ ═══════════════════════════════════════════════════════════════
71
+ OUTPUT FORMAT (follow strictly)
72
+ ═══════════════════════════════════════════════════════════════
73
+
74
+ Claims Verified:
75
+ - ✅ <claim> — Verified (Source: <source>, Credibility: <high/medium/low>)
76
+ - ⚠️ <claim> — Partially Verified (Reason: <why>)
77
+ - ❌ <claim> — Unverified/False (Reason: <why>)
78
+
79
+ Logic Assessment:
80
+ - <Assessment of the Planner's reasoning quality>
81
+ - Logical gaps found: <list or "none">
82
+
83
+ Bias & Manipulation Flags:
84
+ - <Any detected bias, framing, or manipulation patterns>
85
+
86
+ Rumors Detected:
87
+ - [RISK: HIGH/MEDIUM/LOW] <rumor description> — <why it matters>
88
+
89
+ Scams & Red Flags:
90
+ - [SEVERITY: HIGH/MEDIUM/LOW] <scam/red flag> — <evidence>
91
+
92
+ Source Credibility Summary:
93
+ - <source name>: <score or rating> — <basis for rating>
94
+
95
+ Uncertainty Assessment:
96
+ - Data Completeness: <rating>
97
+ - Source Reliability: <rating>
98
+ - Temporal Validity: <rating>
99
+ - Overall Uncertainty: HIGH / MEDIUM / LOW
100
+ - Key Uncertainty Factors:
101
+ → <factor 1>
102
+ → <factor 2>
103
+
104
+ Corrections & Recommendations:
105
+ - <specific correction or improvement>
106
+
107
+ Approved: YES / YES WITH CAVEATS / NO
108
+ Approval Notes: <brief explanation>
109
+
110
+ Confidence: <0.0 to 1.0>
111
+ Reasoning: <one sentence explaining your confidence in this verification>
backend/app/routers/finance.py ADDED
@@ -0,0 +1,297 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Finance Intelligence Router
3
+
4
+ Exposes domain pack capabilities directly as API endpoints:
5
+ - Ticker/company intelligence (quote + overview + news)
6
+ - Stance detection (bullish/bearish)
7
+ - Scam detection
8
+ - Rumor detection
9
+ - Source credibility
10
+ - Event impact analysis
11
+ - AI signal synthesis
12
+ """
13
+
14
+ import logging
15
+ from typing import Optional
16
+ from fastapi import APIRouter, HTTPException
17
+ from pydantic import BaseModel
18
+
19
+ from app.domain_packs.finance.ticker_resolver import extract_tickers, resolve_ticker
20
+ from app.domain_packs.finance.entity_resolver import extract_entities
21
+ from app.domain_packs.finance.stance_detector import detect_stance, analyze_price_action_language
22
+ from app.domain_packs.finance.scam_detector import detect_scam_indicators
23
+ from app.domain_packs.finance.rumor_detector import detect_rumor_indicators
24
+ from app.domain_packs.finance.source_checker import check_source_credibility, aggregate_source_scores
25
+ from app.domain_packs.finance.event_analyzer import analyze_event_impact, detect_event_type
26
+ from app.domain_packs.finance.market_data import get_quote, get_company_overview, search_symbol
27
+ from app.domain_packs.finance.news import get_company_news, get_top_headlines
28
+ from app.domain_packs.finance.prediction import structure_prediction_context, suggest_simulation_scenarios
29
+ from app.agents._model import call_model
30
+
31
+ logger = logging.getLogger(__name__)
32
+ router = APIRouter(prefix="/finance", tags=["finance"])
33
+
34
+
35
class AnalyzeTextRequest(BaseModel):
    """Request body for POST /finance/analyze/text."""
    # Raw text to run the domain-pack analyzers on.
    text: str
    # Optional source URLs/names used for credibility aggregation.
    # (pydantic deep-copies field defaults per instance, so a mutable
    # default is safe here, unlike a plain function default.)
    sources: list[str] = []


class TickerRequest(BaseModel):
    """Request body carrying a single ticker symbol."""
    symbol: str


class NewsAnalysisRequest(BaseModel):
    """Request body for POST /finance/news/analyze."""
    # Search query: company name or free-text topic.
    query: str
    # Maximum number of articles to fetch and analyze.
    limit: int = 8
47
+
48
+
49
+ # ── Text Intelligence ─────────────────────────────────────────────────────────
50
+
51
@router.post("/analyze/text")
def analyze_text(req: AnalyzeTextRequest):
    """
    Run all finance domain-pack analyzers on a piece of text.

    Args:
        req: Text to analyze plus optional sources for credibility scoring.

    Returns:
        Dict with tickers, high-confidence entities, stance, price-action
        language, scam/rumor detection, event impact and (when sources were
        supplied) an aggregated source assessment.

    Raises:
        HTTPException: 500 if any analyzer fails.
    """
    try:
        text = req.text
        # The analyzers are independent of each other; run them all and
        # bundle the results into one response payload.
        tickers = extract_tickers(text)
        entities = extract_entities(text)
        stance = detect_stance(text)
        price_action = analyze_price_action_language(text)
        scam = detect_scam_indicators(text)
        rumor = detect_rumor_indicators(text)
        events = detect_event_type(text)
        event_impact = analyze_event_impact(text, events)
        # Source credibility only makes sense when the caller supplied sources.
        source_assessment = aggregate_source_scores(req.sources) if req.sources else None

        return {
            "tickers": tickers,
            # Drop low-confidence entities to keep the payload high-signal.
            "entities": [e for e in entities if e.get("confidence", 0) >= 0.7],
            "stance": stance,
            "price_action": price_action,
            "scam_detection": scam,
            "rumor_detection": rumor,
            "event_impact": event_impact,
            "source_assessment": source_assessment,
        }
    except Exception as e:
        # logger.exception records the traceback; lazy %-args avoid building
        # the message when the level is disabled.
        logger.exception("Text analysis failed: %s", e)
        raise HTTPException(status_code=500, detail="Analysis failed") from e
84
+
85
+
86
+ # ── Ticker Intelligence ───────────────────────────────────────────────────────
87
+
88
@router.get("/ticker/{symbol}")
def ticker_intelligence(symbol: str):
    """
    Full intelligence package for a ticker:
    quote + company overview + recent news + AI signal.

    Args:
        symbol: Ticker symbol (case-insensitive; normalized to upper case).

    Raises:
        HTTPException: 500 if any upstream data fetch fails.
    """
    symbol = symbol.upper().strip()
    try:
        quote = get_quote(symbol)
        overview = get_company_overview(symbol)

        company_name = overview.get("Name") or symbol
        news = get_company_news(company_name, days_back=7)

        # Concatenate recent headlines so stance/event detection sees them all.
        headlines_text = " ".join(
            a.get("title", "") + " " + (a.get("description") or "")
            for a in news[:10]
        )
        # Compute the "do we have any text" check once instead of three times.
        has_text = bool(headlines_text.strip())
        stance = detect_stance(headlines_text) if has_text else {
            "stance": "neutral", "confidence": 0.3, "sentiment_score": 0.5
        }
        events = detect_event_type(headlines_text) if has_text else []
        event_impact = analyze_event_impact(headlines_text, events) if has_text else {}

        # Build AI signal using the LLM.
        ai_signal = _generate_ai_signal(symbol, company_name, quote, overview, news[:5], stance, events)

        return {
            "symbol": symbol,
            "company_name": company_name,
            "quote": quote,
            "overview": {
                "sector": overview.get("Sector"),
                "industry": overview.get("Industry"),
                "market_cap": overview.get("MarketCapitalization"),
                "pe_ratio": overview.get("PERatio"),
                "52_week_high": overview.get("52WeekHigh"),
                "52_week_low": overview.get("52WeekLow"),
                "analyst_target": overview.get("AnalystTargetPrice"),
                "description": (overview.get("Description") or "")[:400],
            },
            "news": [
                {
                    "title": a.get("title"),
                    "source": a.get("source", {}).get("name"),
                    "url": a.get("url"),
                    "published_at": a.get("publishedAt"),
                    "description": (a.get("description") or "")[:200],
                }
                for a in news[:8]
            ],
            "stance": stance,
            "event_impact": event_impact,
            "ai_signal": ai_signal,
        }
    except Exception as e:
        logger.exception("Ticker intelligence failed for %s: %s", symbol, e)
        raise HTTPException(status_code=500, detail=f"Failed to fetch intelligence for {symbol}") from e
145
+
146
+
147
@router.get("/search/{query}")
def search_ticker(query: str):
    """Search for ticker symbols by company name or keyword.

    Returns up to eight matches with the provider's numbered keys mapped to
    clean field names; returns an empty list on any upstream failure.
    """
    try:
        matches = search_symbol(query)
        # Alpha-Vantage-style responses use numbered keys ("1. symbol", ...);
        # remap them to stable, friendly names for the frontend.
        results = []
        for match in matches[:8]:
            results.append({
                "symbol": match.get("1. symbol"),
                "name": match.get("2. name"),
                "type": match.get("3. type"),
                "region": match.get("4. region"),
                "currency": match.get("8. currency"),
            })
        return results
    except Exception as e:
        logger.error(f"Symbol search failed: {e}")
        return []
165
+
166
+
167
+ # ── News Intelligence ─────────────────────────────────────────────────────────
168
+
169
@router.post("/news/analyze")
def analyze_news(req: NewsAnalysisRequest):
    """
    Fetch news for a query and run full intelligence analysis on each article.

    Falls back to a generic keyword search when company-specific news is empty.

    Raises:
        HTTPException: 500 on any analysis failure.
    """
    try:
        articles = get_company_news(req.query, days_back=7)
        if not articles:
            # Company lookup found nothing; fall back to keyword search.
            from app.domain_packs.finance.news import search_news
            articles = search_news(req.query, page_size=req.limit)

        analyzed = []
        for article in articles[:req.limit]:
            # Title + description is all the text we have per article.
            text = (article.get("title") or "") + " " + (article.get("description") or "")
            url = article.get("url", "")

            stance = detect_stance(text)
            scam = detect_scam_indicators(text)
            rumor = detect_rumor_indicators(text)
            # Credibility needs a URL to score the outlet; missing URL → neutral 0.5.
            source = check_source_credibility(url) if url else None

            analyzed.append({
                "title": article.get("title"),
                "source": article.get("source", {}).get("name"),
                "url": url,
                "published_at": article.get("publishedAt"),
                "description": (article.get("description") or "")[:200],
                "stance": stance.get("stance"),
                "sentiment_score": stance.get("sentiment_score"),
                "scam_score": scam.get("scam_score"),
                "rumor_score": rumor.get("rumor_score"),
                "source_credibility": source.get("credibility_score") if source else 0.5,
            })

        return {"query": req.query, "articles": analyzed, "total": len(analyzed)}
    except Exception as e:
        logger.exception("News analysis failed: %s", e)
        raise HTTPException(status_code=500, detail="News analysis failed") from e
207
+
208
+
209
@router.get("/headlines")
def get_headlines():
    """Return top business headlines, each enriched with intelligence scores.

    Best-effort endpoint: any failure yields an empty list, never an error.
    """
    try:
        items = get_top_headlines(category="business", page_size=10)
        enriched = []
        for item in items[:10]:
            body = (item.get("title") or "") + " " + (item.get("description") or "")
            link = item.get("url", "")

            stance_info = detect_stance(body)
            scam_info = detect_scam_indicators(body)
            rumor_info = detect_rumor_indicators(body)
            # Without a URL there is nothing to rate; the response falls back
            # to a neutral 0.5 credibility below.
            credibility = check_source_credibility(link) if link else None

            enriched.append({
                "title": item.get("title"),
                "source": item.get("source", {}).get("name"),
                "url": link,
                "published_at": item.get("publishedAt"),
                "description": (item.get("description") or "")[:200],
                "stance": stance_info.get("stance"),
                "sentiment_score": stance_info.get("sentiment_score"),
                "scam_score": scam_info.get("scam_score"),
                "rumor_score": rumor_info.get("rumor_score"),
                "source_credibility": credibility.get("credibility_score") if credibility else 0.5,
            })
        return enriched
    except Exception as e:
        logger.error(f"Headlines fetch failed: {e}")
        return []
240
+
241
+
242
+ # ── AI Signal Generator ───────────────────────────────────────────────────────
243
+
244
def _generate_ai_signal(
    symbol: str,
    company_name: str,
    quote: dict,
    overview: dict,
    news: list,
    stance: dict,
    events: list,
) -> dict:
    """Generate an AI trading signal using the LLM.

    Builds a compact prompt from quote/stance/event context, asks the model
    for strict JSON, and parses it. Any failure (LLM error, malformed JSON,
    wrong structure) degrades to a conservative WATCH signal instead of
    raising, so the ticker endpoint never fails because of the signal.
    """
    import json
    import re

    try:
        price = quote.get("05. price", "N/A")
        change_pct = quote.get("10. change percent", "N/A")
        headlines = "\n".join(f"- {a.get('title', '')}" for a in news[:5])
        event_names = ", ".join(e.get("event_type", "") for e in events[:3]) or "none detected"

        prompt = f"""You are a financial intelligence analyst. Analyze this data and give a concise signal.

Company: {company_name} ({symbol})
Current Price: {price}
Change Today: {change_pct}
Market Stance from News: {stance.get('stance')} (confidence: {stance.get('confidence', 0):.0%})
Key Events Detected: {event_names}

Recent Headlines:
{headlines}

Respond in this exact JSON format (no markdown, no extra text):
{{
"signal": "BUY" | "SELL" | "HOLD" | "WATCH",
"conviction": 0.0-1.0,
"reasoning": "one sentence max",
"risk": "LOW" | "MEDIUM" | "HIGH",
"timeframe": "short-term" | "medium-term" | "long-term"
}}"""

        raw = call_model(prompt, mode="chat")

        # Models sometimes wrap JSON in markdown fences despite instructions;
        # strip any ``` fences (the pattern also matches the closing fence).
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            cleaned = re.sub(r"```[a-z]*\n?", "", cleaned).strip()
        data = json.loads(cleaned)
        # Guard against structurally valid JSON that is not the expected
        # object (e.g. a bare string or list) — fall back instead.
        if not isinstance(data, dict) or "signal" not in data:
            raise ValueError("LLM response missing 'signal' field")
        return data
    except Exception as e:
        logger.warning(f"AI signal generation failed: {e}")
        return {
            "signal": "WATCH",
            "conviction": 0.3,
            "reasoning": "Insufficient data for confident signal",
            "risk": "MEDIUM",
            "timeframe": "short-term",
        }
backend/app/routers/learning.py ADDED
@@ -0,0 +1,282 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Learning API endpoints for autonomous knowledge evolution.
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ from fastapi import APIRouter, HTTPException
8
+ from typing import List, Optional
9
+
10
+ from ..schemas import (
11
+ LearningStatusResponse,
12
+ LearningInsightsResponse,
13
+ KnowledgeIngestionRequest,
14
+ KnowledgeItem,
15
+ Skill,
16
+ SkillDistillRequest,
17
+ SourceTrust,
18
+ PromptVersion,
19
+ )
20
+ from ..config import get_config
21
+ from ..services.learning import (
22
+ KnowledgeIngestor,
23
+ KnowledgeStore,
24
+ LearningEngine,
25
+ PromptOptimizer,
26
+ SkillDistiller,
27
+ TrustManager,
28
+ LearningScheduler,
29
+ )
30
+ from ..agents._model import call_model
31
+
32
+ logger = logging.getLogger(__name__)
33
+ router = APIRouter(prefix="/learning", tags=["learning"])
34
+
35
+ # Global instances - initialized via init_learning_services()
36
+ learning_engine: Optional[LearningEngine] = None
37
+ knowledge_store: Optional[KnowledgeStore] = None
38
+ prompt_optimizer: Optional[PromptOptimizer] = None
39
+ skill_distiller: Optional[SkillDistiller] = None
40
+ trust_manager: Optional[TrustManager] = None
41
+ scheduler: Optional[LearningScheduler] = None
42
+
43
+
44
async def _async_call_model(prompt: str, max_tokens: int = 1000) -> str:
    """Async wrapper around the synchronous call_model.

    Runs the blocking LLM call in a worker thread so the event loop is not
    stalled for the duration of the request (the previous version called it
    inline, blocking every other coroutine). `max_tokens` is accepted for
    interface compatibility but call_model does not currently take it.
    """
    return await asyncio.to_thread(call_model, prompt, mode="chat")
47
+
48
+
49
def init_learning_services(config):
    """Initialize all learning services. Called from main.py on startup.

    Builds the knowledge store/ingestor, prompt optimizer, skill distiller
    and trust manager, wires them into a LearningEngine, and (when learning
    is enabled) registers the recurring tasks on a resource-aware scheduler.
    Instances are stored in this module's globals so route handlers and
    start_scheduler_background() can reach them.
    """
    global learning_engine, knowledge_store, prompt_optimizer, skill_distiller, trust_manager, scheduler

    # Persistent store for ingested knowledge, capped by size.
    knowledge_store = KnowledgeStore(
        data_dir=config.data_dir,
        max_size_mb=config.knowledge_max_size_mb,
    )

    # Pulls fresh material via Tavily / NewsAPI keys and summarizes with the LLM.
    knowledge_ingestor = KnowledgeIngestor(
        tavily_key=config.tavily_api_key,
        newsapi_key=config.newsapi_key,
        model_fn=_async_call_model,
    )

    prompt_optimizer = PromptOptimizer(
        data_dir=config.data_dir,
        model_fn=_async_call_model,
    )

    skill_distiller = SkillDistiller(
        data_dir=config.data_dir,
        model_fn=_async_call_model,
    )

    trust_manager = TrustManager(data_dir=config.data_dir)

    # The engine orchestrates all of the components built above.
    learning_engine = LearningEngine(
        knowledge_store=knowledge_store,
        knowledge_ingestor=knowledge_ingestor,
        prompt_optimizer=prompt_optimizer,
        skill_distiller=skill_distiller,
        trust_manager=trust_manager,
    )

    # Scheduler throttles background work by CPU and battery headroom.
    scheduler = LearningScheduler(
        max_cpu_percent=50.0,
        min_battery_percent=30.0,
        check_interval_seconds=60,
    )

    if config.learning_enabled:
        # Task 1: Knowledge ingestion (every 6 hours)
        scheduler.schedule_task(
            "knowledge_ingestion",
            lambda: learning_engine.run_knowledge_ingestion(config.learning_topics),
            interval_hours=config.learning_schedule_interval,
        )
        # Task 2: Expired knowledge cleanup (daily)
        scheduler.schedule_task(
            "cleanup",
            lambda: learning_engine.run_cleanup(expiration_days=30),
            interval_hours=24,
        )
        # Task 3: Pattern detection (daily)
        async def _run_pattern_detection():
            # Wrapped in an async def so the scheduler gets an awaitable,
            # matching the coroutine-returning lambdas of the other tasks.
            return learning_engine.detect_patterns()

        scheduler.schedule_task(
            "pattern_detection",
            _run_pattern_detection,
            interval_hours=24,
        )
        # Task 4: Skill distillation (weekly)
        scheduler.schedule_task(
            "skill_distillation",
            lambda: learning_engine.run_skill_distillation(min_frequency=3),
            interval_hours=168,
        )
        # Task 5: Prompt optimization (weekly)
        scheduler.schedule_task(
            "prompt_optimization",
            lambda: learning_engine.run_prompt_optimization(
                ["research", "planner", "verifier", "synthesizer"]
            ),
            interval_hours=168,
        )

    logger.info("Learning services initialized with all scheduled tasks")
128
+
129
+
130
def start_scheduler_background():
    """Launch the learning scheduler as a fire-and-forget asyncio task.

    No-op when the scheduler is missing (init not called) or already running.
    """
    if scheduler and not scheduler.running:
        # create_task requires a running event loop; FastAPI's startup hook
        # provides one when this is called from main.py.
        asyncio.create_task(scheduler.start())
        logger.info("Learning scheduler started in background")
135
+
136
+
137
+ # ── Status ────────────────────────────────────────────────────────────────────
138
+
139
@router.get("/status")
async def get_learning_status():
    """Return learning-engine status, including scheduler state when available."""
    if not learning_engine:
        raise HTTPException(status_code=503, detail="Learning engine not initialized")

    status = learning_engine.get_status()
    # Scheduler is optional; attach its state only when it exists.
    if scheduler:
        status["scheduler"] = scheduler.get_status()
    return status


@router.post("/run-once")
async def run_learning_once(task_name: str):
    """Run a single scheduled learning task immediately by name (404 if unknown)."""
    if not scheduler:
        raise HTTPException(status_code=503, detail="Scheduler not initialized")
    try:
        return await scheduler.run_once(task_name)
    except ValueError as e:
        # Unknown task name; chain the cause for easier debugging.
        raise HTTPException(status_code=404, detail=str(e)) from e


@router.get("/insights")
async def get_learning_insights():
    """Return aggregated learning insights from the engine."""
    if not learning_engine:
        raise HTTPException(status_code=503, detail="Learning engine not initialized")
    return learning_engine.get_insights()
168
+
169
+
170
# ── Knowledge (fixed-path routes BEFORE parameterised ones) ─────────────────

def _require_knowledge_store() -> None:
    """Raise 503 unless the knowledge store has been initialized."""
    if not knowledge_store:
        raise HTTPException(status_code=503, detail="Knowledge store not initialized")


@router.get("/knowledge")
async def list_knowledge(limit: Optional[int] = 50):
    """Return up to `limit` stored knowledge items."""
    _require_knowledge_store()
    return knowledge_store.list_all(limit=limit)


@router.post("/knowledge/ingest")
async def ingest_knowledge(request: KnowledgeIngestionRequest):
    """Trigger an immediate knowledge-ingestion run for the requested topics."""
    if not learning_engine:
        raise HTTPException(status_code=503, detail="Learning engine not initialized")
    return await learning_engine.run_knowledge_ingestion(request.topics)


@router.get("/knowledge/search")
async def search_knowledge(query: str, limit: int = 10):
    """Search stored knowledge for `query`, returning at most `limit` hits."""
    _require_knowledge_store()
    return knowledge_store.search_knowledge(query, limit=limit)


@router.get("/knowledge/{item_id}")
async def get_knowledge_item(item_id: str):
    """Fetch one knowledge item by id (404 when absent)."""
    _require_knowledge_store()
    item = knowledge_store.get_knowledge(item_id)
    if not item:
        raise HTTPException(status_code=404, detail="Knowledge item not found")
    return item
201
+
202
+
203
+ # ── Skills (fixed-path routes BEFORE parameterised ones) ────────────────────
204
+
205
@router.get("/skills")
async def list_skills():
    """List all distilled skills."""
    if not skill_distiller:
        raise HTTPException(status_code=503, detail="Skill distiller not initialized")
    return skill_distiller.list_skills()


@router.post("/skills/distill")
async def distill_skills(request: SkillDistillRequest):
    """
    Detect repeated solution patterns in recent cases and distill up to five
    of them into reusable skills.

    Returns:
        Counts of candidates found / skills distilled, plus the skill objects.
    """
    if not skill_distiller:
        raise HTTPException(status_code=503, detail="Skill distiller not initialized")
    from ..services.case_store import list_cases
    cases = list_cases(limit=100)
    candidates = skill_distiller.detect_skill_candidates(cases, min_frequency=request.min_frequency)
    skills = []
    for candidate in candidates[:5]:
        # Fetch each case's route dict once (the original looked it up twice
        # per case) and keep at most three examples per candidate.
        example_cases = [
            c for c in cases
            if (r := c.get("route")) and r.get("domain_pack") == candidate.get("domain")
        ][:3]
        skill = await skill_distiller.distill_skill(candidate, example_cases)
        skills.append(skill)
    return {"candidates_found": len(candidates), "skills_distilled": len(skills), "skills": skills}


@router.get("/skills/{skill_id}")
async def get_skill(skill_id: str):
    """Fetch a single skill by id (404 when absent)."""
    if not skill_distiller:
        raise HTTPException(status_code=503, detail="Skill distiller not initialized")
    skill = skill_distiller.get_skill(skill_id)
    if not skill:
        raise HTTPException(status_code=404, detail="Skill not found")
    return skill
235
+
236
+
237
# ── Trust & Freshness ─────────────────────────────────────────────────────────

@router.get("/sources/trust")
async def get_trusted_sources(min_trust: float = 0.7, min_verifications: int = 3):
    """List sources whose trust score and verification count clear both thresholds."""
    if not trust_manager:
        raise HTTPException(status_code=503, detail="Trust manager not initialized")
    return trust_manager.list_trusted_sources(
        min_trust=min_trust,
        min_verifications=min_verifications,
    )


@router.get("/sources/freshness")
async def get_stale_items(threshold: float = 0.3):
    """Return knowledge items whose freshness score fell below `threshold`."""
    if not trust_manager or not knowledge_store:
        raise HTTPException(status_code=503, detail="Services not initialized")
    all_items = knowledge_store.list_all()
    return trust_manager.get_stale_items(all_items, threshold=threshold)
252
+
253
+
254
# ── Prompt Evolution (fixed-path routes BEFORE parameterised ones) ───────────

@router.post("/prompts/optimize/{name}")
async def optimize_prompt(name: str, goal: str):
    """Create an optimized variant of prompt `name` targeting `goal`."""
    if not prompt_optimizer:
        raise HTTPException(status_code=503, detail="Prompt optimizer not initialized")
    from ..services.prompt_store import get_prompt
    prompt_data = get_prompt(name)
    if not prompt_data:
        raise HTTPException(status_code=404, detail=f"Prompt '{name}' not found")
    return await prompt_optimizer.create_prompt_variant(name, prompt_data["content"], goal)


@router.post("/prompts/promote/{name}/{version}")
async def promote_prompt_version(name: str, version: str):
    """Promote a tested prompt variant to active, enforcing the win-rate gate."""
    if not prompt_optimizer:
        raise HTTPException(status_code=503, detail="Prompt optimizer not initialized")
    if not prompt_optimizer.promote_prompt(version):
        raise HTTPException(status_code=400, detail="Promotion criteria not met (need ≥10 tests and ≥70% win rate)")
    return {"status": "promoted", "variant_id": version}


@router.get("/prompts/versions/{name}")
async def get_prompt_versions(name: str):
    """List all recorded versions of prompt `name`."""
    if not prompt_optimizer:
        raise HTTPException(status_code=503, detail="Prompt optimizer not initialized")
    return prompt_optimizer.list_versions(name)
backend/app/routers/sentinel.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Router - API Endpoints
3
+
4
+ Provides REST API for sentinel monitoring and control.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ from typing import List, Dict, Any
10
+
11
+ from fastapi import APIRouter, HTTPException
12
+ from pydantic import BaseModel
13
+
14
+ from app.services.sentinel.sentinel_engine import SentinelEngine
15
+ from app.services.sentinel.patcher import SentinelPatcher
16
+ from app.services.sentinel.capability_tracker import CapabilityTracker
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ router = APIRouter(prefix="/sentinel", tags=["sentinel"])
21
+
22
+ # Initialize services
23
+ engine = SentinelEngine()
24
+ patcher = SentinelPatcher()
25
+ tracker = CapabilityTracker()
26
+
27
+
28
+ # ── Request/Response Models ───────────────────────────────────────────────────
29
+
30
class PatchApprovalRequest(BaseModel):
    """Request to approve a pending patch.

    Currently carries no fields: it exists so the endpoint has a typed
    (empty) JSON body and can grow fields without an interface break.
    """
    pass


class PatchRejectionRequest(BaseModel):
    """Request to reject a pending patch.

    Currently carries no fields; placeholder for future metadata such as a
    rejection reason.
    """
    pass
38
+
39
+
40
+ # ── Helper Functions ──────────────────────────────────────────────────────────
41
+
42
+ def _disabled_response():
43
+ """Return response when sentinel is disabled."""
44
+ return {
45
+ "sentinel_enabled": False,
46
+ "message": "Sentinel layer is disabled. Set SENTINEL_ENABLED=true to enable."
47
+ }
48
+
49
+
50
+ def _check_enabled():
51
+ """Check if sentinel is enabled, raise HTTPException if not."""
52
+ if not os.getenv("SENTINEL_ENABLED", "true").lower() == "true":
53
+ raise HTTPException(status_code=503, detail="Sentinel layer is disabled")
54
+
55
+
56
+ # ── Endpoints ─────────────────────────────────────────────────────────────────
57
+
58
@router.get("/status")
def get_status():
    """Return current sentinel health status, or the disabled notice.

    Unlike the other endpoints, a disabled sentinel is reported in-band
    rather than as a 503, so dashboards can always render something.
    """
    enabled = os.getenv("SENTINEL_ENABLED", "true").lower() == "true"
    if not enabled:
        return _disabled_response()

    try:
        return engine.get_status()
    except Exception as e:
        logger.error(f"Failed to get sentinel status: {e}")
        raise HTTPException(status_code=500, detail="Failed to get sentinel status")
74
+
75
+
76
@router.get("/alerts")
def get_alerts(limit: int = 50):
    """
    Get recent alerts.

    Args:
        limit: Maximum number of alerts to return.

    Returns:
        List of the most recent alerts (empty if no history file yet).

    Raises:
        HTTPException: 500 when the history file exists but cannot be read.
    """
    _check_enabled()

    try:
        import json
        from pathlib import Path

        alert_file = Path("backend/app/data/sentinel") / "alert_history.json"
        if not alert_file.exists():
            return []

        alerts = json.loads(alert_file.read_text())

        # A negative slice already returns the whole list when it is shorter
        # than `limit`, so the previous explicit length check was redundant.
        return alerts[-limit:]

    except Exception as e:
        logger.exception("Failed to get alerts: %s", e)
        raise HTTPException(status_code=500, detail="Failed to get alerts") from e
108
+
109
+
110
@router.get("/patches")
def get_patches(limit: int = 50):
    """Return up to `limit` of the most recently applied patches."""
    _check_enabled()

    try:
        import json
        from pathlib import Path

        history_path = Path("backend/app/data/sentinel") / "patch_history.json"
        if not history_path.exists():
            return []

        with open(history_path, 'r') as f:
            history = json.load(f)

        # Newest entries live at the end of the history file.
        return history[-limit:] if len(history) > limit else history

    except Exception as e:
        logger.error(f"Failed to get patches: {e}")
        raise HTTPException(status_code=500, detail="Failed to get patches")


@router.get("/patches/pending")
def get_pending_patches():
    """Return pending patches awaiting human review."""
    _check_enabled()

    try:
        import json
        from pathlib import Path

        pending_dir = Path("backend/app/data/sentinel") / "pending_patches"
        if not pending_dir.exists():
            return []

        pending = []
        for candidate in pending_dir.glob("*.patch.json"):
            try:
                with open(candidate, 'r') as f:
                    pending.append(json.load(f))
            except Exception as e:
                # Skip unreadable files instead of failing the whole listing.
                logger.warning(f"Failed to read pending patch {candidate}: {e}")

        return pending

    except Exception as e:
        logger.error(f"Failed to get pending patches: {e}")
        raise HTTPException(status_code=500, detail="Failed to get pending patches")
180
+
181
+
182
@router.post("/patches/{patch_id}/approve")
def approve_patch(patch_id: str, request: PatchApprovalRequest):
    """Approve and apply a pending patch; 404 when no such patch exists.

    Args:
        patch_id: ID of the pending patch.
        request: Approval request (empty body).

    Returns:
        The patch result serialized as a dict.
    """
    _check_enabled()

    try:
        outcome = patcher.approve_pending(patch_id)
        if outcome is None:
            raise HTTPException(status_code=404, detail="Pending patch not found")
        return outcome.to_dict()
    except HTTPException:
        # Re-raise our own 404 untouched; only wrap unexpected errors below.
        raise
    except Exception as e:
        logger.error(f"Failed to approve patch {patch_id}: {e}")
        raise HTTPException(status_code=500, detail="Failed to approve patch")


@router.post("/patches/{patch_id}/reject")
def reject_patch(patch_id: str, request: PatchRejectionRequest):
    """Reject a pending patch; 404 when no such patch exists.

    Args:
        patch_id: ID of the pending patch.
        request: Rejection request (empty body).

    Returns:
        Confirmation dict with the rejected patch id.
    """
    _check_enabled()

    try:
        if not patcher.reject_pending(patch_id):
            raise HTTPException(status_code=404, detail="Pending patch not found")
        return {"rejected": True, "patch_id": patch_id}
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to reject patch {patch_id}: {e}")
        raise HTTPException(status_code=500, detail="Failed to reject patch")
238
+
239
+
240
@router.get("/capability")
def get_capability_trend(days: int = 7):
    """Return capability snapshots covering the last `days` days."""
    _check_enabled()

    try:
        return tracker.trend(days=days)
    except Exception as e:
        logger.error(f"Failed to get capability trend: {e}")
        raise HTTPException(status_code=500, detail="Failed to get capability trend")


@router.get("/capability/current")
def get_current_capability():
    """Return the current capability snapshot serialized as a dict."""
    _check_enabled()

    try:
        return tracker.snapshot().to_dict()
    except Exception as e:
        logger.error(f"Failed to get current capability: {e}")
        raise HTTPException(status_code=500, detail="Failed to get current capability")
278
+
279
+
280
@router.post("/run-now")
def run_cycle_now():
    """Trigger a sentinel monitoring cycle immediately and return its report."""
    _check_enabled()

    try:
        report = engine.run_cycle()
        return report.dict()
    except Exception as e:
        logger.exception("Failed to run sentinel cycle: %s", e)
        raise HTTPException(status_code=500, detail="Failed to run sentinel cycle") from e


@router.get("/cycle-history")
def get_cycle_history(limit: int = 20):
    """
    Get recent cycle history.

    Args:
        limit: Maximum number of cycles to return.

    Returns:
        List of the most recent cycle reports (empty if no history file yet).

    Raises:
        HTTPException: 500 when the history file exists but cannot be read.
    """
    _check_enabled()

    try:
        import json
        from pathlib import Path

        history_file = Path("backend/app/data/sentinel") / "cycle_history.json"
        if not history_file.exists():
            return []

        history = json.loads(history_file.read_text())

        # A negative slice already caps at the list length, so the explicit
        # length comparison in the original was redundant.
        return history[-limit:]

    except Exception as e:
        logger.exception("Failed to get cycle history: %s", e)
        raise HTTPException(status_code=500, detail="Failed to get cycle history") from e
backend/app/routers/simulation.py CHANGED
@@ -1,4 +1,5 @@
1
  import uuid
 
2
  from fastapi import APIRouter, HTTPException
3
 
4
  from app.config import MIROFISH_ENABLED
@@ -11,8 +12,9 @@ from app.services.mirofish_client import (
11
  simulation_chat,
12
  MiroFishError,
13
  )
14
- from app.services.simulation_store import save_simulation, get_simulation
15
 
 
16
  router = APIRouter(prefix="/simulation", tags=["simulation"])
17
 
18
 
@@ -21,15 +23,42 @@ def simulation_health():
21
  return mirofish_health()
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
24
  @router.post("/run")
25
  def simulation_run(payload: SimulationRunRequest):
 
 
 
 
 
 
26
  if not MIROFISH_ENABLED:
27
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
28
 
29
  try:
 
30
  remote = run_simulation(payload.model_dump())
 
31
  except MiroFishError as e:
32
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
33
 
34
  simulation_id = (
35
  remote.get("simulation_id")
@@ -38,59 +67,107 @@ def simulation_run(payload: SimulationRunRequest):
38
  or str(uuid.uuid4())
39
  )
40
 
 
41
  record = {
42
  "simulation_id": simulation_id,
43
  "status": remote.get("status", "submitted"),
44
  "title": payload.title,
45
  "prediction_goal": payload.prediction_goal,
46
  "remote_payload": remote,
 
47
  }
48
  save_simulation(simulation_id, record)
 
49
  return record
50
 
51
 
52
  @router.get("/{simulation_id}")
53
  def simulation_status_endpoint(simulation_id: str):
 
 
 
 
 
54
  if not MIROFISH_ENABLED:
55
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
56
 
57
  try:
 
58
  remote = simulation_status(simulation_id)
59
  except MiroFishError as e:
60
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
61
 
62
  local = get_simulation(simulation_id) or {}
63
  merged = {**local, **remote, "simulation_id": simulation_id}
64
  save_simulation(simulation_id, merged)
 
65
  return merged
66
 
67
 
68
  @router.get("/{simulation_id}/report")
69
  def simulation_report_endpoint(simulation_id: str):
 
 
 
 
 
70
  if not MIROFISH_ENABLED:
71
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
72
 
73
  try:
 
74
  report = simulation_report(simulation_id)
75
  except MiroFishError as e:
76
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
77
 
78
  local = get_simulation(simulation_id) or {}
79
  merged = {**local, "report": report, "simulation_id": simulation_id}
80
  save_simulation(simulation_id, merged)
 
81
  return merged
82
 
83
 
84
  @router.post("/{simulation_id}/chat")
85
  def simulation_chat_endpoint(simulation_id: str, payload: SimulationChatRequest):
 
 
 
 
 
86
  if not MIROFISH_ENABLED:
87
- raise HTTPException(status_code=400, detail="MiroFish integration is disabled")
 
 
 
 
88
 
89
  try:
 
90
  result = simulation_chat(simulation_id, payload.message)
91
  except MiroFishError as e:
92
- raise HTTPException(status_code=502, detail=str(e))
 
 
 
 
93
 
 
94
  return {
95
  "simulation_id": simulation_id,
96
  "message": payload.message,
 
1
  import uuid
2
+ import logging
3
  from fastapi import APIRouter, HTTPException
4
 
5
  from app.config import MIROFISH_ENABLED
 
12
  simulation_chat,
13
  MiroFishError,
14
  )
15
+ from app.services.simulation_store import save_simulation, get_simulation, list_simulations
16
 
17
+ logger = logging.getLogger(__name__)
18
  router = APIRouter(prefix="/simulation", tags=["simulation"])
19
 
20
 
 
23
  return mirofish_health()
24
 
25
 
26
@router.get("/list")
def simulation_list():
    """
    List all simulations.

    Returns a list of all stored simulations with their status.
    """
    # Thin passthrough over the local store; no MiroFish call involved.
    return list_simulations()
35
+
36
+
37
  @router.post("/run")
38
  def simulation_run(payload: SimulationRunRequest):
39
+ """
40
+ Submit a simulation request to MiroFish.
41
+
42
+ This endpoint creates a case-linked simulation that can be tracked
43
+ and referenced from case execution records.
44
+ """
45
  if not MIROFISH_ENABLED:
46
+ logger.warning("Simulation request rejected: MiroFish is disabled")
47
+ raise HTTPException(
48
+ status_code=400,
49
+ detail="MiroFish simulation service is not enabled. Please enable MIROFISH_ENABLED in configuration."
50
+ )
51
 
52
  try:
53
+ logger.info(f"Submitting simulation: {payload.title}")
54
  remote = run_simulation(payload.model_dump())
55
+ logger.info(f"Simulation submitted successfully")
56
  except MiroFishError as e:
57
+ logger.error(f"MiroFish simulation submission failed: {e}")
58
+ raise HTTPException(
59
+ status_code=502,
60
+ detail=f"MiroFish service error: {str(e)}. Please check if MiroFish service is running at the configured endpoint."
61
+ )
62
 
63
  simulation_id = (
64
  remote.get("simulation_id")
 
67
  or str(uuid.uuid4())
68
  )
69
 
70
+ # Store simulation with case linking support
71
  record = {
72
  "simulation_id": simulation_id,
73
  "status": remote.get("status", "submitted"),
74
  "title": payload.title,
75
  "prediction_goal": payload.prediction_goal,
76
  "remote_payload": remote,
77
+ "case_id": payload.metadata.get("case_id") if payload.metadata else None, # Link to case if provided
78
  }
79
  save_simulation(simulation_id, record)
80
+ logger.info(f"Simulation {simulation_id} saved locally with case_id: {record.get('case_id')}")
81
  return record
82
 
83
 
84
@router.get("/{simulation_id}")
def simulation_status_endpoint(simulation_id: str):
    """
    Get the current status of a simulation.

    Returns merged local and remote status information.

    Raises:
        HTTPException: 400 when MiroFish is disabled, 502 when the remote
            status lookup fails.
    """
    if not MIROFISH_ENABLED:
        logger.warning(f"Simulation status request rejected for {simulation_id}: MiroFish is disabled")
        raise HTTPException(
            status_code=400,
            detail="MiroFish simulation service is not enabled.",
        )

    logger.info(f"Fetching status for simulation {simulation_id}")
    try:
        remote_state = simulation_status(simulation_id)
    except MiroFishError as e:
        logger.error(f"Failed to fetch simulation status for {simulation_id}: {e}")
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve simulation status from MiroFish: {str(e)}"
        )

    stored = get_simulation(simulation_id) or {}
    # Remote values override stale local ones; the id is pinned last so it
    # can never be clobbered by either side.
    merged = {**stored, **remote_state, "simulation_id": simulation_id}
    save_simulation(simulation_id, merged)
    logger.info(f"Simulation {simulation_id} status: {merged.get('status', 'unknown')}")
    return merged
113
 
114
 
115
@router.get("/{simulation_id}/report")
def simulation_report_endpoint(simulation_id: str):
    """
    Get the final report for a completed simulation.

    Returns the simulation report with analysis and insights.

    Raises:
        HTTPException: 400 when MiroFish is disabled, 502 when the remote
            report fetch fails.
    """
    if not MIROFISH_ENABLED:
        logger.warning(f"Simulation report request rejected for {simulation_id}: MiroFish is disabled")
        raise HTTPException(
            status_code=400,
            detail="MiroFish simulation service is not enabled.",
        )

    logger.info(f"Fetching report for simulation {simulation_id}")
    try:
        remote_report = simulation_report(simulation_id)
    except MiroFishError as e:
        logger.error(f"Failed to fetch simulation report for {simulation_id}: {e}")
        raise HTTPException(
            status_code=502,
            detail=f"Failed to retrieve simulation report from MiroFish: {str(e)}"
        )

    stored = get_simulation(simulation_id) or {}
    # The report is attached under its own key rather than merged field-by-field.
    merged = {**stored, "report": remote_report, "simulation_id": simulation_id}
    save_simulation(simulation_id, merged)
    logger.info(f"Simulation {simulation_id} report retrieved successfully")
    return merged
144
 
145
 
146
  @router.post("/{simulation_id}/chat")
147
  def simulation_chat_endpoint(simulation_id: str, payload: SimulationChatRequest):
148
+ """
149
+ Ask follow-up questions about a completed simulation.
150
+
151
+ Enables deep interaction with simulation results.
152
+ """
153
  if not MIROFISH_ENABLED:
154
+ logger.warning(f"Simulation chat request rejected for {simulation_id}: MiroFish is disabled")
155
+ raise HTTPException(
156
+ status_code=400,
157
+ detail="MiroFish simulation service is not enabled."
158
+ )
159
 
160
  try:
161
+ logger.info(f"Simulation chat for {simulation_id}: {payload.message[:50]}...")
162
  result = simulation_chat(simulation_id, payload.message)
163
  except MiroFishError as e:
164
+ logger.error(f"Simulation chat failed for {simulation_id}: {e}")
165
+ raise HTTPException(
166
+ status_code=502,
167
+ detail=f"Failed to chat with simulation: {str(e)}"
168
+ )
169
 
170
+ logger.info(f"Simulation chat response received for {simulation_id}")
171
  return {
172
  "simulation_id": simulation_id,
173
  "message": payload.message,
backend/app/schemas.py CHANGED
@@ -108,21 +108,167 @@ class SimulationStoredRecord(BaseModel):
108
  disk_bytes: int
109
 
110
 
111
- class ConfigStatusResponse(BaseModel):
112
- model_name: str
113
- memory_dir: str
114
- prompts_dir: str
115
- gemini_key_present: bool
 
 
 
 
 
 
 
 
 
 
 
116
 
117
 
118
- class AgentInfo(BaseModel):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  name: str
120
- purpose: str
121
- prompt_name: Optional[str] = None
122
- inputs_required: List[str] = Field(default_factory=list)
123
-
124
-
125
- class DeepHealthResponse(BaseModel):
126
- status: str
127
- version: str
128
- checks: Dict[str, Any]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  disk_bytes: int
109
 
110
 
111
# Learning Layer Schemas
#
# NOTE(review): all timestamp fields below (ingested_at, learned_at,
# created_at, last_updated, ...) are plain strings; presumably ISO-8601
# from datetime.isoformat() — confirm against the learning services
# before parsing them.


class KnowledgeItem(BaseModel):
    """Knowledge item with compressed content."""
    id: Optional[str] = None
    source: str = Field(..., description="Source: 'tavily_search', 'jina_reader', 'newsapi'")
    query: Optional[str] = None
    url: Optional[str] = None
    title: Optional[str] = None
    summary: str = Field(..., description="Compressed summary (2-4KB)")
    published_at: Optional[str] = None
    ingested_at: str
    saved_at: Optional[str] = None
    last_accessed: Optional[str] = None
    # Scores are validated into [0, 1] by pydantic's ge/le constraints.
    trust_score: Optional[float] = Field(default=0.5, ge=0.0, le=1.0)
    freshness_score: Optional[float] = Field(default=1.0, ge=0.0, le=1.0)


class CaseLearning(BaseModel):
    """Learning metadata extracted from case execution."""
    case_id: str
    route_effectiveness: Dict[str, Any] = Field(default_factory=dict)
    prompt_performance: Dict[str, Any] = Field(default_factory=dict)
    provider_reliability: Dict[str, Any] = Field(default_factory=dict)
    patterns_detected: List[str] = Field(default_factory=list)
    learned_at: str


class PromptVersion(BaseModel):
    """Prompt version with A/B testing metadata."""
    id: str
    prompt_name: str
    version: int
    prompt_text: str
    goal: str
    status: str = Field(..., description="Status: 'testing', 'production', 'archived'")
    created_at: str
    # A/B counters; win_rate is stored, not derived, so keep them in sync.
    test_count: int = 0
    win_count: int = 0
    win_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    promoted_at: Optional[str] = None
    archived_at: Optional[str] = None


class Skill(BaseModel):
    """Distilled skill from execution patterns."""
    id: str
    name: str
    type: str = Field(..., description="Type: 'domain_expertise', 'preferred_source', 'agent_workflow'")
    description: str
    trigger_patterns: List[str] = Field(default_factory=list)
    recommended_agents: List[str] = Field(default_factory=list)
    preferred_sources: List[str] = Field(default_factory=list)
    expected_outcomes: List[str] = Field(default_factory=list)
    frequency: int
    confidence: float = Field(..., ge=0.0, le=1.0)
    created_at: str
    # Usage counters; success_rate is stored, not derived.
    usage_count: int = 0
    success_count: int = 0
    success_rate: float = Field(default=0.0, ge=0.0, le=1.0)


class SourceTrust(BaseModel):
    """Trust score for a source."""
    source: str
    trust_score: float = Field(..., ge=0.0, le=1.0)
    verification_count: int
    success_count: int
    success_rate: float = Field(..., ge=0.0, le=1.0)
    last_updated: str


class FreshnessScore(BaseModel):
    """Freshness score for a knowledge item."""
    item_id: str
    freshness_score: float = Field(..., ge=0.0, le=1.0)
    age_days: int
    last_updated: str


class LearningStatusResponse(BaseModel):
    """Learning engine status."""
    enabled: bool
    storage: Dict[str, Any]
    last_run: Dict[str, str]


class LearningInsightsResponse(BaseModel):
    """Learning insights."""
    recent_knowledge: List[Dict[str, Any]]
    storage_stats: Dict[str, Any]


class KnowledgeIngestionRequest(BaseModel):
    """Request to ingest knowledge."""
    topics: List[str] = Field(..., description="Topics to ingest knowledge about")


class SkillDistillRequest(BaseModel):
    """Request to distill skills."""
    min_frequency: int = Field(default=3, description="Minimum pattern frequency")


# Sentinel Layer Schemas


class SentinelAlert(BaseModel):
    """Sentinel alert from system health scan."""
    alert_id: str
    layer: int = Field(..., description="Layer: 1-6")
    component: str = Field(..., description="Component name")
    issue_type: str = Field(..., description="Issue type: error, degradation, anomaly")
    severity: str = Field(..., description="Severity: low, medium, high, critical")
    raw_evidence: str = Field(..., description="Raw evidence from scan")
    timestamp: str


class SentinelDiagnosis(BaseModel):
    """Sentinel diagnosis of an alert."""
    diagnosis_id: str
    alert_id: str
    root_cause: str
    fix_type: str = Field(..., description="Fix type: config, prompt, logic, dependency, data")
    # Gate for the auto-patcher: only diagnoses marked safe are applied
    # without human review.
    safe_to_auto_apply: bool
    proposed_fix: str
    confidence: float = Field(..., ge=0.0, le=1.0)
    reasoning: str
    timestamp: str


class PatchResult(BaseModel):
    """Result of patch application."""
    patch_id: str
    applied: bool
    target_file: str
    change_summary: str
    requires_human_review: bool
    timestamp: str


class CapabilitySnapshot(BaseModel):
    """Capability snapshot with AGI progression index."""
    snapshot_id: str
    timestamp: str
    scores: Dict[str, float] = Field(..., description="Dimension scores: reasoning_depth, source_trust_avg, skill_coverage, prompt_win_rate_avg, stability, self_correction_rate")
    # IMPORTANT COMMENT (must appear in schemas):
    # agi_progression_index is a VANITY METRIC for developer motivation.
    # It is a weighted composite of system health indicators:
    # error rate, confidence scores, trust quality, skill growth, etc.
    # It does NOT measure general intelligence, emergent reasoning, or AGI.
    # The name is aspirational and intentionally optimistic, not scientific.
    agi_progression_index: float = Field(..., ge=0.0, le=1.0)
    delta_from_last: Dict[str, float]


class SentinelCycleReport(BaseModel):
    """Report from a sentinel cycle."""
    cycle_id: str
    started_at: str
    completed_at: str
    alerts_found: int
    diagnoses_made: int
    patches_applied: int
    patches_pending_review: int
    capability_snapshot: CapabilitySnapshot
backend/app/services/api_discovery/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API Discovery subsystem for discovering and classifying free APIs.
3
+ """
4
+
5
+ from .catalog_loader import load_public_apis_catalog
6
+ from .classifier import classify_api
7
+ from .scorer import score_api_usefulness
8
+
9
+ __all__ = [
10
+ "load_public_apis_catalog",
11
+ "classify_api",
12
+ "score_api_usefulness",
13
+ ]
backend/app/services/api_discovery/catalog_loader.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Catalog loader for public-apis dataset.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from typing import List, Dict, Any, Optional
8
+ from pathlib import Path
9
+ import httpx
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # GitHub raw URL for public-apis catalog
14
+ PUBLIC_APIS_URL = "https://raw.githubusercontent.com/public-apis/public-apis/master/entries.json"
15
+ CACHE_FILE = Path(__file__).parent.parent.parent / "data" / "api_catalog_cache.json"
16
+
17
+
18
+ def load_public_apis_catalog(use_cache: bool = True) -> List[Dict[str, Any]]:
19
+ """
20
+ Load public-apis catalog from GitHub or local cache.
21
+
22
+ Args:
23
+ use_cache: If True, use local cache if available
24
+
25
+ Returns:
26
+ List of API entries with:
27
+ - API: API name
28
+ - Description: API description
29
+ - Auth: Auth type (apiKey, OAuth, None, etc.)
30
+ - HTTPS: HTTPS support (True/False)
31
+ - Cors: CORS support (yes/no/unknown)
32
+ - Category: API category
33
+ - Link: API documentation URL
34
+ """
35
+ # Try to load from cache first
36
+ if use_cache and CACHE_FILE.exists():
37
+ try:
38
+ logger.info("Loading API catalog from cache")
39
+ with open(CACHE_FILE, "r", encoding="utf-8") as f:
40
+ data = json.load(f)
41
+ logger.info(f"Loaded {len(data.get('entries', []))} APIs from cache")
42
+ return data.get("entries", [])
43
+ except Exception as e:
44
+ logger.warning(f"Failed to load cache: {e}")
45
+
46
+ # Fetch from GitHub
47
+ try:
48
+ logger.info("Fetching API catalog from GitHub")
49
+ with httpx.Client(timeout=30) as client:
50
+ response = client.get(PUBLIC_APIS_URL)
51
+ response.raise_for_status()
52
+ data = response.json()
53
+
54
+ entries = data.get("entries", [])
55
+ logger.info(f"Fetched {len(entries)} APIs from GitHub")
56
+
57
+ # Save to cache
58
+ try:
59
+ CACHE_FILE.parent.mkdir(parents=True, exist_ok=True)
60
+ with open(CACHE_FILE, "w", encoding="utf-8") as f:
61
+ json.dump(data, f, indent=2)
62
+ logger.info("Saved API catalog to cache")
63
+ except Exception as e:
64
+ logger.warning(f"Failed to save cache: {e}")
65
+
66
+ return entries
67
+
68
+ except Exception as e:
69
+ logger.error(f"Failed to fetch API catalog: {e}")
70
+ return []
71
+
72
+
73
def get_api_by_name(name: str) -> Optional[Dict[str, Any]]:
    """Get a specific API by name (case-insensitive match on the API field)."""
    wanted = name.lower()
    return next(
        (entry for entry in load_public_apis_catalog() if entry.get("API", "").lower() == wanted),
        None,
    )
80
+
81
+
82
def get_apis_by_category(category: str) -> List[Dict[str, Any]]:
    """Get all APIs in a specific category (case-insensitive)."""
    wanted = category.lower()
    return [
        entry
        for entry in load_public_apis_catalog()
        if entry.get("Category", "").lower() == wanted
    ]
backend/app/services/api_discovery/classifier.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API classifier for categorizing APIs by domain and use case.
3
+ """
4
+
5
+ import logging
6
+ from typing import Dict, Any, List
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ # Domain classification keywords
11
+ DOMAIN_KEYWORDS = {
12
+ "market_data": ["stock", "crypto", "currency", "finance", "trading", "market", "exchange", "commodity"],
13
+ "news": ["news", "article", "media", "press", "journalism", "headline"],
14
+ "social": ["social", "twitter", "facebook", "reddit", "instagram", "community"],
15
+ "government": ["government", "policy", "regulation", "law", "public", "civic", "open data"],
16
+ "weather": ["weather", "climate", "forecast", "meteorology", "temperature"],
17
+ "general": [], # Catch-all
18
+ }
19
+
20
+
21
+ def classify_api(api_entry: Dict[str, Any]) -> Dict[str, Any]:
22
+ """
23
+ Classify API by category and potential use cases.
24
+
25
+ Args:
26
+ api_entry: API entry from catalog with API, Description, Category, etc.
27
+
28
+ Returns:
29
+ Classification with:
30
+ - domain: Detected domain (market_data, news, social, government, weather, general)
31
+ - use_cases: List of potential use cases
32
+ - relevance_score: Relevance to MiroOrg domains (0.0 - 1.0)
33
+ """
34
+ name = api_entry.get("API", "").lower()
35
+ description = api_entry.get("Description", "").lower()
36
+ category = api_entry.get("Category", "").lower()
37
+
38
+ combined_text = f"{name} {description} {category}"
39
+
40
+ # Detect domain
41
+ detected_domain = "general"
42
+ max_matches = 0
43
+
44
+ for domain, keywords in DOMAIN_KEYWORDS.items():
45
+ if domain == "general":
46
+ continue
47
+ matches = sum(1 for keyword in keywords if keyword in combined_text)
48
+ if matches > max_matches:
49
+ max_matches = matches
50
+ detected_domain = domain
51
+
52
+ # Determine use cases
53
+ use_cases = []
54
+ if detected_domain == "market_data":
55
+ use_cases = ["financial_research", "market_analysis", "portfolio_tracking"]
56
+ elif detected_domain == "news":
57
+ use_cases = ["news_research", "sentiment_analysis", "event_detection"]
58
+ elif detected_domain == "social":
59
+ use_cases = ["social_listening", "sentiment_analysis", "trend_detection"]
60
+ elif detected_domain == "government":
61
+ use_cases = ["policy_research", "regulatory_tracking", "open_data_analysis"]
62
+ elif detected_domain == "weather":
63
+ use_cases = ["weather_forecasting", "climate_analysis"]
64
+ else:
65
+ use_cases = ["general_research"]
66
+
67
+ # Calculate relevance score (higher for domains we care about)
68
+ relevance_scores = {
69
+ "market_data": 1.0,
70
+ "news": 0.9,
71
+ "government": 0.8,
72
+ "social": 0.7,
73
+ "weather": 0.5,
74
+ "general": 0.3,
75
+ }
76
+ relevance_score = relevance_scores.get(detected_domain, 0.3)
77
+
78
+ return {
79
+ "domain": detected_domain,
80
+ "use_cases": use_cases,
81
+ "relevance_score": relevance_score,
82
+ }
83
+
84
+
85
+ def classify_multiple_apis(api_entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
86
+ """Classify multiple APIs and return enriched entries."""
87
+ results = []
88
+ for api in api_entries:
89
+ classification = classify_api(api)
90
+ enriched = {**api, **classification}
91
+ results.append(enriched)
92
+ return results
backend/app/services/api_discovery/metadata_store.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Metadata store for API discovery results.
3
+ """
4
+
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+ from typing import Dict, Any, List, Optional
9
+ from datetime import datetime
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ METADATA_DIR = Path(__file__).parent.parent.parent / "data" / "api_metadata"
14
+ METADATA_DIR.mkdir(parents=True, exist_ok=True)
15
+
16
+
17
+ def save_api_metadata(api_name: str, metadata: Dict[str, Any]) -> None:
18
+ """Save API metadata to local storage."""
19
+ safe_name = api_name.replace("/", "_").replace(" ", "_")
20
+ path = METADATA_DIR / f"{safe_name}.json"
21
+
22
+ data = {
23
+ **metadata,
24
+ "saved_at": datetime.utcnow().isoformat(),
25
+ }
26
+
27
+ with open(path, "w", encoding="utf-8") as f:
28
+ json.dump(data, f, indent=2)
29
+
30
+ logger.info(f"Saved metadata for API: {api_name}")
31
+
32
+
33
+ def get_api_metadata(api_name: str) -> Optional[Dict[str, Any]]:
34
+ """Get API metadata from local storage."""
35
+ safe_name = api_name.replace("/", "_").replace(" ", "_")
36
+ path = METADATA_DIR / f"{safe_name}.json"
37
+
38
+ if not path.exists():
39
+ return None
40
+
41
+ with open(path, "r", encoding="utf-8") as f:
42
+ return json.load(f)
43
+
44
+
45
+ def list_saved_apis() -> List[str]:
46
+ """List all APIs with saved metadata."""
47
+ return [p.stem for p in METADATA_DIR.glob("*.json")]
48
+
49
+
50
+ def delete_api_metadata(api_name: str) -> bool:
51
+ """Delete API metadata."""
52
+ safe_name = api_name.replace("/", "_").replace(" ", "_")
53
+ path = METADATA_DIR / f"{safe_name}.json"
54
+
55
+ if not path.exists():
56
+ return False
57
+
58
+ path.unlink()
59
+ logger.info(f"Deleted metadata for API: {api_name}")
60
+ return True
backend/app/services/api_discovery/scorer.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ API scorer for prioritizing APIs for integration.
3
+ """
4
+
5
+ import logging
6
+ from typing import Dict, Any
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
+ def score_api_usefulness(api_entry: Dict[str, Any]) -> float:
12
+ """
13
+ Score API for integration priority (0.0 - 1.0).
14
+
15
+ Factors:
16
+ - Auth simplicity (no auth > apiKey > OAuth)
17
+ - HTTPS support (required)
18
+ - CORS support (preferred)
19
+ - Category relevance to domain packs
20
+ - Description quality
21
+
22
+ Args:
23
+ api_entry: API entry from catalog
24
+
25
+ Returns:
26
+ Usefulness score (0.0 - 1.0)
27
+ """
28
+ score = 0.0
29
+
30
+ # Auth simplicity (30% weight)
31
+ auth = api_entry.get("Auth", "").lower()
32
+ if auth == "" or auth == "no":
33
+ score += 0.30 # No auth is easiest
34
+ elif "apikey" in auth or "api key" in auth:
35
+ score += 0.20 # API key is simple
36
+ elif "oauth" in auth:
37
+ score += 0.10 # OAuth is complex
38
+ else:
39
+ score += 0.05 # Unknown auth
40
+
41
+ # HTTPS support (25% weight) - required for security
42
+ https = api_entry.get("HTTPS", False)
43
+ if https:
44
+ score += 0.25
45
+
46
+ # CORS support (15% weight) - preferred for frontend
47
+ cors = api_entry.get("Cors", "").lower()
48
+ if cors == "yes":
49
+ score += 0.15
50
+ elif cors == "unknown":
51
+ score += 0.05
52
+
53
+ # Category relevance (20% weight)
54
+ category = api_entry.get("Category", "").lower()
55
+ relevance_categories = {
56
+ "finance": 1.0,
57
+ "news": 0.9,
58
+ "government": 0.8,
59
+ "social": 0.7,
60
+ "business": 0.8,
61
+ "cryptocurrency": 0.9,
62
+ "weather": 0.6,
63
+ }
64
+ category_score = 0.0
65
+ for cat, weight in relevance_categories.items():
66
+ if cat in category:
67
+ category_score = max(category_score, weight)
68
+ score += 0.20 * category_score
69
+
70
+ # Description quality (10% weight)
71
+ description = api_entry.get("Description", "")
72
+ if len(description) > 50:
73
+ score += 0.10
74
+ elif len(description) > 20:
75
+ score += 0.05
76
+
77
+ return min(score, 1.0) # Cap at 1.0
78
+
79
+
80
+ def rank_apis(api_entries: list[Dict[str, Any]], top_n: int = 10) -> list[Dict[str, Any]]:
81
+ """
82
+ Rank APIs by usefulness score and return top N.
83
+
84
+ Args:
85
+ api_entries: List of API entries
86
+ top_n: Number of top APIs to return
87
+
88
+ Returns:
89
+ Top N APIs sorted by score (descending)
90
+ """
91
+ scored_apis = []
92
+ for api in api_entries:
93
+ score = score_api_usefulness(api)
94
+ scored_apis.append({**api, "usefulness_score": score})
95
+
96
+ # Sort by score descending
97
+ scored_apis.sort(key=lambda x: x.get("usefulness_score", 0.0), reverse=True)
98
+
99
+ return scored_apis[:top_n]
backend/app/services/case_store.py CHANGED
@@ -34,6 +34,8 @@ def list_cases(limit: Optional[int] = None) -> List[Dict[str, Any]]:
34
  "user_input": data.get("user_input", ""),
35
  "saved_at": data.get("saved_at"),
36
  "final_answer_preview": str(data.get("final_answer", ""))[:200],
 
 
37
  }
38
  )
39
  except Exception:
@@ -60,6 +62,20 @@ def delete_case(case_id: str) -> bool:
60
  return True
61
 
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def memory_stats() -> Dict[str, Any]:
64
  files = list(_memory_dir().glob("*.json"))
65
  files_sorted = sorted(files, key=lambda p: p.stat().st_mtime, reverse=True)
 
34
  "user_input": data.get("user_input", ""),
35
  "saved_at": data.get("saved_at"),
36
  "final_answer_preview": str(data.get("final_answer", ""))[:200],
37
+ "simulation_id": data.get("simulation_id"), # Include simulation link
38
+ "route": data.get("route"), # Include route for dashboard display
39
  }
40
  )
41
  except Exception:
 
62
  return True
63
 
64
 
65
def get_cases_by_simulation(simulation_id: str) -> List[Dict[str, Any]]:
    """Get all cases linked to a specific simulation."""
    matches: List[Dict[str, Any]] = []
    for case_file in _memory_dir().glob("*.json"):
        try:
            with open(case_file, "r", encoding="utf-8") as fh:
                record = json.load(fh)
            # Only cases explicitly stamped with this simulation's id count.
            if record.get("simulation_id") == simulation_id:
                matches.append(record)
        except Exception:
            # Unreadable or corrupt case files are skipped, mirroring list_cases.
            continue
    return matches
77
+
78
+
79
  def memory_stats() -> Dict[str, Any]:
80
  files = list(_memory_dir().glob("*.json"))
81
  files_sorted = sorted(files, key=lambda p: p.stat().st_mtime, reverse=True)
backend/app/services/external_sources.py CHANGED
@@ -1,4 +1,6 @@
1
  import re
 
 
2
  from typing import List, Dict, Any, Optional
3
 
4
  import httpx
@@ -10,6 +12,17 @@ from app.config import (
10
  JINA_READER_BASE,
11
  )
12
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  URL_PATTERN = re.compile(r"https?://\S+")
15
  TICKER_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")
@@ -50,13 +63,14 @@ def tavily_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
50
  "search_depth": "basic",
51
  "max_results": max_results,
52
  }
53
- with httpx.Client(timeout=30) as client:
54
- response = client.post("https://api.tavily.com/search", json=payload)
55
  if response.status_code >= 400:
 
56
  return []
57
  data = response.json()
58
  return data.get("results", [])
59
- except Exception:
 
60
  return []
61
 
62
 
@@ -72,13 +86,14 @@ def news_search(query: str, page_size: int = 5) -> List[Dict[str, Any]]:
72
  "sortBy": "publishedAt",
73
  "apiKey": NEWSAPI_KEY,
74
  }
75
- with httpx.Client(timeout=30) as client:
76
- response = client.get("https://newsapi.org/v2/everything", params=params)
77
  if response.status_code >= 400:
 
78
  return []
79
  data = response.json()
80
  return data.get("articles", [])
81
- except Exception:
 
82
  return []
83
 
84
 
@@ -86,19 +101,31 @@ def market_quote(symbol: str) -> Dict[str, Any]:
86
  if not ALPHAVANTAGE_API_KEY or not symbol:
87
  return {}
88
 
 
 
 
 
 
 
89
  try:
90
  params = {
91
  "function": "GLOBAL_QUOTE",
92
  "symbol": symbol,
93
  "apikey": ALPHAVANTAGE_API_KEY,
94
  }
95
- with httpx.Client(timeout=30) as client:
96
- response = client.get("https://www.alphavantage.co/query", params=params)
97
  if response.status_code >= 400:
 
98
  return {}
99
  data = response.json()
100
- return data.get("Global Quote", {})
101
- except Exception:
 
 
 
 
 
 
102
  return {}
103
 
104
 
 
1
  import re
2
+ import time
3
+ import logging
4
  from typing import List, Dict, Any, Optional
5
 
6
  import httpx
 
12
  JINA_READER_BASE,
13
  )
14
 
15
+ logger = logging.getLogger(__name__)
16
+
17
+ # Module-level connection pool
18
+ _http_pool = httpx.Client(
19
+ timeout=30,
20
+ limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
21
+ )
22
+
23
+ # Simple TTL cache for market quotes (5 min)
24
+ _quote_cache: Dict[str, Dict[str, Any]] = {} # {symbol: {"data": ..., "ts": ...}}
25
+ _QUOTE_TTL = 300 # seconds
26
 
27
  URL_PATTERN = re.compile(r"https?://\S+")
28
  TICKER_PATTERN = re.compile(r"\$([A-Z]{1,5})\b")
 
63
  "search_depth": "basic",
64
  "max_results": max_results,
65
  }
66
+ response = _http_pool.post("https://api.tavily.com/search", json=payload)
 
67
  if response.status_code >= 400:
68
+ logger.warning(f"Tavily search returned {response.status_code}")
69
  return []
70
  data = response.json()
71
  return data.get("results", [])
72
+ except Exception as e:
73
+ logger.error(f"Tavily search error: {e}")
74
  return []
75
 
76
 
 
86
  "sortBy": "publishedAt",
87
  "apiKey": NEWSAPI_KEY,
88
  }
89
+ response = _http_pool.get("https://newsapi.org/v2/everything", params=params)
 
90
  if response.status_code >= 400:
91
+ logger.warning(f"NewsAPI returned {response.status_code}")
92
  return []
93
  data = response.json()
94
  return data.get("articles", [])
95
+ except Exception as e:
96
+ logger.error(f"NewsAPI error: {e}")
97
  return []
98
 
99
 
 
101
  if not ALPHAVANTAGE_API_KEY or not symbol:
102
  return {}
103
 
104
+ # Check cache first
105
+ cached = _quote_cache.get(symbol)
106
+ if cached and (time.time() - cached["ts"]) < _QUOTE_TTL:
107
+ logger.debug(f"Market quote cache hit: {symbol}")
108
+ return cached["data"]
109
+
110
  try:
111
  params = {
112
  "function": "GLOBAL_QUOTE",
113
  "symbol": symbol,
114
  "apikey": ALPHAVANTAGE_API_KEY,
115
  }
116
+ response = _http_pool.get("https://www.alphavantage.co/query", params=params)
 
117
  if response.status_code >= 400:
118
+ logger.warning(f"Alpha Vantage returned {response.status_code}")
119
  return {}
120
  data = response.json()
121
+ quote = data.get("Global Quote", {})
122
+
123
+ # Cache the result
124
+ _quote_cache[symbol] = {"data": quote, "ts": time.time()}
125
+
126
+ return quote
127
+ except Exception as e:
128
+ logger.error(f"Alpha Vantage error: {e}")
129
  return {}
130
 
131
 
backend/app/services/learning/__init__.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Learning subsystem for autonomous knowledge evolution.
3
+
4
+ This module provides the infrastructure for the system to improve itself
5
+ over time without local model training. It includes:
6
+ - Knowledge ingestion from external sources
7
+ - Experience learning from case execution
8
+ - Prompt evolution through A/B testing
9
+ - Skill distillation from repeated patterns
10
+ - Trust and freshness management
11
+ """
12
+
13
+ from .knowledge_ingestor import KnowledgeIngestor
14
+ from .knowledge_store import KnowledgeStore
15
+ from .learning_engine import LearningEngine
16
+ from .prompt_optimizer import PromptOptimizer
17
+ from .skill_distiller import SkillDistiller
18
+ from .trust_manager import TrustManager
19
+ from .scheduler import LearningScheduler
20
+
21
+ __all__ = [
22
+ "KnowledgeIngestor",
23
+ "KnowledgeStore",
24
+ "LearningEngine",
25
+ "PromptOptimizer",
26
+ "SkillDistiller",
27
+ "TrustManager",
28
+ "LearningScheduler",
29
+ ]
backend/app/services/learning/freshness_manager.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Freshness management for knowledge items.
3
+
4
+ This is a convenience wrapper around TrustManager's freshness functionality.
5
+ """
6
+
7
+ from .trust_manager import TrustManager
8
+
9
+ # Re-export for convenience
10
+ __all__ = ["TrustManager"]
backend/app/services/learning/knowledge_ingestor.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge ingestion from external sources.
3
+
4
+ Ingests knowledge from web search, URLs, and news sources,
5
+ compressing content to 2-4KB summaries for efficient storage.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict, Any, Optional
10
+ import httpx
11
+ from datetime import datetime
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class KnowledgeIngestor:
    """Ingests knowledge from external sources.

    Sources:
      * Tavily web search        (:meth:`ingest_from_search`)
      * Jina Reader page extract (:meth:`ingest_from_url`)
      * NewsAPI articles         (:meth:`ingest_from_news`)

    Every produced item carries a ``summary`` compressed to at most
    ``_MAX_SUMMARY_BYTES`` bytes via :meth:`compress_content`.  All network
    and LLM failures are logged and swallowed so callers always receive a
    usable (possibly empty) result.
    """

    # Upper bound for stored summaries, in UTF-8 bytes.
    _MAX_SUMMARY_BYTES = 4096
    # Per-request timeout for outbound HTTP calls, in seconds.
    _HTTP_TIMEOUT = 30.0

    def __init__(self, tavily_key: Optional[str], newsapi_key: Optional[str], model_fn):
        """
        Args:
            tavily_key: Tavily API key, or None to disable search ingestion.
            newsapi_key: NewsAPI key, or None to disable news ingestion.
            model_fn: Async callable ``(prompt, max_tokens=...) -> str`` used
                for LLM summarization in :meth:`compress_content`.
        """
        self.tavily_key = tavily_key
        self.newsapi_key = newsapi_key
        self.model_fn = model_fn
        self.jina_reader_base = "https://r.jina.ai/"

    @staticmethod
    def _truncate_to_bytes(text: str, limit: int) -> str:
        """Truncate *text* to at most *limit* UTF-8 bytes.

        Encodes before slicing so the budget is enforced in bytes (a plain
        ``text[:limit]`` counts characters and can exceed *limit* for
        multibyte content); ``errors='ignore'`` drops any codepoint split by
        the byte cut.
        """
        encoded = text.encode("utf-8")
        if len(encoded) <= limit:
            return text
        return encoded[:limit].decode("utf-8", errors="ignore")

    async def ingest_from_search(self, query: str, max_results: int = 5) -> list[Dict[str, Any]]:
        """
        Ingest knowledge from web search using Tavily API.

        Args:
            query: Search query
            max_results: Maximum number of results to ingest

        Returns:
            List of knowledge items with compressed content (empty when the
            API key is missing or the request fails).
        """
        if not self.tavily_key:
            logger.warning("Tavily API key not configured")
            return []

        try:
            # Fetch first, then summarize: compression may invoke a slow LLM
            # call per result, and we should not hold the HTTP connection
            # open for that long.
            async with httpx.AsyncClient(timeout=self._HTTP_TIMEOUT) as client:
                response = await client.post(
                    "https://api.tavily.com/search",
                    json={
                        "api_key": self.tavily_key,
                        "query": query,
                        "max_results": max_results,
                    },
                )
                response.raise_for_status()
                data = response.json()

            items = []
            for result in data.get("results", []):
                summary = await self.compress_content(result.get("content", ""))
                items.append({
                    "source": "tavily_search",
                    "query": query,
                    "url": result.get("url"),
                    "title": result.get("title"),
                    "summary": summary,
                    # NOTE(review): datetime.utcnow() is deprecated in 3.12;
                    # kept so timestamps stay naive-ISO like existing items.
                    "ingested_at": datetime.utcnow().isoformat(),
                })

            logger.info(f"Ingested {len(items)} items from Tavily search: {query}")
            return items

        except Exception as e:
            logger.error(f"Failed to ingest from Tavily search: {e}")
            return []

    async def ingest_from_url(self, url: str) -> Optional[Dict[str, Any]]:
        """
        Ingest knowledge from a specific URL using Jina Reader.

        Args:
            url: URL to ingest

        Returns:
            Knowledge item with compressed content, or None if failed
        """
        try:
            async with httpx.AsyncClient(timeout=self._HTTP_TIMEOUT) as client:
                response = await client.get(f"{self.jina_reader_base}{url}")
                response.raise_for_status()
                content = response.text

            summary = await self.compress_content(content)

            item = {
                "source": "jina_reader",
                "url": url,
                "summary": summary,
                "ingested_at": datetime.utcnow().isoformat(),
            }

            logger.info(f"Ingested content from URL: {url}")
            return item

        except Exception as e:
            logger.error(f"Failed to ingest from URL {url}: {e}")
            return None

    async def ingest_from_news(self, query: str, max_results: int = 10) -> list[Dict[str, Any]]:
        """
        Ingest knowledge from news sources using NewsAPI.

        Args:
            query: News search query
            max_results: Maximum number of articles to ingest

        Returns:
            List of knowledge items with compressed content (empty when the
            API key is missing or the request fails).
        """
        if not self.newsapi_key:
            logger.warning("NewsAPI key not configured")
            return []

        try:
            async with httpx.AsyncClient(timeout=self._HTTP_TIMEOUT) as client:
                response = await client.get(
                    "https://newsapi.org/v2/everything",
                    params={
                        "apiKey": self.newsapi_key,
                        "q": query,
                        "pageSize": max_results,
                        "sortBy": "publishedAt",
                    },
                )
                response.raise_for_status()
                data = response.json()

            items = []
            for article in data.get("articles", []):
                # Concatenate the searchable fields; NewsAPI truncates
                # `content`, so title/description carry real signal too.
                content = f"{article.get('title', '')} {article.get('description', '')} {article.get('content', '')}"
                summary = await self.compress_content(content)
                items.append({
                    "source": "newsapi",
                    "query": query,
                    "url": article.get("url"),
                    "title": article.get("title"),
                    "published_at": article.get("publishedAt"),
                    "summary": summary,
                    "ingested_at": datetime.utcnow().isoformat(),
                })

            logger.info(f"Ingested {len(items)} articles from NewsAPI: {query}")
            return items

        except Exception as e:
            logger.error(f"Failed to ingest from NewsAPI: {e}")
            return []

    async def compress_content(self, content: str) -> str:
        """
        Compress content to a summary of at most ``_MAX_SUMMARY_BYTES`` bytes
        using the LLM.

        Args:
            content: Raw content to compress

        Returns:
            Compressed summary; on LLM failure, a byte-safe truncation of the
            raw content is returned instead.
        """
        if not content:
            return ""

        # Already within budget -- no LLM call needed.
        if len(content.encode('utf-8')) <= self._MAX_SUMMARY_BYTES:
            return content

        prompt = f"""Summarize the following content into a concise summary of 500-1000 words.
Focus on key facts, insights, and actionable information.

Content:
{content[:10000]}

Summary:"""

        try:
            summary = await self.model_fn(prompt, max_tokens=1500)
            # Enforce the byte budget on the model output as well.
            return self._truncate_to_bytes(summary, self._MAX_SUMMARY_BYTES)

        except Exception as e:
            logger.error(f"Failed to compress content: {e}")
            # Fallback: byte-safe truncation of the raw content.  (The old
            # fallback sliced by characters, which could exceed 4 KB for
            # multibyte text.)
            return self._truncate_to_bytes(content, self._MAX_SUMMARY_BYTES)
backend/app/services/learning/knowledge_store.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge storage with LRU eviction and expiration management.
3
+
4
+ Stores knowledge items as JSON files with 200MB storage limit.
5
+ Implements LRU eviction when limit is reached.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ from datetime import datetime, timedelta
12
+ from pathlib import Path
13
+ from typing import Dict, Any, Optional, List
14
+ import uuid
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class KnowledgeStore:
    """Stores and retrieves knowledge items with storage limits.

    Each item is a dict persisted as ``<data_dir>/knowledge/<id>.json``.
    When total storage exceeds ``max_size_mb``, the least-recently-accessed
    items (tracked via each item's ``last_accessed`` field) are evicted.
    """

    def __init__(self, data_dir: str, max_size_mb: int = 200):
        """
        Args:
            data_dir: Base data directory; items live in its ``knowledge/`` subdir.
            max_size_mb: Storage cap in megabytes.
        """
        self.data_dir = Path(data_dir) / "knowledge"
        self.max_size_bytes = max_size_mb * 1024 * 1024
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def save_knowledge(self, item: Dict[str, Any]) -> str:
        """
        Save a knowledge item to storage.

        Args:
            item: Knowledge item to save (mutated in place: id/timestamps added)

        Returns:
            Item ID
        """
        if "id" not in item:
            item["id"] = str(uuid.uuid4())

        # NOTE(review): datetime.utcnow() is deprecated in 3.12; kept so
        # stored timestamps stay naive-ISO, matching existing items.
        now = datetime.utcnow().isoformat()
        item["saved_at"] = now
        item["last_accessed"] = now

        file_path = self.data_dir / f"{item['id']}.json"
        with open(file_path, 'w') as f:
            json.dump(item, f, indent=2)

        # Evict AFTER writing so the cap holds including the new item.
        # (Previously eviction ran before the write, so the store could end
        # up one item over the limit.)  The new item has the freshest
        # last_accessed, so LRU eviction touches it last.
        self._enforce_storage_limit()

        logger.info(f"Saved knowledge item: {item['id']}")
        return item["id"]

    def get_knowledge(self, item_id: str) -> Optional[Dict[str, Any]]:
        """
        Retrieve a knowledge item by ID.

        Args:
            item_id: Item ID

        Returns:
            Knowledge item or None if not found
        """
        file_path = self.data_dir / f"{item_id}.json"
        if not file_path.exists():
            return None

        with open(file_path, 'r') as f:
            item = json.load(f)

        # Touch the item so LRU eviction sees it as recently used.
        item["last_accessed"] = datetime.utcnow().isoformat()
        with open(file_path, 'w') as f:
            json.dump(item, f, indent=2)

        return item

    def search_knowledge(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
        """
        Search knowledge items by case-insensitive substring match on title
        and summary.

        Args:
            query: Search query
            limit: Maximum number of results

        Returns:
            Matching items, title matches first, then newest first.
        """
        query_lower = query.lower()
        results = []

        # Scan all files before sorting.  (The previous early `break` at
        # `limit` made the result set depend on filesystem iteration order.)
        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    item = json.load(f)
            except Exception as e:
                logger.error(f"Failed to read knowledge item {file_path}: {e}")
                continue

            if (query_lower in item.get("title", "").lower()
                    or query_lower in item.get("summary", "").lower()):
                results.append(item)

        # Two stable sorts: newest first, then title matches bubbled to the
        # front.  (The previous single sort used reverse=True on the tuple
        # `(query not in title, saved_at)`, which inverted the priority and
        # put NON-title matches first.)
        results.sort(key=lambda x: x.get("saved_at", ""), reverse=True)
        results.sort(key=lambda x: query_lower not in x.get("title", "").lower())

        return results[:limit]

    def list_all(self, limit: Optional[int] = None) -> List[Dict[str, Any]]:
        """
        List all knowledge items, newest first.

        Args:
            limit: Maximum number of items to return (None for all)

        Returns:
            List of knowledge items
        """
        items = []

        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    items.append(json.load(f))
            except Exception as e:
                logger.error(f"Failed to read knowledge item {file_path}: {e}")

        items.sort(key=lambda x: x.get("saved_at", ""), reverse=True)

        # `is not None` so an explicit limit of 0 is honored (the previous
        # truthiness check treated limit=0 as "no limit").
        if limit is not None:
            return items[:limit]
        return items

    def delete_expired_knowledge(self, expiration_days: int = 30) -> int:
        """
        Delete expired knowledge items.

        Items with a missing ``saved_at`` are kept (their age is unknown).

        Args:
            expiration_days: Number of days before expiration

        Returns:
            Number of items deleted
        """
        cutoff = datetime.utcnow() - timedelta(days=expiration_days)
        deleted_count = 0

        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    item = json.load(f)

                saved_at_raw = item.get("saved_at")
                if not saved_at_raw:
                    # No timestamp -> cannot judge age; keep rather than
                    # raising ValueError from fromisoformat("").
                    continue

                if datetime.fromisoformat(saved_at_raw) < cutoff:
                    file_path.unlink()
                    deleted_count += 1
                    logger.info(f"Deleted expired knowledge item: {item.get('id')}")

            except Exception as e:
                logger.error(f"Failed to check expiration for {file_path}: {e}")

        logger.info(f"Deleted {deleted_count} expired knowledge items")
        return deleted_count

    def _enforce_storage_limit(self):
        """Enforce storage limit using LRU eviction."""
        current_size = self._get_storage_size()

        if current_size <= self.max_size_bytes:
            return

        logger.warning(f"Storage limit exceeded: {current_size / 1024 / 1024:.2f}MB / {self.max_size_bytes / 1024 / 1024:.2f}MB")

        # Collect (path, last_accessed) pairs; oldest access first.
        items = []
        for file_path in self.data_dir.glob("*.json"):
            try:
                with open(file_path, 'r') as f:
                    item = json.load(f)
                items.append((file_path, item.get("last_accessed", "")))
            except Exception as e:
                logger.error(f"Failed to read {file_path}: {e}")

        items.sort(key=lambda x: x[1])  # least recently accessed first

        # Delete oldest items until back under the limit.
        for file_path, _ in items:
            if current_size <= self.max_size_bytes:
                break

            file_size = file_path.stat().st_size
            file_path.unlink()
            current_size -= file_size
            logger.info(f"Evicted knowledge item (LRU): {file_path.name}")

    def _get_storage_size(self) -> int:
        """Get total storage size in bytes."""
        return sum(file_path.stat().st_size for file_path in self.data_dir.glob("*.json"))

    def get_storage_stats(self) -> Dict[str, Any]:
        """Get storage statistics (sizes in MB, usage percent, item count)."""
        total_size = self._get_storage_size()
        item_count = len(list(self.data_dir.glob("*.json")))

        return {
            "total_size_mb": total_size / 1024 / 1024,
            "max_size_mb": self.max_size_bytes / 1024 / 1024,
            "usage_percent": (total_size / self.max_size_bytes) * 100 if self.max_size_bytes > 0 else 0,
            "item_count": item_count,
        }
backend/app/services/learning/learning_engine.py ADDED
@@ -0,0 +1,439 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Core learning engine that coordinates all learning activities.
3
+
4
+ Orchestrates knowledge ingestion, experience learning, prompt evolution,
5
+ skill distillation, trust management, and freshness management.
6
+ """
7
+
8
+ import logging
9
+ from typing import Dict, Any, List, Optional
10
+ from datetime import datetime
11
+ from collections import Counter
12
+
13
+ from .knowledge_ingestor import KnowledgeIngestor
14
+ from .knowledge_store import KnowledgeStore
15
+ from .prompt_optimizer import PromptOptimizer
16
+ from .skill_distiller import SkillDistiller
17
+ from .trust_manager import TrustManager
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class LearningEngine:
    """Coordinates all learning activities.

    Facade over the learning subsystem: knowledge ingestion/cleanup,
    per-case experience learning, pattern detection, prompt optimization,
    skill distillation, and trust/freshness maintenance.  The optional
    collaborators (prompt_optimizer, skill_distiller, trust_manager) may be
    None; the corresponding ``run_*`` methods then return a "skipped" status.
    """

    def __init__(
        self,
        knowledge_store: KnowledgeStore,
        knowledge_ingestor: KnowledgeIngestor,
        prompt_optimizer: Optional[PromptOptimizer] = None,
        skill_distiller: Optional[SkillDistiller] = None,
        trust_manager: Optional[TrustManager] = None,
    ):
        self.knowledge_store = knowledge_store
        self.knowledge_ingestor = knowledge_ingestor
        self.prompt_optimizer = prompt_optimizer
        self.skill_distiller = skill_distiller
        self.trust_manager = trust_manager
        # ISO timestamp of the most recent run of each periodic activity,
        # keyed by activity name ("knowledge_ingestion", "cleanup", ...).
        self.last_run: Dict[str, str] = {}

        # In-memory learning metadata (flushed periodically)
        self._case_learnings: List[Dict[str, Any]] = []
        self._route_stats: Counter = Counter()
        # NOTE(review): _provider_stats and _prompt_performance are never
        # written anywhere in this class — presumably reserved for future
        # per-provider tracking; confirm before removing.
        self._provider_stats: Dict[str, Dict[str, int]] = {}
        self._prompt_performance: Dict[str, Dict[str, Any]] = {}

    # ── Knowledge Ingestion ──────────────────────────────────────────────────

    async def run_knowledge_ingestion(self, topics: list[str]) -> Dict[str, Any]:
        """
        Run knowledge ingestion for specified topics.

        For each topic, pulls both web-search results and news articles via
        the ingestor and persists every item in the knowledge store.

        Args:
            topics: List of topics to ingest knowledge about

        Returns:
            Ingestion results (per-topic counts plus a total and timestamp)
        """
        logger.info(f"Running knowledge ingestion for topics: {topics}")

        total_items = 0
        results = []

        for topic in topics:
            search_items = await self.knowledge_ingestor.ingest_from_search(topic)
            for item in search_items:
                self.knowledge_store.save_knowledge(item)
                total_items += 1

            news_items = await self.knowledge_ingestor.ingest_from_news(topic)
            for item in news_items:
                self.knowledge_store.save_knowledge(item)
                total_items += 1

            results.append({
                "topic": topic,
                "search_items": len(search_items),
                "news_items": len(news_items),
            })

        self.last_run["knowledge_ingestion"] = datetime.utcnow().isoformat()

        logger.info(f"Knowledge ingestion complete: {total_items} items ingested")

        return {
            "total_items": total_items,
            "results": results,
            "timestamp": self.last_run["knowledge_ingestion"],
        }

    async def run_cleanup(self, expiration_days: int = 30) -> Dict[str, Any]:
        """Run cleanup of expired knowledge.

        Delegates deletion to the knowledge store and records the run time.
        """
        logger.info("Running knowledge cleanup")

        deleted_count = self.knowledge_store.delete_expired_knowledge(expiration_days)
        self.last_run["cleanup"] = datetime.utcnow().isoformat()

        return {
            "deleted_count": deleted_count,
            "timestamp": self.last_run["cleanup"],
        }

    # ── Experience Learning (Task 34) ────────────────────────────────────────

    def learn_from_case(self, case_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract learning metadata from a completed case execution.

        Expects ``case_data`` to carry ``case_id``, a ``route`` dict
        (domain_pack/execution_mode/complexity) and an ``outputs`` list of
        per-agent dicts — the shape is defined by the (unseen) case pipeline.

        Args:
            case_data: Complete case payload

        Returns:
            Learning metadata extracted
        """
        case_id = case_data.get("case_id", "unknown")
        logger.info(f"Learning from case {case_id}")

        route = case_data.get("route", {})
        outputs = case_data.get("outputs", [])

        # 1. Track route effectiveness
        route_key = f"{route.get('domain_pack', 'general')}:{route.get('execution_mode', 'standard')}"
        self._route_stats[route_key] += 1

        # 2. Track which agents produced useful output
        agents_used = []
        agent_quality = {}
        for output in outputs:
            if isinstance(output, dict):
                agent_name = output.get("agent", "unknown")
                summary = output.get("summary", "")
                confidence = output.get("confidence", 0.0)
                agents_used.append(agent_name)
                agent_quality[agent_name] = {
                    "output_length": len(summary),
                    "confidence": confidence,
                    # Heuristic: anything longer than 10 chars counts as output.
                    "produced_output": len(summary) > 10,
                }

        # 3. Build learning record
        learning = {
            "case_id": case_id,
            "route": route,
            "agents_used": agents_used,
            "agent_quality": agent_quality,
            "domain": route.get("domain_pack", "general"),
            "complexity": route.get("complexity", "medium"),
            "execution_mode": route.get("execution_mode", "standard"),
            "learned_at": datetime.utcnow().isoformat(),
        }

        self._case_learnings.append(learning)

        # Keep only last 500 learnings in memory
        if len(self._case_learnings) > 500:
            self._case_learnings = self._case_learnings[-500:]

        # 4. Update trust scores for sources mentioned in research
        if self.trust_manager:
            for output in outputs:
                if isinstance(output, dict) and output.get("agent") == "research":
                    sources = output.get("details", {}).get("sources", [])
                    for source in sources:
                        if isinstance(source, str):
                            # weight=0.5: half-strength positive signal —
                            # a cited source is weak evidence of reliability.
                            self.trust_manager.update_trust(source, True, weight=0.5)

        logger.info(f"Learned from case {case_id}: route={route_key}, agents={agents_used}")
        return learning

    def detect_patterns(self, min_frequency: int = 3) -> List[Dict[str, Any]]:
        """
        Detect patterns in recent case executions.

        Pure frequency analysis over the in-memory learnings: domain counts,
        execution-mode counts, and agent combinations that co-occur with
        confidence > 0.7.

        Args:
            min_frequency: Minimum occurrences to count as a pattern

        Returns:
            List of detected patterns
        """
        if len(self._case_learnings) < min_frequency:
            return []

        patterns = []

        # Pattern 1: Domain frequency
        domain_counts = Counter(l["domain"] for l in self._case_learnings)
        for domain, count in domain_counts.items():
            if count >= min_frequency:
                patterns.append({
                    "type": "domain_frequency",
                    "domain": domain,
                    "count": count,
                    "percentage": count / len(self._case_learnings) * 100,
                })

        # Pattern 2: Execution mode frequency
        mode_counts = Counter(l["execution_mode"] for l in self._case_learnings)
        for mode, count in mode_counts.items():
            if count >= min_frequency:
                patterns.append({
                    "type": "execution_mode_frequency",
                    "mode": mode,
                    "count": count,
                    "percentage": count / len(self._case_learnings) * 100,
                })

        # Pattern 3: Agent combinations that produce high confidence
        high_confidence_combos = Counter()
        for learning in self._case_learnings:
            quality = learning.get("agent_quality", {})
            high_conf_agents = [
                a for a, q in quality.items()
                if q.get("confidence", 0) > 0.7
            ]
            if high_conf_agents:
                # Sorted tuple -> combination key is order-independent.
                high_confidence_combos[tuple(sorted(high_conf_agents))] += 1

        for combo, count in high_confidence_combos.items():
            if count >= min_frequency:
                patterns.append({
                    "type": "high_confidence_agents",
                    "agents": list(combo),
                    "count": count,
                })

        self.last_run["pattern_detection"] = datetime.utcnow().isoformat()
        logger.info(f"Detected {len(patterns)} patterns from {len(self._case_learnings)} cases")
        return patterns

    def get_route_effectiveness(self) -> Dict[str, Any]:
        """Get route effectiveness insights.

        Returns per-route counts and percentages, ordered most-common first.
        (Despite the name, this reports usage frequency only — no
        success/failure signal is tracked per route.)
        """
        total = sum(self._route_stats.values())
        if total == 0:
            return {"total_cases": 0, "routes": {}}

        return {
            "total_cases": total,
            "routes": {
                route: {
                    "count": count,
                    "percentage": round(count / total * 100, 1),
                }
                for route, count in self._route_stats.most_common()
            },
        }

    def get_prompt_performance(self) -> Dict[str, Any]:
        """Get prompt performance insights from case learnings.

        Aggregates per-agent run counts, average confidence, and the fraction
        of runs that produced output, computed fresh from the in-memory
        learnings on every call.
        """
        if not self._case_learnings:
            return {"total_cases": 0, "agents": {}}

        agent_stats: Dict[str, Dict[str, Any]] = {}
        for learning in self._case_learnings:
            for agent, quality in learning.get("agent_quality", {}).items():
                if agent not in agent_stats:
                    agent_stats[agent] = {
                        "total_runs": 0,
                        "total_confidence": 0.0,
                        "produced_output_count": 0,
                    }
                agent_stats[agent]["total_runs"] += 1
                agent_stats[agent]["total_confidence"] += quality.get("confidence", 0)
                if quality.get("produced_output", False):
                    agent_stats[agent]["produced_output_count"] += 1

        # Calculate averages
        for agent, stats in agent_stats.items():
            runs = stats["total_runs"]
            stats["avg_confidence"] = round(stats["total_confidence"] / runs, 3) if runs > 0 else 0
            stats["output_rate"] = round(stats["produced_output_count"] / runs, 3) if runs > 0 else 0

        return {
            "total_cases": len(self._case_learnings),
            "agents": agent_stats,
        }

    # ── Prompt Evolution (Task 35) ───────────────────────────────────────────

    async def run_prompt_optimization(self, prompt_names: List[str]) -> Dict[str, Any]:
        """
        Run prompt optimization for specified prompts.

        Uses prompt_optimizer to create improved variants based on
        prompt performance data.  Prompts whose average confidence already
        exceeds 0.8 are skipped; missing prompts are silently ignored.
        """
        if not self.prompt_optimizer:
            return {"status": "skipped", "reason": "prompt_optimizer not configured"}

        results = []
        performance = self.get_prompt_performance()

        for name in prompt_names:
            agent_perf = performance.get("agents", {}).get(name, {})
            # Default 0.5 when the agent has no recorded runs yet.
            avg_conf = agent_perf.get("avg_confidence", 0.5)

            # Only optimize prompts with low average confidence
            if avg_conf > 0.8:
                results.append({"prompt": name, "status": "skipped", "reason": "already high performance"})
                continue

            try:
                # Local import avoids a circular dependency at module load.
                from app.services.prompt_store import get_prompt
                prompt_data = get_prompt(name)
                if not prompt_data:
                    continue

                goal = f"Improve output quality (current avg confidence: {avg_conf:.2f})"
                variant = await self.prompt_optimizer.create_prompt_variant(
                    name, prompt_data["content"], goal
                )
                results.append({"prompt": name, "status": "variant_created", "variant_id": variant["id"]})
            except Exception as e:
                logger.error(f"Failed to optimize prompt {name}: {e}")
                results.append({"prompt": name, "status": "error", "error": str(e)})

        self.last_run["prompt_optimization"] = datetime.utcnow().isoformat()
        return {"results": results, "timestamp": self.last_run["prompt_optimization"]}

    def get_active_prompt(self, prompt_name: str) -> Optional[str]:
        """
        Get the active production prompt text, if one has been promoted.

        Args:
            prompt_name: Prompt name (e.g., "research", "verifier")

        Returns:
            Production prompt text, or None if no production version exists
        """
        if not self.prompt_optimizer:
            return None

        # NOTE(review): reaches into PromptOptimizer's private helper —
        # consider exposing a public accessor on PromptOptimizer instead.
        production = self.prompt_optimizer._get_production_variant(prompt_name)
        if production:
            return production.get("prompt_text")
        return None

    # ── Skill Distillation (Task 36) ─────────────────────────────────────────

    async def run_skill_distillation(self, min_frequency: int = 3) -> Dict[str, Any]:
        """
        Run skill distillation from recent case patterns.

        Detects candidate skills from the in-memory case learnings and
        distills at most the top 5, each grounded in up to 3 example cases
        from the same domain.
        """
        if not self.skill_distiller:
            return {"status": "skipped", "reason": "skill_distiller not configured"}

        # Use in-memory case learnings as source data
        candidates = self.skill_distiller.detect_skill_candidates(
            self._case_learnings, min_frequency=min_frequency
        )

        skills_created = []
        for candidate in candidates[:5]:
            example_cases = [
                l for l in self._case_learnings
                if l.get("domain") == candidate.get("domain")
            ][:3]
            try:
                skill = await self.skill_distiller.distill_skill(candidate, example_cases)
                skills_created.append(skill)
            except Exception as e:
                logger.error(f"Failed to distill skill: {e}")

        self.last_run["skill_distillation"] = datetime.utcnow().isoformat()

        return {
            "candidates_found": len(candidates),
            "skills_created": len(skills_created),
            "skills": skills_created,
            "timestamp": self.last_run["skill_distillation"],
        }

    # ── Trust & Freshness (Task 37) ──────────────────────────────────────────

    async def run_freshness_refresh(self) -> Dict[str, Any]:
        """
        Check freshness of all knowledge items and flag stale ones.

        Items below the 0.3 freshness threshold are considered stale; up to
        10 refresh recommendations are produced per run.
        """
        if not self.trust_manager:
            return {"status": "skipped", "reason": "trust_manager not configured"}

        all_items = self.knowledge_store.list_all()
        stale = self.trust_manager.get_stale_items(all_items, threshold=0.3)
        recommendations = self.trust_manager.recommend_refresh(stale, max_recommendations=10)

        # Update freshness scores
        for item in all_items:
            freshness = self.trust_manager.calculate_freshness(item)
            item_id = item.get("id")
            if item_id:
                self.trust_manager.update_freshness(item_id, freshness)

        self.last_run["freshness_refresh"] = datetime.utcnow().isoformat()

        return {
            "total_items": len(all_items),
            "stale_items": len(stale),
            "refresh_recommendations": len(recommendations),
            "recommendations": recommendations,
            "timestamp": self.last_run["freshness_refresh"],
        }

    # ── Status & Insights ────────────────────────────────────────────────────

    def get_status(self) -> Dict[str, Any]:
        """Get learning engine status.

        Reports storage stats, per-activity last-run timestamps, learned-case
        count, and which optional components are wired in.
        """
        storage_stats = self.knowledge_store.get_storage_stats()

        return {
            "storage": storage_stats,
            "last_run": self.last_run,
            "enabled": True,
            "cases_learned": len(self._case_learnings),
            "components": {
                "knowledge_store": True,
                "knowledge_ingestor": True,
                "prompt_optimizer": self.prompt_optimizer is not None,
                "skill_distiller": self.skill_distiller is not None,
                "trust_manager": self.trust_manager is not None,
            },
        }

    def get_insights(self) -> Dict[str, Any]:
        """Get comprehensive learning insights.

        Bundles the 10 most recent knowledge items (metadata only), storage
        stats, route effectiveness, prompt performance, and detected patterns
        into one payload.
        """
        recent_items = self.knowledge_store.list_all(limit=10)

        return {
            "recent_knowledge": [
                {
                    "id": item.get("id"),
                    "title": item.get("title"),
                    "source": item.get("source"),
                    "saved_at": item.get("saved_at"),
                }
                for item in recent_items
            ],
            "storage_stats": self.knowledge_store.get_storage_stats(),
            "route_effectiveness": self.get_route_effectiveness(),
            "prompt_performance": self.get_prompt_performance(),
            "patterns": self.detect_patterns(),
        }
backend/app/services/learning/prompt_optimizer.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Prompt evolution through A/B testing and versioning.
3
+
4
+ Creates prompt variants, tests them, and promotes better versions.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional, List
12
+ import uuid
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class PromptOptimizer:
18
+ """Manages prompt evolution and A/B testing."""
19
+
20
+ def __init__(self, data_dir: str, model_fn):
21
+ self.data_dir = Path(data_dir) / "prompt_versions"
22
+ self.data_dir.mkdir(parents=True, exist_ok=True)
23
+ self.model_fn = model_fn
24
+
25
+ async def create_prompt_variant(self, prompt_name: str, current_prompt: str, goal: str) -> Dict[str, Any]:
26
+ """
27
+ Create a new prompt variant using LLM.
28
+
29
+ Args:
30
+ prompt_name: Name of the prompt (e.g., "research", "verifier")
31
+ current_prompt: Current prompt text
32
+ goal: Optimization goal (e.g., "improve clarity", "reduce tokens")
33
+
34
+ Returns:
35
+ Prompt variant with metadata
36
+ """
37
+ logger.info(f"Creating prompt variant for {prompt_name} with goal: {goal}")
38
+
39
+ optimization_prompt = f"""You are a prompt engineering expert. Improve the following prompt based on this goal: {goal}
40
+
41
+ Current prompt:
42
+ {current_prompt}
43
+
44
+ Create an improved version that:
45
+ 1. Maintains the same core functionality
46
+ 2. {goal}
47
+ 3. Is clear and actionable
48
+
49
+ Improved prompt:"""
50
+
51
+ try:
52
+ improved_prompt = await self.model_fn(optimization_prompt, max_tokens=2000)
53
+
54
+ variant = {
55
+ "id": str(uuid.uuid4()),
56
+ "prompt_name": prompt_name,
57
+ "version": self._get_next_version(prompt_name),
58
+ "prompt_text": improved_prompt,
59
+ "goal": goal,
60
+ "status": "testing",
61
+ "created_at": datetime.utcnow().isoformat(),
62
+ "test_count": 0,
63
+ "win_count": 0,
64
+ "win_rate": 0.0,
65
+ }
66
+
67
+ self._save_variant(variant)
68
+ logger.info(f"Created prompt variant: {variant['id']} (v{variant['version']})")
69
+
70
+ return variant
71
+
72
+ except Exception as e:
73
+ logger.error(f"Failed to create prompt variant: {e}")
74
+ raise
75
+
76
+ async def test_prompt_variant(self, variant_id: str, test_input: str, expected_quality: str) -> Dict[str, Any]:
77
+ """
78
+ Test a prompt variant with sample input.
79
+
80
+ Args:
81
+ variant_id: Variant ID
82
+ test_input: Test input
83
+ expected_quality: Expected quality criteria
84
+
85
+ Returns:
86
+ Test results
87
+ """
88
+ variant = self._load_variant(variant_id)
89
+ if not variant:
90
+ raise ValueError(f"Variant not found: {variant_id}")
91
+
92
+ logger.info(f"Testing prompt variant: {variant_id}")
93
+
94
+ try:
95
+ # Run the prompt with test input
96
+ test_prompt = variant["prompt_text"].replace("{input}", test_input)
97
+ output = await self.model_fn(test_prompt, max_tokens=1000)
98
+
99
+ # Evaluate quality
100
+ quality_score = await self._evaluate_quality(output, expected_quality)
101
+
102
+ # Update variant stats
103
+ variant["test_count"] += 1
104
+ if quality_score >= 0.7: # Consider it a "win" if quality >= 70%
105
+ variant["win_count"] += 1
106
+ variant["win_rate"] = variant["win_count"] / variant["test_count"]
107
+
108
+ self._save_variant(variant)
109
+
110
+ return {
111
+ "variant_id": variant_id,
112
+ "quality_score": quality_score,
113
+ "output": output,
114
+ "win_rate": variant["win_rate"],
115
+ }
116
+
117
+ except Exception as e:
118
+ logger.error(f"Failed to test prompt variant: {e}")
119
+ raise
120
+
121
+ def compare_prompts(self, variant_id_a: str, variant_id_b: str) -> Dict[str, Any]:
122
+ """
123
+ Compare two prompt variants.
124
+
125
+ Args:
126
+ variant_id_a: First variant ID
127
+ variant_id_b: Second variant ID
128
+
129
+ Returns:
130
+ Comparison results
131
+ """
132
+ variant_a = self._load_variant(variant_id_a)
133
+ variant_b = self._load_variant(variant_id_b)
134
+
135
+ if not variant_a or not variant_b:
136
+ raise ValueError("One or both variants not found")
137
+
138
+ return {
139
+ "variant_a": {
140
+ "id": variant_a["id"],
141
+ "version": variant_a["version"],
142
+ "win_rate": variant_a["win_rate"],
143
+ "test_count": variant_a["test_count"],
144
+ },
145
+ "variant_b": {
146
+ "id": variant_b["id"],
147
+ "version": variant_b["version"],
148
+ "win_rate": variant_b["win_rate"],
149
+ "test_count": variant_b["test_count"],
150
+ },
151
+ "winner": variant_a["id"] if variant_a["win_rate"] > variant_b["win_rate"] else variant_b["id"],
152
+ }
153
+
154
+ def promote_prompt(self, variant_id: str, min_tests: int = 10, min_win_rate: float = 0.7) -> bool:
155
+ """
156
+ Promote a prompt variant to production.
157
+
158
+ Args:
159
+ variant_id: Variant ID
160
+ min_tests: Minimum number of tests required
161
+ min_win_rate: Minimum win rate required
162
+
163
+ Returns:
164
+ True if promoted, False otherwise
165
+ """
166
+ variant = self._load_variant(variant_id)
167
+ if not variant:
168
+ raise ValueError(f"Variant not found: {variant_id}")
169
+
170
+ # Validate promotion criteria
171
+ if variant["test_count"] < min_tests:
172
+ logger.warning(f"Variant {variant_id} has insufficient tests: {variant['test_count']} < {min_tests}")
173
+ return False
174
+
175
+ if variant["win_rate"] < min_win_rate:
176
+ logger.warning(f"Variant {variant_id} has insufficient win rate: {variant['win_rate']} < {min_win_rate}")
177
+ return False
178
+
179
+ # Archive current production version
180
+ current_production = self._get_production_variant(variant["prompt_name"])
181
+ if current_production:
182
+ current_production["status"] = "archived"
183
+ current_production["archived_at"] = datetime.utcnow().isoformat()
184
+ self._save_variant(current_production)
185
+
186
+ # Promote variant
187
+ variant["status"] = "production"
188
+ variant["promoted_at"] = datetime.utcnow().isoformat()
189
+ self._save_variant(variant)
190
+
191
+ logger.info(f"Promoted prompt variant {variant_id} to production")
192
+ return True
193
+
194
+ def archive_prompt(self, variant_id: str):
195
+ """Archive a prompt variant."""
196
+ variant = self._load_variant(variant_id)
197
+ if not variant:
198
+ raise ValueError(f"Variant not found: {variant_id}")
199
+
200
+ variant["status"] = "archived"
201
+ variant["archived_at"] = datetime.utcnow().isoformat()
202
+ self._save_variant(variant)
203
+
204
+ logger.info(f"Archived prompt variant: {variant_id}")
205
+
206
+ def list_versions(self, prompt_name: str) -> List[Dict[str, Any]]:
207
+ """List all versions of a prompt."""
208
+ versions = []
209
+
210
+ for file_path in self.data_dir.glob(f"{prompt_name}_*.json"):
211
+ try:
212
+ with open(file_path, 'r') as f:
213
+ variant = json.load(f)
214
+ versions.append(variant)
215
+ except Exception as e:
216
+ logger.error(f"Failed to read variant {file_path}: {e}")
217
+
218
+ # Sort by version descending
219
+ versions.sort(key=lambda x: x.get("version", 0), reverse=True)
220
+ return versions
221
+
222
+ def _save_variant(self, variant: Dict[str, Any]):
223
+ """Save a prompt variant to disk."""
224
+ file_path = self.data_dir / f"{variant['prompt_name']}_{variant['id']}.json"
225
+ with open(file_path, 'w') as f:
226
+ json.dump(variant, f, indent=2)
227
+
228
+ def _load_variant(self, variant_id: str) -> Optional[Dict[str, Any]]:
229
+ """Load a prompt variant from disk."""
230
+ for file_path in self.data_dir.glob(f"*_{variant_id}.json"):
231
+ with open(file_path, 'r') as f:
232
+ return json.load(f)
233
+ return None
234
+
235
+ def _get_production_variant(self, prompt_name: str) -> Optional[Dict[str, Any]]:
236
+ """Get the current production variant for a prompt."""
237
+ for file_path in self.data_dir.glob(f"{prompt_name}_*.json"):
238
+ try:
239
+ with open(file_path, 'r') as f:
240
+ variant = json.load(f)
241
+ if variant.get("status") == "production":
242
+ return variant
243
+ except Exception as e:
244
+ logger.error(f"Failed to read variant {file_path}: {e}")
245
+ return None
246
+
247
+ def _get_next_version(self, prompt_name: str) -> int:
248
+ """Get the next version number for a prompt."""
249
+ versions = self.list_versions(prompt_name)
250
+ if not versions:
251
+ return 1
252
+ return max(v.get("version", 0) for v in versions) + 1
253
+
254
+ async def _evaluate_quality(self, output: str, expected_quality: str) -> float:
255
+ """Evaluate output quality using LLM."""
256
+ eval_prompt = f"""Evaluate the quality of this output based on the criteria: {expected_quality}
257
+
258
+ Output:
259
+ {output}
260
+
261
+ Rate the quality from 0.0 to 1.0, where:
262
+ - 0.0 = Completely fails criteria
263
+ - 0.5 = Partially meets criteria
264
+ - 1.0 = Fully meets criteria
265
+
266
+ Provide only the numeric score:"""
267
+
268
+ try:
269
+ score_text = await self.model_fn(eval_prompt, max_tokens=10)
270
+ score = float(score_text.strip())
271
+ return max(0.0, min(1.0, score)) # Clamp to [0, 1]
272
+ except Exception as e:
273
+ logger.error(f"Failed to evaluate quality: {e}")
274
+ return 0.5 # Default to neutral score
backend/app/services/learning/scheduler.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Learning scheduler with safeguards for laptop deployment.
3
+
4
+ Schedules learning tasks with CPU, battery, and system idle checks.
5
+ """
6
+
7
+ import asyncio
8
+ import logging
9
+ import psutil
10
+ from datetime import datetime, timedelta
11
+ from typing import Dict, Any, Callable, Optional
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class LearningScheduler:
17
+ """Schedules learning tasks with system safeguards."""
18
+
19
+ def __init__(
20
+ self,
21
+ max_cpu_percent: float = 50.0,
22
+ min_battery_percent: float = 30.0,
23
+ check_interval_seconds: int = 60,
24
+ ):
25
+ self.max_cpu_percent = max_cpu_percent
26
+ self.min_battery_percent = min_battery_percent
27
+ self.check_interval_seconds = check_interval_seconds
28
+ self.scheduled_tasks = {}
29
+ self.running = False
30
+ self.last_run = {}
31
+
32
+ def schedule_task(
33
+ self,
34
+ task_name: str,
35
+ task_fn: Callable,
36
+ interval_hours: int,
37
+ run_immediately: bool = False,
38
+ ):
39
+ """
40
+ Schedule a learning task.
41
+
42
+ Args:
43
+ task_name: Task name
44
+ task_fn: Async function to run
45
+ interval_hours: Interval in hours
46
+ run_immediately: Whether to run immediately on first check
47
+ """
48
+ self.scheduled_tasks[task_name] = {
49
+ "fn": task_fn,
50
+ "interval": timedelta(hours=interval_hours),
51
+ "last_run": None if run_immediately else datetime.utcnow(),
52
+ }
53
+ logger.info(f"Scheduled task: {task_name} (interval={interval_hours}h)")
54
+
55
+ async def start(self):
56
+ """Start the scheduler."""
57
+ if self.running:
58
+ logger.warning("Scheduler already running")
59
+ return
60
+
61
+ self.running = True
62
+ logger.info("Learning scheduler started")
63
+
64
+ while self.running:
65
+ try:
66
+ await self._check_and_run_tasks()
67
+ await asyncio.sleep(self.check_interval_seconds)
68
+ except Exception as e:
69
+ logger.error(f"Scheduler error: {e}")
70
+ await asyncio.sleep(self.check_interval_seconds)
71
+
72
+ def stop(self):
73
+ """Stop the scheduler."""
74
+ self.running = False
75
+ logger.info("Learning scheduler stopped")
76
+
77
+ async def run_once(self, task_name: str) -> Dict[str, Any]:
78
+ """
79
+ Run a task once manually.
80
+
81
+ Args:
82
+ task_name: Task name
83
+
84
+ Returns:
85
+ Task result
86
+ """
87
+ if task_name not in self.scheduled_tasks:
88
+ raise ValueError(f"Task not found: {task_name}")
89
+
90
+ task = self.scheduled_tasks[task_name]
91
+
92
+ logger.info(f"Running task manually: {task_name}")
93
+
94
+ try:
95
+ result = await task["fn"]()
96
+ task["last_run"] = datetime.utcnow()
97
+ self.last_run[task_name] = task["last_run"].isoformat()
98
+
99
+ return {
100
+ "task_name": task_name,
101
+ "status": "success",
102
+ "result": result,
103
+ "timestamp": task["last_run"].isoformat(),
104
+ }
105
+
106
+ except Exception as e:
107
+ logger.error(f"Task failed: {task_name}: {e}")
108
+ return {
109
+ "task_name": task_name,
110
+ "status": "error",
111
+ "error": str(e),
112
+ "timestamp": datetime.utcnow().isoformat(),
113
+ }
114
+
115
+ def is_system_idle(self) -> bool:
116
+ """
117
+ Check if system is idle (low CPU usage).
118
+
119
+ Returns:
120
+ True if system is idle
121
+ """
122
+ try:
123
+ cpu_percent = psutil.cpu_percent(interval=1)
124
+ is_idle = cpu_percent < self.max_cpu_percent
125
+
126
+ if not is_idle:
127
+ logger.debug(f"System not idle: CPU={cpu_percent:.1f}% (max={self.max_cpu_percent}%)")
128
+
129
+ return is_idle
130
+
131
+ except Exception as e:
132
+ logger.error(f"Failed to check CPU usage: {e}")
133
+ return False # Assume not idle on error
134
+
135
+ def is_battery_ok(self) -> bool:
136
+ """
137
+ Check if battery level is sufficient.
138
+
139
+ Returns:
140
+ True if battery is OK or plugged in
141
+ """
142
+ try:
143
+ battery = psutil.sensors_battery()
144
+
145
+ # If no battery (desktop) or plugged in, always OK
146
+ if battery is None or battery.power_plugged:
147
+ return True
148
+
149
+ # Check battery percentage
150
+ is_ok = battery.percent >= self.min_battery_percent
151
+
152
+ if not is_ok:
153
+ logger.debug(f"Battery too low: {battery.percent:.1f}% (min={self.min_battery_percent}%)")
154
+
155
+ return is_ok
156
+
157
+ except Exception as e:
158
+ logger.error(f"Failed to check battery: {e}")
159
+ return True # Assume OK on error (might be desktop)
160
+
161
+ async def _check_and_run_tasks(self):
162
+ """Check and run scheduled tasks if conditions are met."""
163
+ # Check system conditions
164
+ if not self.is_system_idle():
165
+ logger.debug("Skipping scheduled tasks: system not idle")
166
+ return
167
+
168
+ if not self.is_battery_ok():
169
+ logger.debug("Skipping scheduled tasks: battery too low")
170
+ return
171
+
172
+ # Check each task
173
+ now = datetime.utcnow()
174
+
175
+ for task_name, task in self.scheduled_tasks.items():
176
+ # Check if task is due
177
+ if task["last_run"] is not None:
178
+ time_since_last_run = now - task["last_run"]
179
+ if time_since_last_run < task["interval"]:
180
+ continue
181
+
182
+ # Run task
183
+ logger.info(f"Running scheduled task: {task_name}")
184
+
185
+ try:
186
+ result = await task["fn"]()
187
+ task["last_run"] = now
188
+ self.last_run[task_name] = now.isoformat()
189
+ logger.info(f"Task completed: {task_name}")
190
+
191
+ except Exception as e:
192
+ logger.error(f"Task failed: {task_name}: {e}")
193
+ # Still update last_run to avoid retry loop
194
+ task["last_run"] = now
195
+ self.last_run[task_name] = now.isoformat()
196
+
197
+ def get_status(self) -> Dict[str, Any]:
198
+ """Get scheduler status."""
199
+ tasks_status = []
200
+
201
+ for task_name, task in self.scheduled_tasks.items():
202
+ last_run = task["last_run"]
203
+ next_run = None
204
+
205
+ if last_run is not None:
206
+ next_run = (last_run + task["interval"]).isoformat()
207
+
208
+ tasks_status.append({
209
+ "name": task_name,
210
+ "interval_hours": task["interval"].total_seconds() / 3600,
211
+ "last_run": last_run.isoformat() if last_run else None,
212
+ "next_run": next_run,
213
+ })
214
+
215
+ return {
216
+ "running": self.running,
217
+ "system_idle": self.is_system_idle(),
218
+ "battery_ok": self.is_battery_ok(),
219
+ "tasks": tasks_status,
220
+ }
backend/app/services/learning/skill_distiller.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Skill distillation from repeated patterns in case execution.
3
+
4
+ Detects patterns in successful cases and distills them into reusable skills.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional, List
12
+ from collections import Counter
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class SkillDistiller:
18
+ """Distills skills from execution patterns."""
19
+
20
+ def __init__(self, data_dir: str, model_fn):
21
+ self.data_dir = Path(data_dir) / "skills"
22
+ self.data_dir.mkdir(parents=True, exist_ok=True)
23
+ self.model_fn = model_fn
24
+
25
+ def detect_skill_candidates(self, cases: List[Dict[str, Any]], min_frequency: int = 3) -> List[Dict[str, Any]]:
26
+ """
27
+ Detect skill candidates from case patterns.
28
+
29
+ Args:
30
+ cases: List of case records
31
+ min_frequency: Minimum frequency for a pattern to be considered
32
+
33
+ Returns:
34
+ List of skill candidates
35
+ """
36
+ logger.info(f"Detecting skill candidates from {len(cases)} cases")
37
+
38
+ # Extract patterns
39
+ domain_patterns = Counter()
40
+ source_patterns = Counter()
41
+ agent_patterns = Counter()
42
+
43
+ for case in cases:
44
+ route = case.get("route", {})
45
+ domain = route.get("domain_pack")
46
+ if domain:
47
+ domain_patterns[domain] += 1
48
+
49
+ # Extract sources from research output
50
+ outputs = case.get("outputs", {})
51
+ research_output = outputs.get("research", {})
52
+ sources = research_output.get("sources", [])
53
+ for source in sources:
54
+ source_patterns[source] += 1
55
+
56
+ # Extract agent sequence
57
+ agent_sequence = tuple(outputs.keys())
58
+ agent_patterns[agent_sequence] += 1
59
+
60
+ # Find frequent patterns
61
+ candidates = []
62
+
63
+ for domain, count in domain_patterns.items():
64
+ if count >= min_frequency:
65
+ candidates.append({
66
+ "type": "domain_expertise",
67
+ "domain": domain,
68
+ "frequency": count,
69
+ "confidence": count / len(cases),
70
+ })
71
+
72
+ for source, count in source_patterns.items():
73
+ if count >= min_frequency:
74
+ candidates.append({
75
+ "type": "preferred_source",
76
+ "source": source,
77
+ "frequency": count,
78
+ "confidence": count / len(cases),
79
+ })
80
+
81
+ for agent_seq, count in agent_patterns.items():
82
+ if count >= min_frequency:
83
+ candidates.append({
84
+ "type": "agent_workflow",
85
+ "agents": list(agent_seq),
86
+ "frequency": count,
87
+ "confidence": count / len(cases),
88
+ })
89
+
90
+ logger.info(f"Detected {len(candidates)} skill candidates")
91
+ return candidates
92
+
93
+ async def distill_skill(self, candidate: Dict[str, Any], example_cases: List[Dict[str, Any]]) -> Dict[str, Any]:
94
+ """
95
+ Distill a skill from a candidate pattern.
96
+
97
+ Args:
98
+ candidate: Skill candidate
99
+ example_cases: Example cases demonstrating the pattern
100
+
101
+ Returns:
102
+ Distilled skill
103
+ """
104
+ logger.info(f"Distilling skill from candidate: {candidate['type']}")
105
+
106
+ # Generate skill description using LLM
107
+ examples_text = "\n\n".join([
108
+ f"Case {i+1}:\nInput: {case.get('user_input', '')}\nOutput: {case.get('final_answer', '')}"
109
+ for i, case in enumerate(example_cases[:3])
110
+ ])
111
+
112
+ distill_prompt = f"""Analyze these successful cases and create a reusable skill description.
113
+
114
+ Pattern type: {candidate['type']}
115
+ Pattern details: {json.dumps(candidate, indent=2)}
116
+
117
+ Example cases:
118
+ {examples_text}
119
+
120
+ Create a skill that includes:
121
+ 1. A clear name
122
+ 2. Trigger patterns (when to use this skill)
123
+ 3. Recommended agents
124
+ 4. Preferred sources
125
+ 5. Expected outcomes
126
+
127
+ Skill (JSON format):"""
128
+
129
+ try:
130
+ skill_text = await self.model_fn(distill_prompt, max_tokens=1000)
131
+
132
+ # Parse skill (basic fallback if LLM doesn't return valid JSON)
133
+ try:
134
+ skill = json.loads(skill_text)
135
+ except json.JSONDecodeError:
136
+ skill = {
137
+ "name": f"{candidate['type']}_skill",
138
+ "description": skill_text,
139
+ "trigger_patterns": [],
140
+ "recommended_agents": [],
141
+ "preferred_sources": [],
142
+ }
143
+
144
+ # Add metadata
145
+ skill["id"] = f"{candidate['type']}_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}"
146
+ skill["type"] = candidate["type"]
147
+ skill["frequency"] = candidate["frequency"]
148
+ skill["confidence"] = candidate["confidence"]
149
+ skill["created_at"] = datetime.utcnow().isoformat()
150
+ skill["usage_count"] = 0
151
+ skill["success_count"] = 0
152
+ skill["success_rate"] = 0.0
153
+
154
+ self._save_skill(skill)
155
+ logger.info(f"Distilled skill: {skill['id']}")
156
+
157
+ return skill
158
+
159
+ except Exception as e:
160
+ logger.error(f"Failed to distill skill: {e}")
161
+ raise
162
+
163
+ async def test_skill(self, skill_id: str, test_case: Dict[str, Any]) -> Dict[str, Any]:
164
+ """
165
+ Test a skill with a test case.
166
+
167
+ Args:
168
+ skill_id: Skill ID
169
+ test_case: Test case
170
+
171
+ Returns:
172
+ Test results
173
+ """
174
+ skill = self._load_skill(skill_id)
175
+ if not skill:
176
+ raise ValueError(f"Skill not found: {skill_id}")
177
+
178
+ logger.info(f"Testing skill: {skill_id}")
179
+
180
+ # Check if skill triggers match test case
181
+ triggers_match = self._check_triggers(skill, test_case)
182
+
183
+ return {
184
+ "skill_id": skill_id,
185
+ "triggers_match": triggers_match,
186
+ "test_case_id": test_case.get("case_id"),
187
+ }
188
+
189
+ def apply_skill(self, skill_id: str, context: Dict[str, Any]) -> Dict[str, Any]:
190
+ """
191
+ Apply a skill to a context.
192
+
193
+ Args:
194
+ skill_id: Skill ID
195
+ context: Execution context
196
+
197
+ Returns:
198
+ Skill application results
199
+ """
200
+ skill = self._load_skill(skill_id)
201
+ if not skill:
202
+ raise ValueError(f"Skill not found: {skill_id}")
203
+
204
+ logger.info(f"Applying skill: {skill_id}")
205
+
206
+ # Update usage stats
207
+ skill["usage_count"] += 1
208
+ self._save_skill(skill)
209
+
210
+ return {
211
+ "skill_id": skill_id,
212
+ "recommended_agents": skill.get("recommended_agents", []),
213
+ "preferred_sources": skill.get("preferred_sources", []),
214
+ "expected_outcomes": skill.get("expected_outcomes", []),
215
+ }
216
+
217
+ def record_skill_outcome(self, skill_id: str, success: bool):
218
+ """Record the outcome of a skill application."""
219
+ skill = self._load_skill(skill_id)
220
+ if not skill:
221
+ return
222
+
223
+ if success:
224
+ skill["success_count"] += 1
225
+
226
+ if skill["usage_count"] > 0:
227
+ skill["success_rate"] = skill["success_count"] / skill["usage_count"]
228
+
229
+ self._save_skill(skill)
230
+ logger.info(f"Recorded skill outcome: {skill_id} (success={success})")
231
+
232
+ def list_skills(self) -> List[Dict[str, Any]]:
233
+ """List all skills."""
234
+ skills = []
235
+
236
+ for file_path in self.data_dir.glob("*.json"):
237
+ try:
238
+ with open(file_path, 'r') as f:
239
+ skill = json.load(f)
240
+ skills.append(skill)
241
+ except Exception as e:
242
+ logger.error(f"Failed to read skill {file_path}: {e}")
243
+
244
+ # Sort by success rate descending
245
+ skills.sort(key=lambda x: x.get("success_rate", 0), reverse=True)
246
+ return skills
247
+
248
+ def get_skill(self, skill_id: str) -> Optional[Dict[str, Any]]:
249
+ """Get a skill by ID."""
250
+ return self._load_skill(skill_id)
251
+
252
+ def _save_skill(self, skill: Dict[str, Any]):
253
+ """Save a skill to disk."""
254
+ file_path = self.data_dir / f"{skill['id']}.json"
255
+ with open(file_path, 'w') as f:
256
+ json.dump(skill, f, indent=2)
257
+
258
+ def _load_skill(self, skill_id: str) -> Optional[Dict[str, Any]]:
259
+ """Load a skill from disk."""
260
+ file_path = self.data_dir / f"{skill_id}.json"
261
+ if not file_path.exists():
262
+ return None
263
+
264
+ with open(file_path, 'r') as f:
265
+ return json.load(f)
266
+
267
+ def _check_triggers(self, skill: Dict[str, Any], test_case: Dict[str, Any]) -> bool:
268
+ """Check if skill triggers match a test case."""
269
+ trigger_patterns = skill.get("trigger_patterns", [])
270
+ if not trigger_patterns:
271
+ return True # No triggers means always applicable
272
+
273
+ user_input = test_case.get("user_input", "").lower()
274
+
275
+ for pattern in trigger_patterns:
276
+ if pattern.lower() in user_input:
277
+ return True
278
+
279
+ return False
backend/app/services/learning/trust_manager.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Trust and freshness management for sources and knowledge.
3
+
4
+ Tracks source reliability and content freshness over time.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+ from typing import Dict, Any, Optional, List
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
+ class TrustManager:
17
+ """Manages trust scores and freshness for sources."""
18
+
19
+ def __init__(self, data_dir: str):
20
+ self.data_dir = Path(data_dir) / "learning"
21
+ self.data_dir.mkdir(parents=True, exist_ok=True)
22
+ self.trust_file = self.data_dir / "source_trust.json"
23
+ self.freshness_file = self.data_dir / "freshness_scores.json"
24
+
25
+ # Initialize files if they don't exist
26
+ if not self.trust_file.exists():
27
+ self._save_trust_data({})
28
+ if not self.freshness_file.exists():
29
+ self._save_freshness_data({})
30
+
31
+ def get_trust_score(self, source: str) -> float:
32
+ """
33
+ Get trust score for a source.
34
+
35
+ Args:
36
+ source: Source identifier (URL or name)
37
+
38
+ Returns:
39
+ Trust score (0.0 to 1.0)
40
+ """
41
+ trust_data = self._load_trust_data()
42
+ source_data = trust_data.get(source, {})
43
+ return source_data.get("trust_score", 0.5) # Default to neutral
44
+
45
+ def update_trust(self, source: str, verification_outcome: bool, weight: float = 1.0):
46
+ """
47
+ Update trust score based on verification outcome.
48
+
49
+ Args:
50
+ source: Source identifier
51
+ verification_outcome: True if verified, False if not
52
+ weight: Weight of this update (0.0 to 1.0)
53
+ """
54
+ trust_data = self._load_trust_data()
55
+
56
+ if source not in trust_data:
57
+ trust_data[source] = {
58
+ "trust_score": 0.5,
59
+ "verification_count": 0,
60
+ "success_count": 0,
61
+ "last_updated": datetime.utcnow().isoformat(),
62
+ }
63
+
64
+ source_data = trust_data[source]
65
+
66
+ # Update counts
67
+ source_data["verification_count"] += 1
68
+ if verification_outcome:
69
+ source_data["success_count"] += 1
70
+
71
+ # Calculate new trust score using exponential moving average
72
+ current_score = source_data["trust_score"]
73
+ outcome_score = 1.0 if verification_outcome else 0.0
74
+ alpha = 0.1 * weight # Learning rate
75
+ new_score = (1 - alpha) * current_score + alpha * outcome_score
76
+
77
+ source_data["trust_score"] = new_score
78
+ source_data["last_updated"] = datetime.utcnow().isoformat()
79
+
80
+ trust_data[source] = source_data
81
+ self._save_trust_data(trust_data)
82
+
83
+ logger.info(f"Updated trust for {source}: {new_score:.3f} (outcome={verification_outcome})")
84
+
85
+ def list_trusted_sources(self, min_trust: float = 0.7, min_verifications: int = 3) -> List[Dict[str, Any]]:
86
+ """
87
+ List trusted sources.
88
+
89
+ Args:
90
+ min_trust: Minimum trust score
91
+ min_verifications: Minimum number of verifications
92
+
93
+ Returns:
94
+ List of trusted sources
95
+ """
96
+ trust_data = self._load_trust_data()
97
+
98
+ trusted = []
99
+ for source, data in trust_data.items():
100
+ if data["trust_score"] >= min_trust and data["verification_count"] >= min_verifications:
101
+ trusted.append({
102
+ "source": source,
103
+ "trust_score": data["trust_score"],
104
+ "verification_count": data["verification_count"],
105
+ "success_rate": data["success_count"] / data["verification_count"],
106
+ })
107
+
108
+ # Sort by trust score descending
109
+ trusted.sort(key=lambda x: x["trust_score"], reverse=True)
110
+ return trusted
111
+
112
+ def list_untrusted_sources(self, max_trust: float = 0.3, min_verifications: int = 3) -> List[Dict[str, Any]]:
113
+ """
114
+ List untrusted sources.
115
+
116
+ Args:
117
+ max_trust: Maximum trust score
118
+ min_verifications: Minimum number of verifications
119
+
120
+ Returns:
121
+ List of untrusted sources
122
+ """
123
+ trust_data = self._load_trust_data()
124
+
125
+ untrusted = []
126
+ for source, data in trust_data.items():
127
+ if data["trust_score"] <= max_trust and data["verification_count"] >= min_verifications:
128
+ untrusted.append({
129
+ "source": source,
130
+ "trust_score": data["trust_score"],
131
+ "verification_count": data["verification_count"],
132
+ "success_rate": data["success_count"] / data["verification_count"],
133
+ })
134
+
135
+ # Sort by trust score ascending
136
+ untrusted.sort(key=lambda x: x["trust_score"])
137
+ return untrusted
138
+
139
+ def calculate_freshness(self, item: Dict[str, Any], domain: Optional[str] = None) -> float:
140
+ """
141
+ Calculate freshness score for a knowledge item.
142
+
143
+ Args:
144
+ item: Knowledge item
145
+ domain: Domain for domain-specific rules
146
+
147
+ Returns:
148
+ Freshness score (0.0 to 1.0)
149
+ """
150
+ # Get age in days
151
+ saved_at = datetime.fromisoformat(item.get("saved_at", datetime.utcnow().isoformat()))
152
+ age_days = (datetime.utcnow() - saved_at).days
153
+
154
+ # Domain-specific expiration rules
155
+ if domain == "finance":
156
+ # Financial data expires quickly
157
+ half_life_days = 7 # 50% fresh after 7 days
158
+ else:
159
+ # General knowledge expires slowly
160
+ half_life_days = 30 # 50% fresh after 30 days
161
+
162
+ # Calculate freshness using exponential decay
163
+ freshness = 2 ** (-age_days / half_life_days)
164
+
165
+ return max(0.0, min(1.0, freshness))
166
+
167
+ def update_freshness(self, item_id: str, freshness_score: float):
168
+ """
169
+ Update freshness score for an item.
170
+
171
+ Args:
172
+ item_id: Item ID
173
+ freshness_score: New freshness score
174
+ """
175
+ freshness_data = self._load_freshness_data()
176
+
177
+ freshness_data[item_id] = {
178
+ "freshness_score": freshness_score,
179
+ "last_updated": datetime.utcnow().isoformat(),
180
+ }
181
+
182
+ self._save_freshness_data(freshness_data)
183
+
184
+ def get_stale_items(self, items: List[Dict[str, Any]], threshold: float = 0.3) -> List[Dict[str, Any]]:
185
+ """
186
+ Get stale items that need refreshing.
187
+
188
+ Args:
189
+ items: List of knowledge items
190
+ threshold: Freshness threshold
191
+
192
+ Returns:
193
+ List of stale items
194
+ """
195
+ stale = []
196
+
197
+ for item in items:
198
+ freshness = self.calculate_freshness(item)
199
+ if freshness < threshold:
200
+ stale.append({
201
+ "item_id": item.get("id"),
202
+ "title": item.get("title"),
203
+ "freshness": freshness,
204
+ "age_days": (datetime.utcnow() - datetime.fromisoformat(item.get("saved_at", datetime.utcnow().isoformat()))).days,
205
+ })
206
+
207
+ # Sort by freshness ascending (stalest first)
208
+ stale.sort(key=lambda x: x["freshness"])
209
+ return stale
210
+
211
+ def recommend_refresh(self, stale_items: List[Dict[str, Any]], max_recommendations: int = 10) -> List[Dict[str, Any]]:
212
+ """
213
+ Recommend items to refresh.
214
+
215
+ Args:
216
+ stale_items: List of stale items
217
+ max_recommendations: Maximum number of recommendations
218
+
219
+ Returns:
220
+ List of recommended items to refresh
221
+ """
222
+ # Prioritize by staleness and importance
223
+ recommendations = stale_items[:max_recommendations]
224
+
225
+ return recommendations
226
+
227
+ def _load_trust_data(self) -> Dict[str, Any]:
228
+ """Load trust data from disk."""
229
+ with open(self.trust_file, 'r') as f:
230
+ return json.load(f)
231
+
232
+ def _save_trust_data(self, data: Dict[str, Any]):
233
+ """Save trust data to disk."""
234
+ with open(self.trust_file, 'w') as f:
235
+ json.dump(data, f, indent=2)
236
+
237
+ def _load_freshness_data(self) -> Dict[str, Any]:
238
+ """Load freshness data from disk."""
239
+ with open(self.freshness_file, 'r') as f:
240
+ return json.load(f)
241
+
242
+ def _save_freshness_data(self, data: Dict[str, Any]):
243
+ """Save freshness data to disk."""
244
+ with open(self.freshness_file, 'w') as f:
245
+ json.dump(data, f, indent=2)
backend/app/services/mirofish_client.py CHANGED
@@ -1,15 +1,45 @@
1
- from typing import Any, Dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import httpx
4
 
5
- from app.config import (
6
- MIROFISH_API_BASE,
7
- MIROFISH_TIMEOUT_SECONDS,
8
- MIROFISH_HEALTH_PATH,
9
- MIROFISH_RUN_PATH,
10
- MIROFISH_STATUS_PATH,
11
- MIROFISH_REPORT_PATH,
12
- MIROFISH_CHAT_PATH,
 
 
13
  )
14
 
15
 
@@ -17,71 +47,225 @@ class MiroFishError(Exception):
17
  pass
18
 
19
 
20
- def _build_url(path: str, simulation_id: str | None = None) -> str:
21
- if simulation_id is not None:
22
- path = path.replace("{id}", simulation_id)
23
- return f"{MIROFISH_API_BASE.rstrip('/')}{path}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  def mirofish_health() -> Dict[str, Any]:
27
  try:
28
  with httpx.Client(timeout=10) as client:
29
- response = client.get(_build_url(MIROFISH_HEALTH_PATH))
30
  return {
31
  "reachable": response.status_code < 500,
32
  "status_code": response.status_code,
33
- "body": response.text[:500],
34
  }
35
  except Exception as e:
36
- return {
37
- "reachable": False,
38
- "status_code": None,
39
- "body": str(e),
40
- }
41
 
42
 
 
 
43
  def run_simulation(payload: Dict[str, Any]) -> Dict[str, Any]:
44
- try:
45
- with httpx.Client(timeout=MIROFISH_TIMEOUT_SECONDS) as client:
46
- response = client.post(_build_url(MIROFISH_RUN_PATH), json=payload)
47
- if response.status_code >= 400:
48
- raise MiroFishError(f"MiroFish run error {response.status_code}: {response.text}")
49
- return response.json()
50
- except Exception as e:
51
- raise MiroFishError(str(e))
 
 
 
 
 
 
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def simulation_status(simulation_id: str) -> Dict[str, Any]:
55
- try:
56
- with httpx.Client(timeout=30) as client:
57
- response = client.get(_build_url(MIROFISH_STATUS_PATH, simulation_id))
58
- if response.status_code >= 400:
59
- raise MiroFishError(f"MiroFish status error {response.status_code}: {response.text}")
60
- return response.json()
61
- except Exception as e:
62
- raise MiroFishError(str(e))
63
 
64
 
65
  def simulation_report(simulation_id: str) -> Dict[str, Any]:
 
 
66
  try:
67
- with httpx.Client(timeout=30) as client:
68
- response = client.get(_build_url(MIROFISH_REPORT_PATH, simulation_id))
69
- if response.status_code >= 400:
70
- raise MiroFishError(f"MiroFish report error {response.status_code}: {response.text}")
71
- return response.json()
72
- except Exception as e:
73
- raise MiroFishError(str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
 
76
  def simulation_chat(simulation_id: str, message: str) -> Dict[str, Any]:
 
 
77
  try:
78
- with httpx.Client(timeout=60) as client:
79
- response = client.post(
80
- _build_url(MIROFISH_CHAT_PATH, simulation_id),
81
- json={"message": message},
82
- )
83
- if response.status_code >= 400:
84
- raise MiroFishError(f"MiroFish chat error {response.status_code}: {response.text}")
85
- return response.json()
86
- except Exception as e:
87
- raise MiroFishError(str(e))
 
 
 
 
1
+ """
2
+ MiroFish client adapter.
3
+
4
+ MiroFish actual API (all under /api/):
5
+ Graph/Project:
6
+ POST /api/graph/ontology/generate - upload seed file + requirement → project_id
7
+ POST /api/graph/build - build graph → task_id
8
+ GET /api/graph/task/<task_id> - poll build status → graph_id when done
9
+
10
+ Simulation:
11
+ POST /api/simulation/create - create simulation → simulation_id
12
+ POST /api/simulation/prepare - prepare agents
13
+ POST /api/simulation/prepare/status - poll prepare status
14
+ POST /api/simulation/start - run simulation
15
+ GET /api/simulation/<sim_id> - get simulation status
16
+ GET /api/simulation/list - list simulations
17
+
18
+ Report:
19
+ POST /api/report/generate - generate report → report_id
20
+ POST /api/report/generate/status - poll report status
21
+ GET /api/report/<report_id> - get report
22
+ GET /api/report/by-simulation/<sim_id> - get report by simulation
23
+ POST /api/report/chat - chat with report agent
24
+ """
25
+
26
+ import io
27
+ import time
28
+ import logging
29
+ from typing import Any, Dict, Optional
30
 
31
  import httpx
32
 
33
+ from app.config import MIROFISH_API_BASE, MIROFISH_TIMEOUT_SECONDS
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ BASE = MIROFISH_API_BASE.rstrip("/")
38
+
39
+ # Module-level connection pool — reused across all requests
40
+ _http_pool = httpx.Client(
41
+ timeout=MIROFISH_TIMEOUT_SECONDS,
42
+ limits=httpx.Limits(max_connections=10, max_keepalive_connections=5),
43
  )
44
 
45
 
 
47
  pass
48
 
49
 
50
def _url(path: str) -> str:
    """Join an API path onto the (already rstripped) MiroFish base URL."""
    return BASE + path
52
+
53
+
54
def _post(path: str, json: Dict = None, data: Dict = None, files=None, timeout: int = 30) -> Dict:
    """POST to the MiroFish API and return the parsed JSON body.

    Multipart uploads (files given) go through a throwaway client; plain
    JSON posts reuse the shared connection pool. Raises MiroFishError on
    any HTTP error status or transport failure.
    """
    try:
        if files:
            # Use a one-off client for multipart uploads (connection pool can't reuse these)
            with httpx.Client(timeout=timeout) as one_off:
                resp = one_off.post(_url(path), data=data or {}, files=files)
        else:
            resp = _http_pool.post(_url(path), json=json or {}, timeout=timeout)
        if resp.status_code >= 400:
            raise MiroFishError(f"MiroFish {path} error {resp.status_code}: {resp.text[:300]}")
        return resp.json()
    except MiroFishError:
        # Already wrapped with context — propagate untouched.
        raise
    except Exception as e:
        raise MiroFishError(str(e))
69
 
70
 
71
def _get(path: str, timeout: int = 30) -> Dict:
    """GET from the MiroFish API via the shared pool and return parsed JSON.

    Raises MiroFishError on HTTP error statuses or transport failures.
    """
    try:
        resp = _http_pool.get(_url(path), timeout=timeout)
        if resp.status_code >= 400:
            raise MiroFishError(f"MiroFish {path} error {resp.status_code}: {resp.text[:300]}")
        return resp.json()
    except MiroFishError:
        # Already wrapped with context — propagate untouched.
        raise
    except Exception as e:
        raise MiroFishError(str(e))
81
+
82
+
83
+ # ── Health ────────────────────────────────────────────────────────────────────
84
+
85
def mirofish_health() -> Dict[str, Any]:
    """Probe the MiroFish /health endpoint.

    Never raises: transport failures are reported as reachable=False
    with the exception text in "body".
    """
    try:
        with httpx.Client(timeout=10) as probe:
            resp = probe.get(_url("/health"))
            return {
                "reachable": resp.status_code < 500,
                "status_code": resp.status_code,
                "body": resp.text[:200],
            }
    except Exception as e:
        return {"reachable": False, "status_code": None, "body": str(e)}
 
 
 
 
96
 
97
 
98
+ # ── Full simulation workflow ──────────────────────────────────────────────────
99
+
100
def run_simulation(payload: Dict[str, Any]) -> Dict[str, Any]:
    """
    Orchestrate the full MiroFish workflow:
    1. Generate ontology (upload seed text as a .txt file)
    2. Build graph (poll until done)
    3. Create simulation
    4. Prepare simulation (poll until done)
    5. Start simulation
    Returns a dict with simulation_id and status.

    Args:
        payload: Request dict. Reads "title" (project name, default
            "Simulation"), "seed_text" (ontology source text; falls back to
            "prediction_goal"), and "prediction_goal" (the simulation
            requirement; falls back to the title).

    Returns:
        Dict with simulation_id, project_id, graph_id and status "running".
        The simulation continues server-side after this function returns.

    Raises:
        MiroFishError: if any step reports failure or a polling step times out.
    """
    title = payload.get("title", "Simulation")
    seed_text = payload.get("seed_text", payload.get("prediction_goal", ""))
    requirement = payload.get("prediction_goal", title)

    logger.info(f"MiroFish: starting full workflow for '{title}'")

    # Step 1 – generate ontology by uploading seed text as a file
    # NOTE(review): an empty seed_text is uploaded as an empty seed.txt —
    # confirm the MiroFish ontology endpoint accepts an empty file.
    seed_bytes = seed_text.encode("utf-8")
    files = [("files", ("seed.txt", io.BytesIO(seed_bytes), "text/plain"))]
    form_data = {
        "simulation_requirement": requirement,
        "project_name": title,
    }
    ontology_resp = _post("/api/graph/ontology/generate", data=form_data, files=files, timeout=120)
    if not ontology_resp.get("success"):
        raise MiroFishError(f"Ontology generation failed: {ontology_resp.get('error')}")
    project_id = ontology_resp["data"]["project_id"]
    logger.info(f"MiroFish: project created {project_id}")

    # Step 2 – build graph
    build_resp = _post("/api/graph/build", json={"project_id": project_id}, timeout=120)
    if not build_resp.get("success"):
        raise MiroFishError(f"Graph build failed: {build_resp.get('error')}")
    task_id = build_resp["data"]["task_id"]
    logger.info(f"MiroFish: graph build task {task_id}")

    # Poll graph build (max 5 min); raises MiroFishError on failure/timeout
    graph_id = _poll_graph_build(task_id, max_wait=300)
    logger.info(f"MiroFish: graph ready {graph_id}")

    # Step 3 – create simulation
    create_resp = _post("/api/simulation/create", json={"project_id": project_id, "graph_id": graph_id})
    if not create_resp.get("success"):
        raise MiroFishError(f"Simulation create failed: {create_resp.get('error')}")
    simulation_id = create_resp["data"]["simulation_id"]
    logger.info(f"MiroFish: simulation created {simulation_id}")

    # Step 4 – prepare simulation (async, poll)
    prepare_resp = _post("/api/simulation/prepare", json={"simulation_id": simulation_id}, timeout=120)
    if not prepare_resp.get("success"):
        raise MiroFishError(f"Simulation prepare failed: {prepare_resp.get('error')}")
    prepare_task_id = prepare_resp["data"].get("task_id")
    # Only poll when the server handed back an async task; some deployments
    # appear to prepare synchronously (no task_id) — then we proceed directly.
    if prepare_task_id:
        _poll_prepare(simulation_id, prepare_task_id, max_wait=300)
    logger.info(f"MiroFish: simulation prepared")

    # Step 5 – start simulation
    start_resp = _post("/api/simulation/start", json={"simulation_id": simulation_id}, timeout=60)
    if not start_resp.get("success"):
        raise MiroFishError(f"Simulation start failed: {start_resp.get('error')}")
    logger.info(f"MiroFish: simulation started {simulation_id}")

    return {
        "simulation_id": simulation_id,
        "project_id": project_id,
        "graph_id": graph_id,
        "status": "running",
    }
168
+
169
+
170
def _poll_graph_build(task_id: str, max_wait: int = 300) -> str:
    """Poll the graph build task until it completes and return its graph_id.

    Uses exponential backoff (2s doubling, capped at 15s). Raises
    MiroFishError on build failure or when max_wait seconds elapse.
    """
    stop_at = time.time() + max_wait
    delay = 2.0  # Start at 2s, double each time, cap at 15s
    while time.time() < stop_at:
        task = _get(f"/api/graph/task/{task_id}").get("data", {})
        state = task.get("status", "")
        if state == "completed":
            # graph_id may be at top level or nested under "result"
            graph_id = task.get("graph_id") or task.get("result", {}).get("graph_id")
            if not graph_id:
                raise MiroFishError("Graph build completed but no graph_id returned")
            return graph_id
        if state in ("failed", "error"):
            raise MiroFishError(f"Graph build failed: {task.get('error', 'unknown')}")
        logger.debug(f"Graph build polling: status={state}, next check in {delay:.0f}s")
        time.sleep(delay)
        delay = min(delay * 2, 15.0)
    raise MiroFishError("Graph build timed out")
190
+
191
+
192
def _poll_prepare(simulation_id: str, task_id: str, max_wait: int = 300):
    """Block until the simulation prepare task finishes.

    Polls with exponential backoff (2s doubling, capped at 15s). Raises
    MiroFishError on failure or when max_wait seconds elapse.
    """
    stop_at = time.time() + max_wait
    delay = 2.0
    while time.time() < stop_at:
        data = _post("/api/simulation/prepare/status", json={"task_id": task_id}).get("data", {})
        state = data.get("status", "")
        if state == "completed":
            return
        if state in ("failed", "error"):
            raise MiroFishError(f"Simulation prepare failed: {data.get('error', 'unknown')}")
        logger.debug(f"Prepare polling: status={state}, next check in {delay:.0f}s")
        time.sleep(delay)
        delay = min(delay * 2, 15.0)
    raise MiroFishError("Simulation prepare timed out")
208
+
209
+
210
+ # ── Status / Report / Chat ────────────────────────────────────────────────────
211
 
212
def simulation_status(simulation_id: str) -> Dict[str, Any]:
    """Fetch current status for a simulation, unwrapping the data envelope."""
    envelope = _get(f"/api/simulation/{simulation_id}")
    return envelope.get("data", envelope)
 
 
 
 
 
 
215
 
216
 
217
def simulation_report(simulation_id: str) -> Dict[str, Any]:
    """Get or generate report for a simulation.

    Flow: return an existing report if one is already attached to the
    simulation; otherwise request generation, poll the async task (up to
    5 minutes), then fetch the finished report by id.

    Args:
        simulation_id: MiroFish simulation identifier.

    Returns:
        The report payload dict, or the raw generation response data when
        no report_id could be resolved.

    Raises:
        MiroFishError: if report generation fails.
    """
    # Try to get existing report first (lookup failure is non-fatal — we
    # simply fall through to generating a fresh report).
    try:
        resp = _get(f"/api/report/by-simulation/{simulation_id}")
        if resp.get("success") and resp.get("data"):
            return resp["data"]
    except MiroFishError:
        pass

    # Generate report
    gen_resp = _post("/api/report/generate", json={"simulation_id": simulation_id}, timeout=120)
    if not gen_resp.get("success"):
        raise MiroFishError(f"Report generation failed: {gen_resp.get('error')}")

    report_id = gen_resp["data"].get("report_id")
    task_id = gen_resp["data"].get("task_id")

    # Poll if async
    # NOTE(review): if the 5-minute poll deadline passes without the task
    # completing, we fall through silently and fetch whichever report_id we
    # have — that may return an incomplete report. Confirm this is intended.
    if task_id:
        deadline = time.time() + 300
        while time.time() < deadline:
            status_resp = _post("/api/report/generate/status", json={"task_id": task_id})
            data = status_resp.get("data", {})
            if data.get("status") == "completed":
                report_id = data.get("report_id", report_id)
                break
            if data.get("status") in ("failed", "error"):
                raise MiroFishError(f"Report generation failed: {data.get('error')}")
            time.sleep(5)

    if report_id:
        report_resp = _get(f"/api/report/{report_id}")
        return report_resp.get("data", report_resp)

    return gen_resp.get("data", {})
253
 
254
 
255
def simulation_chat(simulation_id: str, message: str) -> Dict[str, Any]:
    """Chat with the report agent for a simulation."""
    # Resolve the report tied to this simulation; chat still works without one.
    try:
        lookup = _get(f"/api/report/by-simulation/{simulation_id}")
        report_id = lookup.get("data", {}).get("report_id") if lookup.get("success") else None
    except MiroFishError:
        report_id = None

    body = {"message": message}
    if report_id:
        body["report_id"] = report_id

    reply = _post("/api/report/chat", json=body, timeout=60)
    if not reply.get("success"):
        raise MiroFishError(f"Chat failed: {reply.get('error')}")
    return reply.get("data", reply)
backend/app/services/sentinel/__init__.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Sentinel Layer (Layer 6) - Adaptive Maintenance System

The Sentinel Layer monitors system health, diagnoses issues, and proposes
safe corrections to maintain system reliability without breaking anything.
"""

# NOTE(review): __all__ advertises these names, but this module never imports
# them from its submodules, so `from app.services.sentinel import *` will not
# actually bind them. Confirm whether re-export imports (e.g. from
# .capability_tracker import CapabilityTracker) were intended here.
__all__ = [
    "SentinelWatcher",
    "SentinelDiagnostician",
    "SentinelPatcher",
    "CapabilityTracker",
    "SentinelEngine",
]
backend/app/services/sentinel/capability_tracker.py ADDED
@@ -0,0 +1,414 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Capability Tracker - System Capability Metrics
3
+
4
+ Computes and tracks a composite capability score across 6 dimensions.
5
+ Reads existing data directories. Never calls any LLM. Never writes to
6
+ any existing data directory - only writes to data/sentinel/.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ import os
12
+ import uuid
13
+ from datetime import datetime, timedelta
14
+ from pathlib import Path
15
+ from typing import Dict, Any, List, Optional
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # IMPORTANT COMMENT (must appear in source code):
20
+ # agi_progression_index is a VANITY METRIC for developer motivation.
21
+ # It is a weighted composite of system health indicators:
22
+ # error rate, confidence scores, trust quality, skill growth, etc.
23
+ # It does NOT measure general intelligence, emergent reasoning, or AGI.
24
+ # The name is aspirational and intentionally optimistic, not scientific.
25
+
26
+
27
class CapabilitySnapshot:
    """Represents a snapshot of system capabilities.

    Captures the per-dimension scores, the composite AGI progression index,
    and the per-dimension change since the previous snapshot, stamped with a
    fresh UUID and a UTC timestamp at construction time.
    """

    def __init__(
        self,
        scores: Dict[str, float],
        agi_progression_index: float,
        delta_from_last: Dict[str, float]
    ):
        # Identity and timing are assigned at construction, not supplied.
        self.snapshot_id = str(uuid.uuid4())
        self.timestamp = datetime.utcnow().isoformat()
        # Measurement payload.
        self.scores = scores
        self.agi_progression_index = agi_progression_index
        self.delta_from_last = delta_from_last

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict of this snapshot's fields."""
        return {
            "snapshot_id": self.snapshot_id,
            "timestamp": self.timestamp,
            "scores": self.scores,
            "agi_progression_index": self.agi_progression_index,
            "delta_from_last": self.delta_from_last,
        }
50
+
51
+
52
+ class CapabilityTracker:
53
+ """Tracks system capability metrics over time."""
54
+
55
+ def __init__(self):
56
+ self.data_dir = Path("backend/app/data")
57
+ self.sentinel_dir = self.data_dir / "sentinel"
58
+ self.sentinel_dir.mkdir(parents=True, exist_ok=True)
59
+
60
+ def snapshot(self) -> CapabilitySnapshot:
61
+ """
62
+ Compute current capability snapshot.
63
+
64
+ Returns:
65
+ CapabilitySnapshot with all dimensions and AGI index
66
+ """
67
+ # Compute all 6 dimensions
68
+ scores = {
69
+ "reasoning_depth": self._compute_reasoning_depth(),
70
+ "source_trust_avg": self._compute_source_trust_avg(),
71
+ "skill_coverage": self._compute_skill_coverage(),
72
+ "prompt_win_rate_avg": self._compute_prompt_win_rate_avg(),
73
+ "stability": self._compute_stability(),
74
+ "self_correction_rate": self._compute_self_correction_rate(),
75
+ }
76
+
77
+ # Compute AGI progression index
78
+ agi_index = self._compute_agi_index(scores)
79
+
80
+ # Get delta from last snapshot
81
+ delta_from_last = self._compute_delta(scores, agi_index)
82
+
83
+ # Create snapshot
84
+ snapshot = CapabilitySnapshot(
85
+ scores=scores,
86
+ agi_progression_index=agi_index,
87
+ delta_from_last=delta_from_last
88
+ )
89
+
90
+ # Save to history
91
+ self._save_snapshot(snapshot)
92
+
93
+ return snapshot
94
+
95
+ def trend(self, days: int = 7) -> List[Dict[str, Any]]:
96
+ """
97
+ Get capability trend over specified days.
98
+
99
+ Args:
100
+ days: Number of days to look back
101
+
102
+ Returns:
103
+ List of CapabilitySnapshot dicts
104
+ """
105
+ try:
106
+ history_file = self.sentinel_dir / "capability_history.json"
107
+
108
+ if not history_file.exists():
109
+ return []
110
+
111
+ with open(history_file, 'r') as f:
112
+ history = json.load(f)
113
+
114
+ # Filter to last N days
115
+ cutoff = datetime.utcnow() - timedelta(days=days)
116
+
117
+ filtered = []
118
+ for entry in history:
119
+ try:
120
+ entry_time = datetime.fromisoformat(entry["timestamp"])
121
+ if entry_time >= cutoff:
122
+ filtered.append(entry)
123
+ except:
124
+ continue
125
+
126
+ return filtered
127
+
128
+ except Exception as e:
129
+ logger.error(f"Failed to get capability trend: {e}")
130
+ return []
131
+
132
+ def _compute_reasoning_depth(self) -> float:
133
+ """Compute reasoning depth from case confidence scores."""
134
+ try:
135
+ memory_dir = self.data_dir / "memory"
136
+
137
+ if not memory_dir.exists():
138
+ return 0.5
139
+
140
+ case_files = sorted(memory_dir.glob("*.json"), key=os.path.getmtime, reverse=True)[:30]
141
+
142
+ if not case_files:
143
+ return 0.5
144
+
145
+ all_confidences = []
146
+
147
+ for case_file in case_files:
148
+ try:
149
+ with open(case_file, 'r') as f:
150
+ case_data = json.load(f)
151
+
152
+ outputs = case_data.get("outputs", [])
153
+
154
+ for output in outputs:
155
+ if isinstance(output, dict):
156
+ confidence = output.get("confidence")
157
+ if confidence is not None:
158
+ all_confidences.append(confidence)
159
+ except:
160
+ continue
161
+
162
+ if not all_confidences:
163
+ return 0.5
164
+
165
+ return sum(all_confidences) / len(all_confidences)
166
+
167
+ except Exception as e:
168
+ logger.warning(f"Failed to compute reasoning depth: {e}")
169
+ return 0.5
170
+
171
+ def _compute_source_trust_avg(self) -> float:
172
+ """Compute average source trust score."""
173
+ try:
174
+ config_file = self.sentinel_dir / "sentinel_config.json"
175
+
176
+ if not config_file.exists():
177
+ return 0.5
178
+
179
+ with open(config_file, 'r') as f:
180
+ config = json.load(f)
181
+
182
+ trust_scores = config.get("source_trust_scores", {})
183
+
184
+ if not trust_scores:
185
+ return 0.5
186
+
187
+ values = [v for v in trust_scores.values() if isinstance(v, (int, float))]
188
+
189
+ if not values:
190
+ return 0.5
191
+
192
+ return sum(values) / len(values)
193
+
194
+ except Exception as e:
195
+ logger.warning(f"Failed to compute source trust avg: {e}")
196
+ return 0.5
197
+
198
+ def _compute_skill_coverage(self) -> float:
199
+ """Compute skill coverage metric."""
200
+ try:
201
+ skills_dir = self.data_dir / "skills"
202
+
203
+ if not skills_dir.exists():
204
+ return 0.0
205
+
206
+ skill_count = len(list(skills_dir.glob("*.json")))
207
+
208
+ # Normalize to 0-1 scale (20 skills = 1.0)
209
+ return min(skill_count / 20.0, 1.0)
210
+
211
+ except Exception as e:
212
+ logger.warning(f"Failed to compute skill coverage: {e}")
213
+ return 0.0
214
+
215
+ def _compute_prompt_win_rate_avg(self) -> float:
216
+ """Compute average prompt win rate."""
217
+ try:
218
+ prompt_versions_dir = self.data_dir / "prompt_versions"
219
+
220
+ if not prompt_versions_dir.exists():
221
+ return 0.5
222
+
223
+ win_rates = []
224
+
225
+ for version_file in prompt_versions_dir.glob("*.json"):
226
+ try:
227
+ with open(version_file, 'r') as f:
228
+ version_data = json.load(f)
229
+
230
+ win_rate = version_data.get("win_rate")
231
+ if win_rate is not None:
232
+ win_rates.append(win_rate)
233
+ except:
234
+ continue
235
+
236
+ if not win_rates:
237
+ return 0.5
238
+
239
+ return sum(win_rates) / len(win_rates)
240
+
241
+ except Exception as e:
242
+ logger.warning(f"Failed to compute prompt win rate avg: {e}")
243
+ return 0.5
244
+
245
+ def _compute_stability(self) -> float:
246
+ """Compute stability (1.0 - error_rate)."""
247
+ try:
248
+ memory_dir = self.data_dir / "memory"
249
+
250
+ if not memory_dir.exists():
251
+ return 1.0
252
+
253
+ # Get cases from last 7 days
254
+ cutoff = datetime.utcnow() - timedelta(days=7)
255
+
256
+ total_count = 0
257
+ failed_count = 0
258
+
259
+ for case_file in memory_dir.glob("*.json"):
260
+ try:
261
+ # Check file modification time
262
+ mtime = datetime.fromtimestamp(case_file.stat().st_mtime)
263
+ if mtime < cutoff:
264
+ continue
265
+
266
+ with open(case_file, 'r') as f:
267
+ case_data = json.load(f)
268
+
269
+ total_count += 1
270
+
271
+ # Check if case failed
272
+ final_answer = case_data.get("final_answer", "")
273
+ outputs = case_data.get("outputs", [])
274
+
275
+ if not final_answer or not outputs:
276
+ failed_count += 1
277
+ except:
278
+ continue
279
+
280
+ if total_count == 0:
281
+ return 1.0
282
+
283
+ error_rate = failed_count / total_count
284
+ return 1.0 - error_rate
285
+
286
+ except Exception as e:
287
+ logger.warning(f"Failed to compute stability: {e}")
288
+ return 1.0
289
+
290
+ def _compute_self_correction_rate(self) -> float:
291
+ """Compute self-correction rate."""
292
+ try:
293
+ # Get patches from last 7 days
294
+ patch_history_file = self.sentinel_dir / "patch_history.json"
295
+ alert_history_file = self.sentinel_dir / "alert_history.json"
296
+
297
+ cutoff = datetime.utcnow() - timedelta(days=7)
298
+
299
+ patch_count = 0
300
+ if patch_history_file.exists():
301
+ with open(patch_history_file, 'r') as f:
302
+ patches = json.load(f)
303
+
304
+ for patch in patches:
305
+ try:
306
+ patch_time = datetime.fromisoformat(patch["timestamp"])
307
+ if patch_time >= cutoff:
308
+ patch_count += 1
309
+ except:
310
+ continue
311
+
312
+ alert_count = 0
313
+ if alert_history_file.exists():
314
+ with open(alert_history_file, 'r') as f:
315
+ alerts = json.load(f)
316
+
317
+ for alert in alerts:
318
+ try:
319
+ alert_time = datetime.fromisoformat(alert["timestamp"])
320
+ if alert_time >= cutoff:
321
+ alert_count += 1
322
+ except:
323
+ continue
324
+
325
+ if alert_count == 0:
326
+ return 0.0
327
+
328
+ return min(patch_count / alert_count, 1.0)
329
+
330
+ except Exception as e:
331
+ logger.warning(f"Failed to compute self-correction rate: {e}")
332
+ return 0.0
333
+
334
+ def _compute_agi_index(self, scores: Dict[str, float]) -> float:
335
+ """
336
+ Compute AGI progression index from dimension scores.
337
+
338
+ Formula:
339
+ index = (
340
+ (reasoning_depth * 0.25) +
341
+ (source_trust_avg * 0.15) +
342
+ (skill_coverage * 0.20) +
343
+ (prompt_win_rate_avg * 0.20) +
344
+ (stability * 0.10) +
345
+ (self_correction_rate * 0.10)
346
+ )
347
+ """
348
+ index = (
349
+ (scores.get("reasoning_depth", 0.5) * 0.25) +
350
+ (scores.get("source_trust_avg", 0.5) * 0.15) +
351
+ (scores.get("skill_coverage", 0.0) * 0.20) +
352
+ (scores.get("prompt_win_rate_avg", 0.5) * 0.20) +
353
+ (scores.get("stability", 1.0) * 0.10) +
354
+ (scores.get("self_correction_rate", 0.0) * 0.10)
355
+ )
356
+
357
+ # Clamp to [0.0, 1.0]
358
+ return max(0.0, min(1.0, index))
359
+
360
+ def _compute_delta(self, scores: Dict[str, float], agi_index: float) -> Dict[str, float]:
361
+ """Compute delta from last snapshot."""
362
+ try:
363
+ history_file = self.sentinel_dir / "capability_history.json"
364
+
365
+ if not history_file.exists():
366
+ return {k: 0.0 for k in scores.keys()}
367
+
368
+ with open(history_file, 'r') as f:
369
+ history = json.load(f)
370
+
371
+ if not history:
372
+ return {k: 0.0 for k in scores.keys()}
373
+
374
+ last_snapshot = history[-1]
375
+ last_scores = last_snapshot.get("scores", {})
376
+
377
+ delta = {}
378
+ for key, value in scores.items():
379
+ last_value = last_scores.get(key, value)
380
+ delta[key] = value - last_value
381
+
382
+ delta["agi_progression_index"] = agi_index - last_snapshot.get("agi_progression_index", agi_index)
383
+
384
+ return delta
385
+
386
+ except Exception as e:
387
+ logger.warning(f"Failed to compute delta: {e}")
388
+ return {k: 0.0 for k in scores.keys()}
389
+
390
+ def _save_snapshot(self, snapshot: CapabilitySnapshot):
391
+ """Save snapshot to history."""
392
+ try:
393
+ history_file = self.sentinel_dir / "capability_history.json"
394
+
395
+ # Load existing history
396
+ if history_file.exists():
397
+ with open(history_file, 'r') as f:
398
+ history = json.load(f)
399
+ else:
400
+ history = []
401
+
402
+ # Append new snapshot
403
+ history.append(snapshot.to_dict())
404
+
405
+ # Keep only last 500 entries
406
+ if len(history) > 500:
407
+ history = history[-500:]
408
+
409
+ # Save back
410
+ with open(history_file, 'w') as f:
411
+ json.dump(history, f, indent=2)
412
+
413
+ except Exception as e:
414
+ logger.error(f"Failed to save capability snapshot: {e}")
backend/app/services/sentinel/diagnostician.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Diagnostician - Issue Diagnosis using LLM
3
+
4
+ Takes a SentinelAlert, asks the existing free LLM to diagnose it,
5
+ returns a structured SentinelDiagnosis.
6
+
7
+ CRITICAL: Uses ONLY call_model() from app.agents._model for all LLM calls.
8
+ """
9
+
10
+ import json
11
+ import logging
12
+ import uuid
13
+ from datetime import datetime
14
+ from typing import Dict, Any
15
+
16
+ # MANDATORY IMPORT - use this and only this for LLM calls
17
+ from app.agents._model import call_model
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class SentinelDiagnosis:
    """Represents a diagnosis of a system alert.

    A fresh diagnosis_id (UUID) and a UTC timestamp are assigned at
    construction; all other fields come from the diagnosing caller.
    """

    def __init__(
        self,
        alert_id: str,
        root_cause: str,
        fix_type: str,
        safe_to_auto_apply: bool,
        proposed_fix: str,
        confidence: float,
        reasoning: str
    ):
        self.diagnosis_id = str(uuid.uuid4())
        self.timestamp = datetime.utcnow().isoformat()
        self.alert_id = alert_id
        self.root_cause = root_cause
        self.fix_type = fix_type
        self.safe_to_auto_apply = safe_to_auto_apply
        self.proposed_fix = proposed_fix
        self.confidence = confidence
        self.reasoning = reasoning

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict of this diagnosis."""
        return {
            "diagnosis_id": self.diagnosis_id,
            "alert_id": self.alert_id,
            "root_cause": self.root_cause,
            "fix_type": self.fix_type,
            "safe_to_auto_apply": self.safe_to_auto_apply,
            "proposed_fix": self.proposed_fix,
            "confidence": self.confidence,
            "reasoning": self.reasoning,
            "timestamp": self.timestamp,
        }
57
+
58
+
59
+ class SentinelDiagnostician:
60
+ """Diagnoses system alerts using LLM."""
61
+
62
+ def diagnose(self, alert) -> SentinelDiagnosis:
63
+ """
64
+ Diagnose a system alert using the free LLM.
65
+
66
+ Args:
67
+ alert: SentinelAlert object to diagnose
68
+
69
+ Returns:
70
+ SentinelDiagnosis with root cause and proposed fix
71
+ """
72
+ system_prompt = """You are a system diagnostician for an AI orchestration platform called MiroOrg. Your job is to analyze system alerts and propose safe fixes.
73
+
74
+ You MUST respond with ONLY valid JSON. No markdown. No code fences. No explanation text before or after. Just the raw JSON object.
75
+
76
+ The JSON must have exactly these fields:
77
+ {
78
+ "root_cause": "string describing what caused this issue",
79
+ "fix_type": "one of: config | prompt | logic | dependency | data",
80
+ "safe_to_auto_apply": false,
81
+ "proposed_fix": "string describing the specific fix to apply",
82
+ "confidence": 0.0,
83
+ "reasoning": "string explaining your diagnosis"
84
+ }
85
+
86
+ IMPORTANT RULES for safe_to_auto_apply:
87
+ - Set true ONLY if fix_type is "prompt" (editing a .txt prompt file) or "config" (changing a JSON config value)
88
+ - Set false for fix_type "logic" (Python code changes)
89
+ - Set false for fix_type "dependency" (package changes)
90
+ - Set false for fix_type "data" (data migrations)
91
+ - When in doubt: set false. Human review is always safer.
92
+ - NEVER suggest auto-applying changes to .py files.
93
+
94
+ Set confidence between 0.0 and 1.0 based on how certain you are."""
95
+
96
+ layer_names = {
97
+ 1: "Core Platform",
98
+ 2: "Agents",
99
+ 3: "Domain Packs",
100
+ 4: "Simulation",
101
+ 5: "Knowledge Evolution",
102
+ 6: "Sentinel"
103
+ }
104
+
105
+ prompt = f"""Analyze this system alert and diagnose the root cause:
106
+
107
+ Alert ID: {alert.alert_id}
108
+ Layer: {alert.layer} ({layer_names.get(alert.layer, 'Unknown')})
109
+ Component: {alert.component}
110
+ Issue Type: {alert.issue_type}
111
+ Severity: {alert.severity}
112
+ Evidence: {alert.raw_evidence}
113
+
114
+ What is the most likely root cause and what is the safest fix?
115
+ Remember: respond with ONLY the JSON object, nothing else."""
116
+
117
+ try:
118
+ # Call the existing free LLM
119
+ raw_response = call_model(
120
+ prompt=prompt,
121
+ mode="chat",
122
+ system_prompt=system_prompt
123
+ )
124
+
125
+ # Parse response
126
+ diagnosis_data = self._parse_llm_response(raw_response)
127
+
128
+ if diagnosis_data is None:
129
+ # Parse failed - return fallback diagnosis
130
+ return SentinelDiagnosis(
131
+ alert_id=alert.alert_id,
132
+ root_cause="Could not parse LLM response",
133
+ fix_type="logic",
134
+ safe_to_auto_apply=False,
135
+ proposed_fix="Manual investigation required",
136
+ confidence=0.0,
137
+ reasoning=f"JSON parse failed. Raw response: {raw_response[:200]}"
138
+ )
139
+
140
+ # Create diagnosis from parsed data
141
+ return SentinelDiagnosis(
142
+ alert_id=alert.alert_id,
143
+ root_cause=diagnosis_data.get("root_cause", "Unknown"),
144
+ fix_type=diagnosis_data.get("fix_type", "logic"),
145
+ safe_to_auto_apply=diagnosis_data.get("safe_to_auto_apply", False),
146
+ proposed_fix=diagnosis_data.get("proposed_fix", "No fix proposed"),
147
+ confidence=float(diagnosis_data.get("confidence", 0.0)),
148
+ reasoning=diagnosis_data.get("reasoning", "No reasoning provided")
149
+ )
150
+
151
+ except Exception as e:
152
+ logger.warning(f"Diagnosis failed for alert {alert.alert_id}: {e}")
153
+
154
+ # Return fallback diagnosis
155
+ return SentinelDiagnosis(
156
+ alert_id=alert.alert_id,
157
+ root_cause=f"Diagnosis error: {str(e)}",
158
+ fix_type="logic",
159
+ safe_to_auto_apply=False,
160
+ proposed_fix="Manual investigation required",
161
+ confidence=0.0,
162
+ reasoning=f"Exception during diagnosis: {str(e)}"
163
+ )
164
+
165
+ def _parse_llm_response(self, raw_response: str) -> Dict[str, Any]:
166
+ """
167
+ Parse LLM response, handling markdown and other formatting.
168
+
169
+ Args:
170
+ raw_response: Raw string from LLM
171
+
172
+ Returns:
173
+ Parsed dict or None if parse failed
174
+ """
175
+ try:
176
+ # Strip markdown code fences if present
177
+ cleaned = raw_response.strip()
178
+
179
+ if cleaned.startswith("```json"):
180
+ cleaned = cleaned[7:]
181
+ elif cleaned.startswith("```"):
182
+ cleaned = cleaned[3:]
183
+
184
+ if cleaned.endswith("```"):
185
+ cleaned = cleaned[:-3]
186
+
187
+ cleaned = cleaned.strip()
188
+
189
+ # Parse JSON
190
+ data = json.loads(cleaned)
191
+
192
+ # Validate required fields
193
+ required_fields = ["root_cause", "fix_type", "safe_to_auto_apply", "proposed_fix", "confidence", "reasoning"]
194
+ for field in required_fields:
195
+ if field not in data:
196
+ logger.warning(f"Missing required field in LLM response: {field}")
197
+ return None
198
+
199
+ # Validate fix_type
200
+ valid_fix_types = ["config", "prompt", "logic", "dependency", "data"]
201
+ if data["fix_type"] not in valid_fix_types:
202
+ logger.warning(f"Invalid fix_type: {data['fix_type']}")
203
+ data["fix_type"] = "logic"
204
+
205
+ # Validate confidence
206
+ try:
207
+ data["confidence"] = float(data["confidence"])
208
+ data["confidence"] = max(0.0, min(1.0, data["confidence"]))
209
+ except:
210
+ data["confidence"] = 0.0
211
+
212
+ return data
213
+
214
+ except json.JSONDecodeError as e:
215
+ logger.warning(f"JSON parse error: {e}")
216
+ return None
217
+ except Exception as e:
218
+ logger.warning(f"Unexpected parse error: {e}")
219
+ return None
backend/app/services/sentinel/patcher.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Patcher - Safe Patch Application
3
+
4
+ Applies safe patches automatically or queues unsafe ones for human review.
5
+ Never touches .py files. Never calls any LLM.
6
+
7
+ SAFE AUTO-APPLY targets (the ONLY files this class may write to automatically):
8
+ 1. backend/app/prompts/*.txt - prompt text files only
9
+ 2. backend/app/data/sentinel/sentinel_config.json - sentinel config only
10
+
11
+ EVERYTHING ELSE goes to pending_patches/ for human review.
12
+ """
13
+
14
+ import json
15
+ import logging
16
+ import os
17
+ import uuid
18
+ from datetime import datetime
19
+ from pathlib import Path
20
+ from typing import Dict, Any, Optional
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class PatchResult:
    """Outcome of a single patch-application attempt.

    Captures whether the patch was written, which file it targeted, and
    whether a human still needs to review it. Creation time is stored as
    an ISO-8601 UTC timestamp.
    """

    # Serialization order for to_dict(); mirrors the constructor fields.
    _FIELDS = (
        "patch_id",
        "applied",
        "target_file",
        "change_summary",
        "requires_human_review",
        "timestamp",
    )

    def __init__(
        self,
        patch_id: str,
        applied: bool,
        target_file: str,
        change_summary: str,
        requires_human_review: bool
    ):
        self.patch_id = patch_id
        self.applied = applied
        self.target_file = target_file
        self.change_summary = change_summary
        self.requires_human_review = requires_human_review
        self.timestamp = datetime.utcnow().isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this result for JSON persistence."""
        return {name: getattr(self, name) for name in self._FIELDS}
52
+
53
+
54
class SentinelPatcher:
    """Applies safe patches automatically or queues them for human review.

    Auto-apply is restricted to two targets: prompt text files under
    backend/app/prompts and the sentinel config JSON. Every other fix
    type is written to pending_patches/ for a human to approve or
    reject. This class never touches .py files and never calls an LLM.
    """

    def __init__(self):
        self.data_dir = Path("backend/app/data")
        self.prompts_dir = Path("backend/app/prompts")
        self.sentinel_dir = self.data_dir / "sentinel"
        self.pending_dir = self.sentinel_dir / "pending_patches"

        # Ensure directories exist
        self.sentinel_dir.mkdir(parents=True, exist_ok=True)
        self.pending_dir.mkdir(parents=True, exist_ok=True)

    def apply(self, diagnosis) -> PatchResult:
        """
        Apply a diagnosis as a patch.

        Only diagnoses flagged safe_to_auto_apply with fix_type "prompt"
        or "config" are applied automatically; everything else is queued
        for human review. The outcome is always recorded in history.

        Args:
            diagnosis: SentinelDiagnosis object

        Returns:
            PatchResult indicating what happened
        """
        patch_id = str(uuid.uuid4())
        timestamp = datetime.utcnow().isoformat()

        # Check if safe to auto-apply
        if diagnosis.safe_to_auto_apply and diagnosis.fix_type == "prompt":
            result = self._apply_prompt_patch(patch_id, diagnosis, timestamp)
        elif diagnosis.safe_to_auto_apply and diagnosis.fix_type == "config":
            result = self._apply_config_patch(patch_id, diagnosis, timestamp)
        else:
            # Queue for human review
            result = self._queue_for_review(patch_id, diagnosis, timestamp)

        # Save to patch history
        self._save_patch_history(result)

        return result

    def approve_pending(self, patch_id: str) -> Optional[PatchResult]:
        """
        Approve and apply a pending patch.

        Logic and dependency fixes are refused even with approval, since
        they require manual code changes; only prompt/config patches are
        re-applied here.

        Args:
            patch_id: ID of pending patch

        Returns:
            PatchResult, or None if the patch is not found or loading fails
        """
        pending_file = self.pending_dir / f"{patch_id}.patch.json"

        if not pending_file.exists():
            logger.warning(f"Pending patch not found: {patch_id}")
            return None

        try:
            # Load pending patch
            with open(pending_file, 'r', encoding='utf-8') as f:
                patch_data = json.load(f)

            diagnosis_data = patch_data.get("diagnosis", {})
            fix_type = diagnosis_data.get("fix_type", "logic")

            # Refuse to auto-apply logic or dependency changes even with approval
            if fix_type in ("logic", "dependency"):
                result = PatchResult(
                    patch_id=patch_id,
                    applied=False,
                    target_file=f"pending_patches/{patch_id}.patch.json",
                    change_summary="Cannot auto-apply logic changes even with approval. Manual code change required.",
                    requires_human_review=True
                )
                self._save_patch_history(result)
                return result

            # Rebuild a lightweight diagnosis object for re-application.
            # safe_to_auto_apply is forced True because approval is explicit.
            from types import SimpleNamespace
            diagnosis = SimpleNamespace(
                diagnosis_id=diagnosis_data.get("diagnosis_id"),
                alert_id=diagnosis_data.get("alert_id"),
                root_cause=diagnosis_data.get("root_cause"),
                fix_type=diagnosis_data.get("fix_type"),
                safe_to_auto_apply=True,
                proposed_fix=diagnosis_data.get("proposed_fix"),
                confidence=diagnosis_data.get("confidence"),
                reasoning=diagnosis_data.get("reasoning"),
                component=patch_data.get("component", "unknown"),
            )

            # Re-run apply logic for the approved fix type
            if fix_type == "prompt":
                result = self._apply_prompt_patch(patch_id, diagnosis, datetime.utcnow().isoformat())
            elif fix_type == "config":
                result = self._apply_config_patch(patch_id, diagnosis, datetime.utcnow().isoformat())
            else:
                result = PatchResult(
                    patch_id=patch_id,
                    applied=False,
                    target_file=f"pending_patches/{patch_id}.patch.json",
                    change_summary=f"Cannot auto-apply {fix_type} changes",
                    requires_human_review=True
                )

            # Delete pending file if applied
            if result.applied:
                pending_file.unlink()

            self._save_patch_history(result)
            return result

        except Exception as e:
            logger.error(f"Failed to approve pending patch {patch_id}: {e}")
            return None

    def reject_pending(self, patch_id: str) -> bool:
        """
        Reject a pending patch.

        Records the rejection in history and removes the pending file.

        Args:
            patch_id: ID of pending patch

        Returns:
            True if rejected, False if not found or on error
        """
        pending_file = self.pending_dir / f"{patch_id}.patch.json"

        if not pending_file.exists():
            logger.warning(f"Pending patch not found: {patch_id}")
            return False

        try:
            # Record rejection in history
            result = PatchResult(
                patch_id=patch_id,
                applied=False,
                target_file=f"pending_patches/{patch_id}.patch.json",
                change_summary="Rejected by user",
                requires_human_review=False
            )
            self._save_patch_history(result)

            # Delete pending file
            pending_file.unlink()

            logger.info(f"Rejected pending patch: {patch_id}")
            return True

        except Exception as e:
            logger.error(f"Failed to reject pending patch {patch_id}: {e}")
            return False

    def _apply_prompt_patch(self, patch_id: str, diagnosis, timestamp: str) -> PatchResult:
        """Append sentinel guidance to the matching prompt .txt file.

        Falls back to queueing for review when no prompt file matches
        the diagnosed component or when the write fails.
        """
        try:
            # Find matching prompt file based on component name
            component = getattr(diagnosis, 'component', diagnosis.alert_id)
            matched_file = self._find_prompt_file(component)

            if matched_file is None:
                # No matching prompt file found - queue for review
                return self._queue_for_review(patch_id, diagnosis, timestamp)

            # Read current content
            with open(matched_file, 'r', encoding='utf-8') as f:
                current_content = f.read()

            # Append sentinel guidance (additive only: never rewrites the prompt)
            patch_comment = f"\n\n# [Sentinel patch {patch_id} at {timestamp}]\n"
            patch_comment += f"# Issue: {diagnosis.root_cause}\n"
            patch_comment += f"# Fix applied: {diagnosis.proposed_fix}\n"

            new_content = current_content + patch_comment

            # Write back
            with open(matched_file, 'w', encoding='utf-8') as f:
                f.write(new_content)

            logger.info(f"Applied prompt patch to {matched_file}")

            return PatchResult(
                patch_id=patch_id,
                applied=True,
                target_file=str(matched_file),
                change_summary=f"Appended sentinel guidance to prompt: {diagnosis.proposed_fix[:100]}",
                requires_human_review=False
            )

        except Exception as e:
            logger.error(f"Failed to apply prompt patch: {e}")
            return self._queue_for_review(patch_id, diagnosis, timestamp)

    def _apply_config_patch(self, patch_id: str, diagnosis, timestamp: str) -> PatchResult:
        """Record the proposed fix in sentinel_config.json under a patch key."""
        try:
            config_file = self.sentinel_dir / "sentinel_config.json"

            # Load existing config
            if config_file.exists():
                with open(config_file, 'r', encoding='utf-8') as f:
                    config = json.load(f)
            else:
                config = {}

            # Add patch
            config[f"patch_{patch_id}"] = diagnosis.proposed_fix

            # Write back
            with open(config_file, 'w', encoding='utf-8') as f:
                json.dump(config, f, indent=2)

            logger.info(f"Applied config patch: {patch_id}")

            # Report the real file path (previously reported a truncated
            # "data/sentinel/..." path that did not match the file written).
            return PatchResult(
                patch_id=patch_id,
                applied=True,
                target_file=str(config_file),
                change_summary=f"Config patch: {diagnosis.proposed_fix[:100]}",
                requires_human_review=False
            )

        except Exception as e:
            logger.error(f"Failed to apply config patch: {e}")
            return self._queue_for_review(patch_id, diagnosis, timestamp)

    def _queue_for_review(self, patch_id: str, diagnosis, timestamp: str) -> PatchResult:
        """Queue a patch for human review."""
        try:
            pending_file = self.pending_dir / f"{patch_id}.patch.json"

            # Serialize the diagnosis defensively: prefer to_dict(), fall
            # back to the instance attribute dict, so json.dump never
            # receives a non-serializable object (e.g. a rebuilt diagnosis
            # coming back through approve_pending).
            if hasattr(diagnosis, 'to_dict'):
                diagnosis_payload = diagnosis.to_dict()
            elif hasattr(diagnosis, '__dict__'):
                diagnosis_payload = dict(vars(diagnosis))
            else:
                diagnosis_payload = diagnosis

            patch_data = {
                "patch_id": patch_id,
                "timestamp": timestamp,
                "diagnosis": diagnosis_payload,
                "component": getattr(diagnosis, 'component', 'unknown'),
            }

            with open(pending_file, 'w', encoding='utf-8') as f:
                json.dump(patch_data, f, indent=2)

            logger.info(f"Queued patch for review: {patch_id}")

            return PatchResult(
                patch_id=patch_id,
                applied=False,
                target_file=f"pending_patches/{patch_id}.patch.json",
                change_summary=f"Queued for review: {diagnosis.root_cause[:100]}",
                requires_human_review=True
            )

        except Exception as e:
            logger.error(f"Failed to queue patch for review: {e}")
            return PatchResult(
                patch_id=patch_id,
                applied=False,
                target_file="error",
                change_summary=f"Failed to queue: {str(e)}",
                requires_human_review=True
            )

    def _find_prompt_file(self, component: str) -> Optional[Path]:
        """Find a prompt file matching the component name.

        Tries a substring match on the file stem first, then a word-level
        fuzzy match. Returns None when no prompt file matches.
        """
        if not self.prompts_dir.exists():
            return None

        component_lower = component.lower()

        # Try exact (substring) match first
        for prompt_file in self.prompts_dir.glob("*.txt"):
            if component_lower in prompt_file.stem.lower():
                return prompt_file

        # Try fuzzy match: any word of the component appears in the stem
        for prompt_file in self.prompts_dir.glob("*.txt"):
            stem_lower = prompt_file.stem.lower()
            if any(word in stem_lower for word in component_lower.split()):
                return prompt_file

        return None

    def _save_patch_history(self, result: PatchResult):
        """Append a patch result to patch_history.json, keeping the last 200."""
        try:
            history_file = self.sentinel_dir / "patch_history.json"

            # Load existing history
            if history_file.exists():
                with open(history_file, 'r', encoding='utf-8') as f:
                    history = json.load(f)
            else:
                history = []

            # Append new result
            history.append(result.to_dict())

            # Keep only last 200 entries to bound file growth
            if len(history) > 200:
                history = history[-200:]

            # Save back
            with open(history_file, 'w', encoding='utf-8') as f:
                json.dump(history, f, indent=2)

        except Exception as e:
            logger.error(f"Failed to save patch history: {e}")
backend/app/services/sentinel/scheduler.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Scheduler - Background Cycle Execution
3
+
4
+ Runs sentinel cycles on a schedule with CPU/battery safeguards.
5
+ Uses asyncio for non-blocking background execution.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import os
11
+ from datetime import datetime, timedelta
12
+ from typing import Optional
13
+
14
+ try:
15
+ import psutil
16
+ except ImportError:
17
+ psutil = None
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # Module-level state
22
+ _scheduler_task: Optional[asyncio.Task] = None
23
+ _scheduler_running: bool = False
24
+
25
+
26
async def _sentinel_cycle_loop():
    """Background loop that runs sentinel cycles until stopped.

    Re-checks the SENTINEL_ENABLED flag and CPU/battery safeguards before
    every cycle, and backs off for 5 minutes after a skipped or failed run.
    Blocking work (the cycle itself and the psutil probes) is pushed to a
    worker thread so the event loop stays responsive.
    """
    from app.services.sentinel.sentinel_engine import SentinelEngine

    global _scheduler_running

    engine = SentinelEngine()
    interval_minutes = int(os.getenv("SENTINEL_CYCLE_INTERVAL_MINUTES", "60"))
    loop = asyncio.get_running_loop()

    logger.info(f"Sentinel scheduler started (interval: {interval_minutes} minutes)")

    while _scheduler_running:
        try:
            # Check if sentinel is enabled
            if not os.getenv("SENTINEL_ENABLED", "true").lower() == "true":
                logger.debug("Sentinel disabled, skipping cycle")
                await asyncio.sleep(60)
                continue

            # Check CPU/battery safeguards in a worker thread:
            # psutil.cpu_percent(interval=1) blocks for ~1 second.
            if not await loop.run_in_executor(None, _check_safeguards):
                logger.info("Sentinel cycle skipped due to safeguards (high CPU or low battery)")
                await asyncio.sleep(300)  # Wait 5 minutes before checking again
                continue

            # Run the cycle in a worker thread: run_cycle() is synchronous
            # and can take a long time, so running it inline would block
            # the entire event loop for the duration of the cycle.
            logger.info("Starting scheduled sentinel cycle")
            report = await loop.run_in_executor(None, engine.run_cycle)
            logger.info(f"Sentinel cycle complete: {report.cycle_id}")

            # Wait for next cycle
            await asyncio.sleep(interval_minutes * 60)

        except asyncio.CancelledError:
            logger.info("Sentinel scheduler cancelled")
            break
        except Exception as e:
            logger.error(f"Sentinel cycle error: {e}")
            await asyncio.sleep(300)  # Wait 5 minutes on error
65
+
66
+
67
def _check_safeguards() -> bool:
    """
    Decide whether it is safe to run a sentinel cycle right now.

    A cycle is skipped when CPU usage exceeds SENTINEL_CPU_THRESHOLD,
    or when running on battery below SENTINEL_BATTERY_THRESHOLD.
    Without psutil — or on any probe error — the check fails open.

    Returns:
        True if safe to run, False otherwise
    """
    if psutil is None:
        # psutil unavailable: no way to measure, assume safe.
        return True

    try:
        # CPU probe: blocks ~1s to sample utilization.
        max_cpu = float(os.getenv("SENTINEL_CPU_THRESHOLD", "80.0"))
        current_cpu = psutil.cpu_percent(interval=1)
        if current_cpu > max_cpu:
            logger.debug(f"CPU usage too high: {current_cpu}% > {max_cpu}%")
            return False

        # Battery probe (laptops only; desktops report no battery).
        battery = psutil.sensors_battery()
        if battery is None:
            return True

        min_battery = float(os.getenv("SENTINEL_BATTERY_THRESHOLD", "20.0"))
        if battery.percent < min_battery and not battery.power_plugged:
            logger.debug(f"Battery too low: {battery.percent}% < {min_battery}%")
            return False

        return True

    except Exception as e:
        logger.warning(f"Safeguard check failed: {e}")
        return True  # Fail open
101
+
102
+
103
def start_sentinel_scheduler():
    """
    Start the sentinel scheduler in the background.

    This function is called from main.py on app startup. Calling it again
    while the scheduler is active is a no-op (logged as a warning).

    NOTE(review): asyncio.create_task() requires a running event loop, so
    this must be invoked from async startup context — confirm the caller.
    """
    global _scheduler_task, _scheduler_running

    if _scheduler_running:
        logger.warning("Sentinel scheduler already running")
        return

    # Set the flag before creating the task so the loop's while-condition
    # sees the scheduler as running from its first iteration.
    _scheduler_running = True
    _scheduler_task = asyncio.create_task(_sentinel_cycle_loop())
    logger.info("Sentinel scheduler task created")
118
+
119
+
120
def stop_sentinel_scheduler():
    """
    Stop the sentinel scheduler.

    This function can be called to gracefully shut down the scheduler.
    Clearing the flag lets the loop exit on its own; cancel() is a
    best-effort interrupt of any in-progress sleep (not awaited here).
    """
    global _scheduler_task, _scheduler_running

    if not _scheduler_running:
        logger.warning("Sentinel scheduler not running")
        return

    _scheduler_running = False

    if _scheduler_task:
        _scheduler_task.cancel()
    logger.info("Sentinel scheduler stopped")
137
+
138
+
139
def is_scheduler_running() -> bool:
    """Check if scheduler is running (True while the background loop is active)."""
    return _scheduler_running
backend/app/services/sentinel/sentinel_engine.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Engine - Core Orchestration
3
+
4
+ Coordinates watcher, diagnostician, patcher, and capability tracker
5
+ to perform complete sentinel cycles.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ import uuid
12
+ from datetime import datetime, timedelta
13
+ from pathlib import Path
14
+ from typing import Dict, Any, Optional
15
+
16
+ from app.services.sentinel.watcher import SentinelWatcher
17
+ from app.services.sentinel.diagnostician import SentinelDiagnostician
18
+ from app.services.sentinel.patcher import SentinelPatcher
19
+ from app.services.sentinel.capability_tracker import CapabilityTracker
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Module-level state
24
+ SENTINEL_RUNNING: bool = False
25
+ _last_cycle_at: Optional[str] = None
26
+ _last_cycle_report: Optional[dict] = None
27
+
28
+
29
class SentinelCycleReport:
    """Summary of a single sentinel cycle.

    Records when the cycle ran, how many alerts/diagnoses/patches it
    produced, and the capability snapshot taken at the end of the cycle.
    """

    def __init__(
        self,
        cycle_id: str,
        started_at: str,
        completed_at: str,
        alerts_found: int,
        diagnoses_made: int,
        patches_applied: int,
        patches_pending_review: int,
        capability_snapshot
    ):
        self.cycle_id = cycle_id
        self.started_at = started_at
        self.completed_at = completed_at
        self.alerts_found = alerts_found
        self.diagnoses_made = diagnoses_made
        self.patches_applied = patches_applied
        self.patches_pending_review = patches_pending_review
        self.capability_snapshot = capability_snapshot

    def dict(self) -> Dict[str, Any]:
        """Serialize the report for JSON persistence.

        The capability snapshot may be a tracker object (exposing
        to_dict) or an already-serialized dict (when the report is
        rebuilt from a persisted history entry).
        """
        return {
            "cycle_id": self.cycle_id,
            "started_at": self.started_at,
            "completed_at": self.completed_at,
            "alerts_found": self.alerts_found,
            "diagnoses_made": self.diagnoses_made,
            "patches_applied": self.patches_applied,
            "patches_pending_review": self.patches_pending_review,
            "capability_snapshot": self.capability_snapshot.to_dict() if hasattr(self.capability_snapshot, 'to_dict') else self.capability_snapshot,
        }

    # Backward-compatible alias: the other sentinel records (alerts,
    # diagnoses, patch results) expose to_dict(); dict() is kept for
    # existing callers, to_dict() added for naming consistency.
    to_dict = dict
63
+
64
+
65
class SentinelEngine:
    """Core sentinel orchestration engine.

    Coordinates the watcher (scan), diagnostician (LLM diagnosis),
    patcher (safe apply / queue), and capability tracker into complete
    sentinel cycles, and persists cycle history under data/sentinel.
    """

    def __init__(self):
        self.watcher = SentinelWatcher()
        self.diagnostician = SentinelDiagnostician()
        self.patcher = SentinelPatcher()
        self.tracker = CapabilityTracker()
        # Upper bound on LLM diagnoses per cycle (cost/latency guard).
        self.max_diagnoses = int(os.getenv("SENTINEL_MAX_DIAGNOSES_PER_CYCLE", "5"))
        self.logger = logging.getLogger("sentinel.engine")
        self.data_dir = Path("backend/app/data/sentinel")
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def run_cycle(self) -> SentinelCycleReport:
        """
        Run a complete sentinel cycle.

        Steps: scan for alerts, diagnose up to max_diagnoses high/critical
        alerts, apply or queue patches, snapshot capability, and persist
        the report. Re-entrant calls while a cycle is in flight return the
        previous report (or a stub) instead of starting a second cycle.

        Returns:
            SentinelCycleReport with cycle results
        """
        global SENTINEL_RUNNING, _last_cycle_at, _last_cycle_report

        # Check if already running (scheduler + manual trigger can overlap)
        if SENTINEL_RUNNING:
            self.logger.warning("Sentinel cycle already running, skipping")
            if _last_cycle_report:
                return SentinelCycleReport(**_last_cycle_report)
            else:
                # No previous report available: return a stub report
                return SentinelCycleReport(
                    cycle_id="stub",
                    started_at=datetime.utcnow().isoformat(),
                    completed_at=datetime.utcnow().isoformat(),
                    alerts_found=0,
                    diagnoses_made=0,
                    patches_applied=0,
                    patches_pending_review=0,
                    capability_snapshot=self.tracker.snapshot()
                )

        SENTINEL_RUNNING = True
        cycle_id = str(uuid.uuid4())
        started_at = datetime.utcnow().isoformat()
        patches_applied = 0
        patches_pending = 0
        diagnoses_made = 0

        try:
            # Step 1: Scan for alerts
            self.logger.info(f"Starting sentinel cycle {cycle_id}")
            all_alerts = self.watcher.scan()
            self.logger.info(f"Sentinel scan found {len(all_alerts)} alerts")

            # Step 2: Filter to high/critical only, respect max_diagnoses limit.
            # Critical alerts first, newest first within each severity.
            priority_alerts = [a for a in all_alerts if a.severity in ("high", "critical")]
            priority_alerts = sorted(
                priority_alerts,
                key=lambda a: (a.severity == "critical", a.timestamp),
                reverse=True
            )
            priority_alerts = priority_alerts[:self.max_diagnoses]

            self.logger.info(f"Processing {len(priority_alerts)} priority alerts")

            # Step 3: Diagnose each alert (a failure skips the alert, not the cycle)
            diagnoses = []
            for alert in priority_alerts:
                try:
                    self.logger.info(f"Diagnosing alert {alert.alert_id}")
                    diagnosis = self.diagnostician.diagnose(alert)
                    diagnoses.append(diagnosis)
                    diagnoses_made += 1
                except Exception as e:
                    self.logger.warning(f"Diagnosis failed for alert {alert.alert_id}: {e}")
                    continue

            # Step 4: Apply patches (or queue them for human review)
            for diagnosis in diagnoses:
                try:
                    self.logger.info(f"Applying patch for diagnosis {diagnosis.diagnosis_id}")
                    result = self.patcher.apply(diagnosis)

                    if result.applied:
                        patches_applied += 1
                        self.logger.info(f"Patch applied: {result.patch_id}")
                    elif result.requires_human_review:
                        patches_pending += 1
                        self.logger.info(f"Patch queued for review: {result.patch_id}")
                except Exception as e:
                    self.logger.error(f"Patch application failed: {e}")
                    continue

            # Step 5: Capability snapshot
            self.logger.info("Computing capability snapshot")
            capability_snapshot = self.tracker.snapshot()

            # Step 6: Build report
            completed_at = datetime.utcnow().isoformat()
            report = SentinelCycleReport(
                cycle_id=cycle_id,
                started_at=started_at,
                completed_at=completed_at,
                alerts_found=len(all_alerts),
                diagnoses_made=diagnoses_made,
                patches_applied=patches_applied,
                patches_pending_review=patches_pending,
                capability_snapshot=capability_snapshot
            )

            # Step 7: Save to cycle history
            self._save_cycle_history(report)

            # Update module state so get_status()/re-entrant calls see it
            _last_cycle_at = completed_at
            _last_cycle_report = report.dict()

            self.logger.info(f"Sentinel cycle {cycle_id} complete: {patches_applied} patches applied, {patches_pending} pending review")

            return report

        finally:
            # Always release the running flag, even if the cycle raised.
            SENTINEL_RUNNING = False

    def get_status(self) -> Dict[str, Any]:
        """
        Get current sentinel status.

        Returns:
            Status dict with health flag, cycle info, and weekly metrics.
            On any error a conservative default payload is returned.
        """
        try:
            # Count recent alerts
            alerts_this_week = self._count_recent_items("alert_history.json", days=7)

            # Count recent patches
            patches_this_week = self._count_recent_applied_patches(days=7)

            # Count pending patches
            pending_count = self._count_pending_patches()

            # Get latest AGI index
            agi_index = self._get_latest_agi_index()

            return {
                "sentinel_enabled": os.getenv("SENTINEL_ENABLED", "true").lower() == "true",
                "current_health": self.watcher.is_healthy(),
                "last_cycle_at": _last_cycle_at,
                "sentinel_running": SENTINEL_RUNNING,
                "agi_progression_index": agi_index,
                "alerts_this_week": alerts_this_week,
                "patches_applied_this_week": patches_this_week,
                "pending_review_count": pending_count,
            }

        except Exception as e:
            self.logger.error(f"Failed to get status: {e}")
            return {
                "sentinel_enabled": os.getenv("SENTINEL_ENABLED", "true").lower() == "true",
                "current_health": True,
                "last_cycle_at": _last_cycle_at,
                "sentinel_running": SENTINEL_RUNNING,
                "agi_progression_index": 0.0,
                "alerts_this_week": 0,
                "patches_applied_this_week": 0,
                "pending_review_count": 0,
            }

    def _save_cycle_history(self, report: SentinelCycleReport):
        """Append a cycle report to cycle_history.json, keeping the last 20."""
        try:
            history_file = self.data_dir / "cycle_history.json"

            # Load existing history
            if history_file.exists():
                with open(history_file, 'r', encoding='utf-8') as f:
                    history = json.load(f)
            else:
                history = []

            # Append new report
            history.append(report.dict())

            # Keep only last 20 entries to bound file growth
            if len(history) > 20:
                history = history[-20:]

            # Save back
            with open(history_file, 'w', encoding='utf-8') as f:
                json.dump(history, f, indent=2)

        except Exception as e:
            self.logger.error(f"Failed to save cycle history: {e}")

    def _count_recent_items(self, filename: str, days: int) -> int:
        """Count entries in a JSON history file whose timestamp is within the last N days."""
        try:
            file_path = self.data_dir / filename

            if not file_path.exists():
                return 0

            with open(file_path, 'r', encoding='utf-8') as f:
                items = json.load(f)

            cutoff = datetime.utcnow() - timedelta(days=days)
            count = 0

            for item in items:
                try:
                    item_time = datetime.fromisoformat(item["timestamp"])
                    if item_time >= cutoff:
                        count += 1
                except (KeyError, TypeError, ValueError):
                    # Skip malformed entries instead of failing the whole count.
                    continue

            return count

        except Exception as e:
            # Include the actual filename (the original logged a leftover
            # "(unknown)" placeholder here).
            self.logger.warning(f"Failed to count recent items in {filename}: {e}")
            return 0

    def _count_recent_applied_patches(self, days: int) -> int:
        """Count applied patches from the last N days."""
        try:
            file_path = self.data_dir / "patch_history.json"

            if not file_path.exists():
                return 0

            with open(file_path, 'r', encoding='utf-8') as f:
                patches = json.load(f)

            cutoff = datetime.utcnow() - timedelta(days=days)
            count = 0

            for patch in patches:
                try:
                    if not patch.get("applied", False):
                        continue

                    patch_time = datetime.fromisoformat(patch["timestamp"])
                    if patch_time >= cutoff:
                        count += 1
                except (KeyError, TypeError, ValueError):
                    # Skip malformed entries instead of failing the whole count.
                    continue

            return count

        except Exception as e:
            self.logger.warning(f"Failed to count recent applied patches: {e}")
            return 0

    def _count_pending_patches(self) -> int:
        """Count patches currently awaiting human review."""
        try:
            pending_dir = self.data_dir / "pending_patches"

            if not pending_dir.exists():
                return 0

            return len(list(pending_dir.glob("*.patch.json")))

        except Exception as e:
            self.logger.warning(f"Failed to count pending patches: {e}")
            return 0

    def _get_latest_agi_index(self) -> float:
        """Get the latest AGI progression index from capability history (0.0 if none)."""
        try:
            history_file = self.data_dir / "capability_history.json"

            if not history_file.exists():
                return 0.0

            with open(history_file, 'r', encoding='utf-8') as f:
                history = json.load(f)

            if not history:
                return 0.0

            return history[-1].get("agi_progression_index", 0.0)

        except Exception as e:
            self.logger.warning(f"Failed to get latest AGI index: {e}")
            return 0.0
backend/app/services/sentinel/watcher.py ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sentinel Watcher - System Health Scanner
3
+
4
+ Scans existing data directories for signs of system degradation.
5
+ Reads only - never writes. Never calls any LLM.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ from datetime import datetime, timedelta
12
+ from pathlib import Path
13
+ from typing import List, Dict, Any, Optional
14
+ import uuid
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class SentinelAlert:
    """A single system-health finding produced by a watcher scan.

    Each alert receives a fresh UUID and an ISO-8601 UTC timestamp at
    construction time.
    """

    def __init__(
        self,
        layer: int,
        component: str,
        issue_type: str,
        severity: str,
        raw_evidence: str
    ):
        self.alert_id = str(uuid.uuid4())
        self.layer = layer
        self.component = component
        self.issue_type = issue_type
        self.severity = severity
        self.raw_evidence = raw_evidence
        self.timestamp = datetime.utcnow().isoformat()

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the alert for JSON persistence."""
        return dict(
            alert_id=self.alert_id,
            layer=self.layer,
            component=self.component,
            issue_type=self.issue_type,
            severity=self.severity,
            raw_evidence=self.raw_evidence,
            timestamp=self.timestamp,
        )
+
49
+
50
+ class SentinelWatcher:
51
+ """Scans system for health issues."""
52
+
53
    def __init__(self):
        # Root of the runtime data tree this watcher inspects (read-only).
        self.data_dir = Path("backend/app/data")
        # Cache for is_healthy(): last scan time/result, valid for 5 minutes.
        self._last_scan_time: Optional[datetime] = None
        self._last_scan_result: Optional[bool] = None
        self._cache_duration = timedelta(minutes=5)
58
+
59
+ def scan(self) -> List[SentinelAlert]:
60
+ """
61
+ Perform comprehensive system health scan.
62
+
63
+ Returns:
64
+ List of SentinelAlert objects found during scan
65
+ """
66
+ alerts = []
67
+
68
+ # Check 1: Log Errors (Layer 1)
69
+ alerts.extend(self._check_log_errors())
70
+
71
+ # Check 2: Failed Cases (Layer 2)
72
+ alerts.extend(self._check_failed_cases())
73
+
74
+ # Check 3: Provider Fallback Rate (Layer 1)
75
+ alerts.extend(self._check_provider_fallbacks())
76
+
77
+ # Check 4: Low Confidence Pattern (Layer 2)
78
+ alerts.extend(self._check_low_confidence())
79
+
80
+ # Check 5: Knowledge Store Health (Layer 5)
81
+ alerts.extend(self._check_knowledge_health())
82
+
83
+ # Save alerts to history
84
+ self._save_alert_history(alerts)
85
+
86
+ logger.info(f"Sentinel scan complete: {len(alerts)} alerts found")
87
+ return alerts
88
+
89
+ def is_healthy(self) -> bool:
90
+ """
91
+ Check if system is healthy (no high or critical alerts).
92
+
93
+ Returns:
94
+ True if healthy, False otherwise
95
+ """
96
+ # Use cached result if recent
97
+ if self._last_scan_time and self._last_scan_result is not None:
98
+ if datetime.utcnow() - self._last_scan_time < self._cache_duration:
99
+ return self._last_scan_result
100
+
101
+ # Perform new scan
102
+ alerts = self.scan()
103
+ high_or_critical = [a for a in alerts if a.severity in ("high", "critical")]
104
+
105
+ result = len(high_or_critical) == 0
106
+ self._last_scan_time = datetime.utcnow()
107
+ self._last_scan_result = result
108
+
109
+ return result
110
+
111
+ def _check_log_errors(self) -> List[SentinelAlert]:
112
+ """Check for errors in log files."""
113
+ alerts = []
114
+ logs_dir = self.data_dir / "logs"
115
+
116
+ if not logs_dir.exists():
117
+ return alerts
118
+
119
+ try:
120
+ error_count = 0
121
+ first_error = None
122
+
123
+ for log_file in logs_dir.glob("*.log"):
124
+ try:
125
+ with open(log_file, 'r') as f:
126
+ lines = f.readlines()
127
+ recent_lines = lines[-200:] if len(lines) > 200 else lines
128
+
129
+ for line in recent_lines:
130
+ if any(keyword in line for keyword in ["ERROR", "CRITICAL", "Exception", "Traceback"]):
131
+ error_count += 1
132
+ if first_error is None:
133
+ first_error = line.strip()[:300]
134
+ except Exception as e:
135
+ logger.warning(f"Failed to read log file {log_file}: {e}")
136
+ continue
137
+
138
+ if error_count > 10:
139
+ alerts.append(SentinelAlert(
140
+ layer=1,
141
+ component="logs",
142
+ issue_type="error",
143
+ severity="high",
144
+ raw_evidence=f"Found {error_count} errors in logs. First: {first_error or 'N/A'}"
145
+ ))
146
+
147
+ except Exception as e:
148
+ logger.warning(f"Failed to check log errors: {e}")
149
+
150
+ return alerts
151
+
152
+ def _check_failed_cases(self) -> List[SentinelAlert]:
153
+ """Check for failed case executions."""
154
+ alerts = []
155
+ memory_dir = self.data_dir / "memory"
156
+
157
+ if not memory_dir.exists() or not any(memory_dir.iterdir()):
158
+ return alerts
159
+
160
+ try:
161
+ case_files = sorted(memory_dir.glob("*.json"), key=os.path.getmtime, reverse=True)[:50]
162
+
163
+ if not case_files:
164
+ return alerts
165
+
166
+ total_count = 0
167
+ failed_count = 0
168
+
169
+ for case_file in case_files:
170
+ try:
171
+ with open(case_file, 'r') as f:
172
+ case_data = json.load(f)
173
+
174
+ total_count += 1
175
+
176
+ # Check if case failed
177
+ final_answer = case_data.get("final_answer", "")
178
+ outputs = case_data.get("outputs", [])
179
+
180
+ is_failed = False
181
+
182
+ if not final_answer or not outputs:
183
+ is_failed = True
184
+ else:
185
+ # Check for low confidence in any agent output
186
+ for output in outputs:
187
+ if isinstance(output, dict):
188
+ confidence = output.get("confidence", 1.0)
189
+ if confidence < 0.4:
190
+ is_failed = True
191
+ break
192
+
193
+ if is_failed:
194
+ failed_count += 1
195
+
196
+ except Exception as e:
197
+ logger.warning(f"Failed to read case file {case_file}: {e}")
198
+ continue
199
+
200
+ if total_count > 0:
201
+ failure_rate = failed_count / total_count
202
+
203
+ if failure_rate > 0.3:
204
+ severity = "critical" if failure_rate > 0.6 else "high"
205
+ alerts.append(SentinelAlert(
206
+ layer=2,
207
+ component="agents",
208
+ issue_type="degradation",
209
+ severity=severity,
210
+ raw_evidence=f"Failed case rate: {failure_rate*100:.1f}% of last {total_count} cases"
211
+ ))
212
+
213
+ except Exception as e:
214
+ logger.warning(f"Failed to check failed cases: {e}")
215
+
216
+ return alerts
217
+
218
+ def _check_provider_fallbacks(self) -> List[SentinelAlert]:
219
+ """Check for excessive provider fallbacks."""
220
+ alerts = []
221
+ logs_dir = self.data_dir / "logs"
222
+
223
+ if not logs_dir.exists():
224
+ return alerts
225
+
226
+ try:
227
+ fallback_count = 0
228
+
229
+ for log_file in logs_dir.glob("*.log"):
230
+ try:
231
+ with open(log_file, 'r') as f:
232
+ lines = f.readlines()
233
+ recent_lines = lines[-200:] if len(lines) > 200 else lines
234
+
235
+ for line in recent_lines:
236
+ if any(keyword in line for keyword in ["fallback", "Fallback", "primary provider failed"]):
237
+ fallback_count += 1
238
+ except Exception as e:
239
+ logger.warning(f"Failed to read log file {log_file}: {e}")
240
+ continue
241
+
242
+ if fallback_count > 20:
243
+ severity = "high" if fallback_count > 50 else "medium"
244
+ alerts.append(SentinelAlert(
245
+ layer=1,
246
+ component="provider",
247
+ issue_type="degradation",
248
+ severity=severity,
249
+ raw_evidence=f"Provider fallback triggered {fallback_count} times in recent logs"
250
+ ))
251
+
252
+ except Exception as e:
253
+ logger.warning(f"Failed to check provider fallbacks: {e}")
254
+
255
+ return alerts
256
+
257
+ def _check_low_confidence(self) -> List[SentinelAlert]:
258
+ """Check for low confidence pattern across cases."""
259
+ alerts = []
260
+ memory_dir = self.data_dir / "memory"
261
+
262
+ if not memory_dir.exists() or not any(memory_dir.iterdir()):
263
+ return alerts
264
+
265
+ try:
266
+ case_files = sorted(memory_dir.glob("*.json"), key=os.path.getmtime, reverse=True)[:50]
267
+
268
+ if not case_files:
269
+ return alerts
270
+
271
+ all_confidences = []
272
+
273
+ for case_file in case_files:
274
+ try:
275
+ with open(case_file, 'r') as f:
276
+ case_data = json.load(f)
277
+
278
+ outputs = case_data.get("outputs", [])
279
+
280
+ for output in outputs:
281
+ if isinstance(output, dict):
282
+ confidence = output.get("confidence")
283
+ if confidence is not None:
284
+ all_confidences.append(confidence)
285
+
286
+ except Exception as e:
287
+ logger.warning(f"Failed to read case file {case_file}: {e}")
288
+ continue
289
+
290
+ if all_confidences:
291
+ avg_confidence = sum(all_confidences) / len(all_confidences)
292
+
293
+ if avg_confidence < 0.4:
294
+ alerts.append(SentinelAlert(
295
+ layer=2,
296
+ component="synthesizer",
297
+ issue_type="degradation",
298
+ severity="medium",
299
+ raw_evidence=f"Average agent confidence: {avg_confidence:.2f}"
300
+ ))
301
+
302
+ except Exception as e:
303
+ logger.warning(f"Failed to check low confidence: {e}")
304
+
305
+ return alerts
306
+
307
+ def _check_knowledge_health(self) -> List[SentinelAlert]:
308
+ """Check knowledge store health."""
309
+ alerts = []
310
+ knowledge_dir = self.data_dir / "knowledge"
311
+
312
+ if not knowledge_dir.exists():
313
+ return alerts
314
+
315
+ try:
316
+ stale_count = 0
317
+ cutoff = datetime.utcnow() - timedelta(days=7)
318
+
319
+ for knowledge_file in knowledge_dir.glob("*.json"):
320
+ try:
321
+ mtime = datetime.fromtimestamp(knowledge_file.stat().st_mtime)
322
+ if mtime < cutoff:
323
+ stale_count += 1
324
+ except Exception as e:
325
+ logger.warning(f"Failed to check knowledge file {knowledge_file}: {e}")
326
+ continue
327
+
328
+ if stale_count > 10:
329
+ alerts.append(SentinelAlert(
330
+ layer=5,
331
+ component="knowledge_store",
332
+ issue_type="anomaly",
333
+ severity="low",
334
+ raw_evidence=f"{stale_count} knowledge items older than 7 days"
335
+ ))
336
+
337
+ except Exception as e:
338
+ logger.warning(f"Failed to check knowledge health: {e}")
339
+
340
+ return alerts
341
+
342
+ def _save_alert_history(self, alerts: List[SentinelAlert]):
343
+ """Save alerts to history file."""
344
+ try:
345
+ sentinel_dir = self.data_dir / "sentinel"
346
+ sentinel_dir.mkdir(parents=True, exist_ok=True)
347
+
348
+ history_file = sentinel_dir / "alert_history.json"
349
+
350
+ # Load existing history
351
+ if history_file.exists():
352
+ with open(history_file, 'r') as f:
353
+ history = json.load(f)
354
+ else:
355
+ history = []
356
+
357
+ # Append new alerts
358
+ for alert in alerts:
359
+ history.append(alert.to_dict())
360
+
361
+ # Keep only last 200 entries
362
+ if len(history) > 200:
363
+ history = history[-200:]
364
+
365
+ # Save back
366
+ with open(history_file, 'w') as f:
367
+ json.dump(history, f, indent=2)
368
+
369
+ except Exception as e:
370
+ logger.error(f"Failed to save alert history: {e}")
backend/app/services/simulation_store.py CHANGED
@@ -1,7 +1,7 @@
1
  import json
2
  from datetime import datetime
3
  from pathlib import Path
4
- from typing import Any, Dict, Optional
5
 
6
  from app.config import SIMULATION_DIR
7
 
@@ -24,4 +24,53 @@ def get_simulation(simulation_id: str) -> Optional[Dict[str, Any]]:
24
  path = _path(simulation_id)
25
  if not path.exists():
26
  return None
27
- return json.loads(path.read_text(encoding="utf-8"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
  from datetime import datetime
3
  from pathlib import Path
4
+ from typing import Any, Dict, Optional, List
5
 
6
  from app.config import SIMULATION_DIR
7
 
 
24
  path = _path(simulation_id)
25
  if not path.exists():
26
  return None
27
+ return json.loads(path.read_text(encoding="utf-8"))
28
+
29
+
30
def list_simulations(limit: Optional[int] = None) -> List[Dict[str, Any]]:
    """Return all stored simulations, newest first.

    Args:
        limit: Maximum number of simulations to return.  ``None`` (the
            default) returns everything.  A limit of ``0`` now correctly
            returns an empty list (the previous falsy check ``if limit:``
            silently returned all results for 0).

    Returns:
        List of simulation dicts sorted by ``saved_at`` descending.
    """
    simulations: List[Dict[str, Any]] = []
    for path in Path(SIMULATION_DIR).glob("*.json"):
        try:
            simulations.append(json.loads(path.read_text(encoding="utf-8")))
        except Exception:
            # Skip unreadable/corrupt files rather than failing the listing.
            continue

    # Sort by saved_at descending (newest first); missing timestamps sort last.
    simulations.sort(key=lambda sim: sim.get("saved_at", ""), reverse=True)

    if limit is not None:
        return simulations[:limit]
    return simulations
46
+
47
+
48
def search_simulations(query: str, status: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Search simulations by title or prediction_goal.
    Optionally filter by status.
    """
    needle = query.lower()

    def _matches(sim: Dict[str, Any]) -> bool:
        # Status filter first, then case-insensitive substring match on
        # either the title or the prediction goal.
        if status and sim.get("status") != status:
            return False
        return (
            needle in sim.get("title", "").lower()
            or needle in sim.get("prediction_goal", "").lower()
        )

    return [sim for sim in list_simulations() if _matches(sim)]
71
+
72
+
73
def filter_simulations_by_status(status: str) -> List[Dict[str, Any]]:
    """Return only the stored simulations whose ``status`` equals *status*."""
    return [
        sim
        for sim in list_simulations()
        if sim.get("status") == status
    ]