File size: 45,973 Bytes
3054503
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
"""
ARCHAI Adaptive AI Assessment Engine
===================================
SOTA: 2PL-IRT adaptive selection + Bayesian knowledge tracing + LLM learning paths

Plug-and-play backend for your-ai-arch.netlify.app
Replaces static question bank with adaptive, intelligent assessment.
"""

import json
import math
import random
import uuid
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, field
from enum import Enum
import numpy as np
from scipy.optimize import minimize_scalar

# ============================================================================
# DATA MODELS — compatible with existing archai frontend
# ============================================================================

class Dimension(Enum):
    """The six assessment dimensions scored by the engine.

    Enum values are the lowercase string keys used in serialized
    response history and as keys into DIMENSION_COLORS / the scores
    dict sent to the frontend.
    """
    LITERACY = "literacy"
    TOOLING = "tooling"
    STRATEGY = "strategy"
    IMPLEMENTATION = "implementation"
    GOVERNANCE = "governance"
    DATA = "data"

# Human-readable display label per dimension, keyed by the Dimension enum.
DIMENSION_LABELS = {
    Dimension.LITERACY: "AI Literacy",
    Dimension.TOOLING: "Tool Proficiency",
    Dimension.STRATEGY: "Strategic Thinking",
    Dimension.IMPLEMENTATION: "Implementation",
    Dimension.GOVERNANCE: "Governance & Ethics",
    Dimension.DATA: "Data Fluency",
}

# Hex color per dimension, keyed by the enum *value* (string) rather than
# the enum member — presumably to match the frontend's JSON keys; verify
# against the archai client before changing.
DIMENSION_COLORS = {
    "literacy": "#FB7185",
    "tooling": "#10B981",
    "strategy": "#F97316",
    "implementation": "#14B8A6",
    "governance": "#F43F5E",
    "data": "#34D399",
}

@dataclass
class Question:
    """A single assessment item with its 2PL-IRT calibration parameters."""
    id: str  # stable question identifier, e.g. "lit_1"
    dimension: Dimension  # which of the six dimensions this item measures
    text: str  # prompt shown to the user
    options: List[str]  # ordered answer options, lowest ability (index 0) to highest
    difficulty: float  # b parameter in IRT (higher = harder)
    discrimination: float  # a parameter in IRT
    concept_tags: List[str] = field(default_factory=list)  # concept keys for learning-path mapping

@dataclass
class StudentState:
    """Bayesian knowledge state per dimension.

    Holds the running MAP ability estimate (theta) and its standard error
    for each dimension, plus the full question/response history for the
    session.
    """
    session_id: str
    theta: Dict[Dimension, float]  # latent ability estimate per dimension
    theta_variance: Dict[Dimension, float]  # standard error of each theta estimate
    asked_questions: List[str] = field(default_factory=list)  # question ids, in ask order
    responses: Dict[str, int] = field(default_factory=dict)  # question_id -> option_index
    response_history: List[Dict] = field(default_factory=list)  # per-response audit records
    # NOTE: datetime.utcnow() is deprecated (3.12+) but kept so the emitted
    # ISO string stays naive (no "+00:00" suffix), matching existing records.
    start_time: str = field(default_factory=lambda: datetime.utcnow().isoformat())

    def get_unasked(self, question_bank: List[Question]) -> List[Question]:
        """Return bank questions not yet asked, preserving bank order.

        Builds the asked-id set once so membership tests are O(1) instead
        of re-scanning asked_questions for every bank question.
        """
        asked = set(self.asked_questions)
        return [q for q in question_bank if q.id not in asked]


# ============================================================================
# QUESTION BANK — Calibrated with IRT parameters
# ============================================================================

def build_question_bank() -> List[Question]:
    """Calibrated question bank mapped to archai's 6 dimensions.

    Returns 24 Question items, 4 per dimension. The positional arguments
    after the option list follow the Question dataclass field order:
    difficulty (b) first, then discrimination (a), then concept tags.
    Within each dimension the four items step upward in difficulty
    (roughly b = -2 .. +2) so the adaptive selector can probe a wide
    ability range. NOTE(review): parameter values appear hand-assigned
    rather than fitted from response data — confirm calibration source.
    """
    bank = []
    
    # --- LITERACY ---
    bank.extend([
        Question("lit_1", Dimension.LITERACY,
            "How well can you explain the difference between machine learning, deep learning, and generative AI?",
            ["Not at all", "Basic overview", "Clearly with examples", "Could teach a workshop"],
            -2.0, 1.2, ["ml_basics", "dl_vs_ml", "gen_ai"]),
        Question("lit_2", Dimension.LITERACY,
            "How familiar are you with concepts like tokens, context windows, fine-tuning, and RAG?",
            ["Never heard of them", "Heard the terms", "Understand conceptually", "Use them in practice"],
            -1.0, 1.5, ["tokens", "rag", "fine_tuning"]),
        Question("lit_3", Dimension.LITERACY,
            "Can you explain what a transformer architecture is and how attention mechanisms work?",
            ["No idea", "Vague understanding", "Can explain to a peer", "Can implement from scratch"],
            0.5, 1.8, ["transformers", "attention", "architecture"]),
        Question("lit_4", Dimension.LITERACY,
            "How well do you understand the scaling laws that govern LLM performance?",
            ["Never heard", "Basic awareness", "Can discuss tradeoffs", "Can apply to model selection"],
            1.5, 2.0, ["scaling_laws", "compute", "model_selection"]),
    ])
    
    # --- TOOLING ---
    bank.extend([
        Question("tool_1", Dimension.TOOLING,
            "How frequently do you use AI tools (ChatGPT, Copilot, Claude, etc.) in your work?",
            ["Never", "Occasionally", "Weekly", "Daily, core to workflow"],
            -2.0, 1.0, ["chatgpt", "copilot", "claude", "usage_frequency"]),
        Question("tool_2", Dimension.TOOLING,
            "Can you chain multiple AI tools or prompts to complete a complex task end-to-end?",
            ["No", "Tried once or twice", "Sometimes successfully", "Regularly with custom workflows"],
            -0.5, 1.3, ["prompt_chaining", "tool_orchestration", "workflows"]),
        Question("tool_3", Dimension.TOOLING,
            "Have you set up API integrations with LLM providers (OpenAI, Anthropic, local models)?",
            ["Never", "Used a no-code tool", "Wrote code for it", "Built production integrations"],
            0.5, 1.5, ["api_integration", "openai_api", "local_models"]),
        Question("tool_4", Dimension.TOOLING,
            "How comfortable are you running open-source models locally with Ollama, LM Studio, or vLLM?",
            ["Don't know what those are", "Installed one once", "Run models regularly", "Optimize inference for production"],
            1.5, 1.8, ["ollama", "lm_studio", "vllm", "local_inference"]),
    ])
    
    # --- STRATEGY ---
    bank.extend([
        Question("strat_1", Dimension.STRATEGY,
            "When evaluating a new project, do you assess where AI could add value or reduce effort?",
            ["Never consider it", "Occasionally think about it", "Systematically evaluate", "Lead AI-first ideation"],
            -1.5, 1.1, ["ai_opportunity", "value_assessment", "project_evaluation"]),
        Question("strat_2", Dimension.STRATEGY,
            "Can you articulate the ROI or business case for an AI initiative to stakeholders?",
            ["Wouldn't know where to start", "Could outline rough benefits", "Can build a structured case", "Have done this successfully"],
            -0.5, 1.2, ["roi", "business_case", "stakeholder_communication"]),
        Question("strat_3", Dimension.STRATEGY,
            "Do you have a framework for prioritizing AI initiatives by feasibility vs impact?",
            ["No framework", "Informal mental model", "Structured scoring system", "Organization-wide prioritization process"],
            0.8, 1.6, ["prioritization", "feasibility", "impact_matrix"]),
        Question("strat_4", Dimension.STRATEGY,
            "Can you identify competitive moats and differentiation through AI capabilities?",
            ["Not applicable to my role", "Basic understanding", "Can analyze for my industry", "Have built AI-driven differentiation"],
            1.8, 2.0, ["competitive_moat", "differentiation", "ai_strategy"]),
    ])
    
    # --- IMPLEMENTATION ---
    bank.extend([
        Question("impl_1", Dimension.IMPLEMENTATION,
            "Have you built, deployed, or significantly configured an AI-powered solution?",
            ["Never", "Followed a tutorial", "Built a working prototype", "Deployed to production"],
            -1.5, 1.2, ["deployment", "prototype", "production"]),
        Question("impl_2", Dimension.IMPLEMENTATION,
            "How comfortable are you with prompt engineering, API integration, or model evaluation?",
            ["Not at all", "Basic awareness", "Can do with guidance", "Highly proficient"],
            -0.5, 1.4, ["prompt_engineering", "api_integration", "model_eval"]),
        Question("impl_3", Dimension.IMPLEMENTATION,
            "Have you built a RAG system or fine-tuned a model for a specific domain?",
            ["Don't know what RAG is", "Used a no-code RAG tool", "Built custom RAG pipeline", "Fine-tuned and deployed domain model"],
            0.8, 1.6, ["rag", "fine_tuning", "domain_adaptation"]),
        Question("impl_4", Dimension.IMPLEMENTATION,
            "Can you architect a multi-agent system or design LLM orchestration workflows?",
            ["No idea", "Understand conceptually", "Built a simple agent", "Production multi-agent system"],
            1.8, 1.9, ["agents", "orchestration", "langgraph", "crewai"]),
    ])
    
    # --- GOVERNANCE ---
    bank.extend([
        Question("gov_1", Dimension.GOVERNANCE,
            "How well do you understand AI risks like hallucination, bias, data privacy, and IP exposure?",
            ["Not aware", "Heard about them", "Understand key risks", "Can design mitigations"],
            -1.5, 1.0, ["hallucination", "bias", "privacy", "ip_risk"]),
        Question("gov_2", Dimension.GOVERNANCE,
            "Does your workflow include checks for AI output accuracy, fairness, or compliance?",
            ["No checks", "Occasional review", "Standard process", "Systematic governance framework"],
            -0.3, 1.2, ["accuracy_checks", "fairness", "compliance"]),
        Question("gov_3", Dimension.GOVERNANCE,
            "Are you familiar with AI regulations (EU AI Act, NIST AI RMF, ISO 42001)?",
            ["Never heard", "Aware they exist", "Can navigate requirements", "Implemented compliance program"],
            0.8, 1.5, ["eu_ai_act", "nist_rmf", "iso_42001", "regulation"]),
        Question("gov_4", Dimension.GOVERNANCE,
            "Can you design an AI governance framework covering data lineage, model cards, and audit trails?",
            ["Not my area", "Understand components", "Can design for a team", "Enterprise-wide implementation"],
            1.8, 1.8, ["governance_framework", "model_cards", "audit_trail", "data_lineage"]),
    ])
    
    # --- DATA ---
    bank.extend([
        Question("data_1", Dimension.DATA,
            "How comfortable are you working with structured and unstructured data for AI use cases?",
            ["Uncomfortable", "Can read simple reports", "Can clean and prep data", "Can architect data pipelines"],
            -1.5, 1.1, ["structured_data", "unstructured_data", "data_prep"]),
        Question("data_2", Dimension.DATA,
            "Can you evaluate whether data is sufficient and appropriate for training or prompting an AI system?",
            ["No", "Vaguely", "With guidance", "Yes, independently"],
            -0.3, 1.3, ["data_quality", "data_sufficiency", "training_data"]),
        Question("data_3", Dimension.DATA,
            "Have you worked with embeddings, vector databases, or data augmentation for AI?",
            ["No experience", "Used a vector DB via UI", "Built embedding pipelines", "Optimized retrieval systems"],
            0.8, 1.5, ["embeddings", "vector_db", "data_augmentation", "retrieval"]),
        Question("data_4", Dimension.DATA,
            "Can you design data collection strategies and evaluate dataset bias for model training?",
            ["Not applicable", "Basic awareness", "Can assess existing datasets", "Design collection from scratch"],
            1.6, 1.7, ["data_collection", "dataset_bias", "training_strategy"]),
    ])
    
    return bank


# ============================================================================
# IRT ENGINE — 2PL Model with Fisher Information
# ============================================================================

class IRTEngine:
    """
    Two-Parameter Logistic (2PL) IRT model.
    P(correct|theta) = sigmoid(a * (theta - b))

    a = discrimination (slope), b = difficulty (location),
    theta = latent ability.
    """

    @staticmethod
    def sigmoid(z: float) -> float:
        """Numerically stable logistic function.

        The naive 1/(1 + exp(-z)) raises OverflowError for z below about
        -710; branching on the sign keeps the exp() argument non-positive
        so both tails saturate cleanly to 0.0 / 1.0 instead.
        """
        if z >= 0:
            return 1.0 / (1.0 + math.exp(-z))
        ez = math.exp(z)
        return ez / (1.0 + ez)

    @staticmethod
    def probability(theta: float, a: float, b: float) -> float:
        """Probability of a correct (high-score) response."""
        return IRTEngine.sigmoid(a * (theta - b))

    @staticmethod
    def fisher_information(theta: float, a: float, b: float) -> float:
        """Fisher information — how precisely this item measures ability at theta.

        For the 2PL model: I(theta) = a^2 * p * (1 - p); maximal when
        p = 0.5, i.e. when item difficulty matches the ability estimate.
        """
        p = IRTEngine.probability(theta, a, b)
        return (a ** 2) * p * (1 - p)

    @staticmethod
    def likelihood(theta: float, responses: List[Tuple[float, float, int]], max_option: int = 3) -> float:
        """
        Compute the LOG-likelihood of theta given responses.
        responses: list of (a, b, option_index) tuples.
        option_index 0 = lowest, max_option = highest.
        We model this as a graded response model approximation.
        """
        log_lik = 0.0
        for a, b, opt_idx in responses:
            # Map option to a "correctness weight" 0.0 to 1.0
            weight = opt_idx / max_option
            p = IRTEngine.probability(theta, a, b)
            # Blend of correct and incorrect likelihoods:
            # higher option → dominated by p, lower option → by (1 - p).
            expected = weight * p + (1 - weight) * (1 - p)
            expected = max(expected, 1e-10)  # avoid log(0)
            log_lik += math.log(expected)
        return log_lik

    @staticmethod
    def estimate_theta(responses: List[Tuple[float, float, int]], prior_mean: float = 0.0, prior_std: float = 1.0) -> Tuple[float, float]:
        """
        MAP estimate of theta given responses, under a Gaussian prior
        N(prior_mean, prior_std^2). Returns (theta_estimate, standard_error).
        With no responses, falls back to the prior (mean, std).
        """
        if not responses:
            return prior_mean, prior_std

        def neg_log_posterior(theta):
            # Gaussian log-prior (up to an additive constant) + log-likelihood.
            log_prior = -0.5 * ((theta - prior_mean) / prior_std) ** 2
            log_lik = IRTEngine.likelihood(theta, responses)
            return -(log_prior + log_lik)

        # Bounded scalar optimization over the plausible ability range.
        result = minimize_scalar(neg_log_posterior, bounds=(-4.0, 4.0), method='bounded')
        theta_hat = result.x

        # Approximate SE from item Fisher information at the MAP plus the
        # prior precision 1/prior_std^2 (Gaussian approximation).
        fisher = sum(IRTEngine.fisher_information(theta_hat, a, b) for a, b, _ in responses)
        se = 1.0 / math.sqrt(fisher + 1.0 / (prior_std ** 2))

        return theta_hat, se


# ============================================================================
# ADAPTIVE SELECTOR — Fisher Information Maximization
# ============================================================================

class AdaptiveSelector:
    """
    Selects next question maximizing Fisher information at current ability estimate.
    Implements content balancing (ensures all dimensions are covered).
    """

    def __init__(self, min_per_dimension: int = 1, max_total: int = 12, target_precision: float = 0.3):
        self.min_per_dimension = min_per_dimension  # minimum items required per dimension
        self.max_total = max_total  # hard cap on assessment length
        self.target_precision = target_precision  # SE below this counts as "precise enough"
        # Cache an id -> Question index of the static bank once; the previous
        # implementation rebuilt the entire bank for EVERY asked question
        # inside should_stop().
        self._bank_by_id = {q.id: q for q in build_question_bank()}

    def select_next(
        self,
        state: StudentState,
        question_bank: List[Question],
        balance_penalty: float = 2.0
    ) -> Optional[Question]:
        """
        Select next question using Fisher information with content balancing.

        Returns None when every bank question has been asked. Ties keep the
        earliest candidate in bank order (same as the previous stable sort).
        """
        unasked = state.get_unasked(question_bank)
        if not unasked:
            return None

        # Count questions per dimension already asked (O(1) id lookups
        # instead of a linear bank scan per asked id).
        by_id = {q.id: q for q in question_bank}
        dim_counts = {d: 0 for d in Dimension}
        for qid in state.asked_questions:
            q = by_id.get(qid)
            if q:
                dim_counts[q.dimension] += 1

        # Single-pass argmax over information scores.
        best: Optional[Question] = None
        best_info = float('-inf')
        for q in unasked:
            theta = state.theta.get(q.dimension, 0.0)
            info = IRTEngine.fisher_information(theta, q.discrimination, q.difficulty)

            # Content balancing: boost under-represented dimensions.
            count = dim_counts[q.dimension]
            if count < self.min_per_dimension:
                info *= balance_penalty * (self.min_per_dimension - count + 1)

            # Precision stopping: if SE is already good, slightly deprioritize.
            se = state.theta_variance.get(q.dimension, 1.0)
            if se < self.target_precision:
                info *= 0.7

            if info > best_info:
                best_info = info
                best = q

        return best

    def should_stop(self, state: StudentState) -> bool:
        """Stop when max questions reached or all dimensions have sufficient precision.

        Early stop requires every dimension covered at least
        min_per_dimension times, every measured SE below target_precision,
        and at least 6 questions asked.
        """
        if len(state.asked_questions) >= self.max_total:
            return True

        dim_coverage = {d: 0 for d in Dimension}
        dim_precision = {d: float('inf') for d in Dimension}

        for qid in state.asked_questions:
            q = self._bank_by_id.get(qid)
            if q:
                dim_coverage[q.dimension] += 1
                dim_precision[q.dimension] = min(
                    dim_precision[q.dimension],
                    state.theta_variance.get(q.dimension, 1.0)
                )

        all_covered = all(c >= self.min_per_dimension for c in dim_coverage.values())
        all_precise = all(se < self.target_precision for se in dim_precision.values() if se != float('inf'))

        return all_covered and all_precise and len(state.asked_questions) >= 6


# ============================================================================
# KNOWLEDGE TRACING — Bayesian Update
# ============================================================================

class KnowledgeTracer:
    """
    Bayesian knowledge tracing per dimension.
    Updates latent ability (theta) after each response.
    """

    def __init__(self, prior_mean: float = 0.0, prior_std: float = 1.0):
        self.prior_mean = prior_mean  # mean of the Gaussian prior on theta
        self.prior_std = prior_std  # std dev of the Gaussian prior on theta
        self.irt = IRTEngine()
        # Index the (static) question bank once per tracer; the previous
        # implementation rebuilt the entire bank for every recorded
        # response on every update() call.
        self._bank_by_id = {q.id: q for q in build_question_bank()}

    def update(
        self,
        state: StudentState,
        question: Question,
        option_index: int,
        max_option: int = 3
    ) -> StudentState:
        """Update student state with new response using Bayesian IRT.

        Records the response in the state's history structures, then
        re-estimates theta and its SE for the question's dimension from
        ALL responses in that dimension. Mutates and returns `state`.
        """
        dim = question.dimension

        # Record the response in every history structure.
        state.asked_questions.append(question.id)
        state.responses[question.id] = option_index
        state.response_history.append({
            "question_id": question.id,
            "dimension": dim.value,
            "option_index": option_index,
            "timestamp": datetime.utcnow().isoformat(),
        })

        # Gather all responses for this dimension (O(1) lookup per id).
        dim_responses = []
        for qid, opt in state.responses.items():
            q = self._bank_by_id.get(qid)
            if q and q.dimension == dim:
                dim_responses.append((q.discrimination, q.difficulty, opt))

        # Re-estimate theta for this dimension via MAP.
        theta, se = self.irt.estimate_theta(dim_responses, self.prior_mean, self.prior_std)
        state.theta[dim] = theta
        state.theta_variance[dim] = se

        return state

    def get_dimension_scores(self, state: StudentState) -> Dict[str, int]:
        """Convert latent theta to 0-100 scores (archai-compatible).

        Affine-shifted sigmoid mapping: with this formula theta=0 → ~58,
        theta=2 → ~93, theta=-2 → ~20, clamped to [5, 95] so scores never
        hit the extremes. Dimensions with no responses score from the
        default theta of 0.0.
        """
        scores = {}
        for dim in Dimension:
            theta = state.theta.get(dim, 0.0)
            score = int(round(100 * self.irt.sigmoid(theta * 0.8 + 0.1) * 1.1))
            score = max(5, min(95, score))
            scores[dim.value] = score
        return scores

    def get_overall_score(self, state: StudentState) -> int:
        """Unweighted mean of the six dimension scores, rounded to int."""
        scores = self.get_dimension_scores(state)
        return round(sum(scores.values()) / len(scores))


# ============================================================================
# LEARNING PATH GENERATOR — Structured day/week/month actionables
# ============================================================================

class LearningPathGenerator:
    """
    Generates granular learning paths with day/week/month actionables.
    Uses rule-based logic aligned with archai's action plan structure.

    Public surface:
    - determine_stage(overall_score)   -> stage dict
    - determine_archetype(scores)      -> archetype summary dict
    - generate_learning_path(...)      -> full path payload (days/weeks/months)
    """

    def __init__(self):
        # Maturity stages, ordered ascending by threshold (0-100 overall score).
        self.stages = [
            {"id": "awareness", "label": "Awareness", "threshold": 20, "desc": "You recognize AI's potential"},
            {"id": "understanding", "label": "Understanding", "threshold": 40, "desc": "You grasp core concepts"},
            {"id": "application", "label": "Application", "threshold": 60, "desc": "You use AI daily"},
            {"id": "integration", "label": "Integration", "threshold": 75, "desc": "AI is embedded in your work"},
            {"id": "mastery", "label": "Mastery", "threshold": 90, "desc": "You architect AI systems"},
        ]

        # Archetype rules are evaluated in list order; the first condition that
        # returns True wins. The final "apprentice" entry is an always-true
        # fallback so determine_archetype never comes up empty.
        self.archetypes = [
            {"id": "pioneer", "label": "The Pioneer", "desc": "High across the board — charting new territory",
             "condition": lambda s: all(v >= 70 for v in s.values())},
            {"id": "responsible-builder", "label": "The Responsible Builder", "desc": "Balances capability with caution",
             "condition": lambda s: s.get("governance", 0) >= 60 and s.get("implementation", 0) >= 50},
            {"id": "data-craftsman", "label": "The Data Craftsman", "desc": "Data-first, builds from evidence",
             "condition": lambda s: s.get("data", 0) >= 60 and s.get("implementation", 0) >= 50},
            {"id": "power-user", "label": "The Power User", "desc": "Fluent with tools, ready to strategize next",
             "condition": lambda s: s.get("tooling", 0) >= 60 and s.get("strategy", 0) < 50},
            {"id": "vision-caster", "label": "The Vision Caster", "desc": "Strategic thinker — hands-on comes next",
             "condition": lambda s: s.get("strategy", 0) >= 60 and s.get("implementation", 0) < 50},
            {"id": "integrator", "label": "The Integrator", "desc": "Well-rounded across every dimension",
             "condition": self._is_integrator},
            {"id": "explorer", "label": "The Explorer", "desc": "Curious and ready to dive in",
             "condition": lambda s: all(v < 45 for v in s.values())},
            {"id": "apprentice", "label": "The Apprentice", "desc": "Building foundational fluency",
             "condition": lambda s: True},  # fallback
        ]

    @staticmethod
    def _is_integrator(scores: Dict[str, int]) -> bool:
        """True when scores are uniformly solid: population std-dev < 18 and mean >= 50.

        Extracted from an unreadable walrus-in-tuple lambda; math is unchanged.
        Raises ZeroDivisionError on an empty dict (caught by determine_archetype).
        """
        values = list(scores.values())
        mean = sum(values) / len(values)
        std = (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
        return std < 18 and mean >= 50

    def determine_stage(self, overall_score: int) -> Dict:
        """Return the highest stage whose threshold the overall score meets."""
        stage = self.stages[0]
        for s in self.stages:
            if overall_score >= s["threshold"]:
                stage = s
        return stage

    def determine_archetype(self, scores: Dict[str, int]) -> Dict:
        """Return the first matching archetype (id/label/desc only — no lambda).

        A condition that raises (e.g. on an empty score dict) is skipped rather
        than propagated, since the last rule is an unconditional fallback.
        """
        for arch in self.archetypes:
            try:
                if arch["condition"](scores):
                    return {"id": arch["id"], "label": arch["label"], "desc": arch["desc"]}
            except Exception:  # narrow from bare except: don't swallow SystemExit/KeyboardInterrupt
                continue
        return {"id": "apprentice", "label": "The Apprentice", "desc": "Building foundational fluency"}

    def generate_learning_path(
        self,
        scores: Dict[str, int],
        persona_id: str,
        hours_per_week: int,
        budget_usd: int,
        hardware_id: Optional[str] = None,
        preference: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive learning path with day/week/month granularity.

        Args:
            scores: per-dimension 0-100 scores keyed by dimension id.
            persona_id: caller-supplied persona key (e.g. "swe", "ml-eng").
            hours_per_week: weekly study budget; 0/None disables week estimates.
            budget_usd: spending budget, used to pick free vs. paid resources.
            hardware_id: optional hardware key; unlocks local-model goals.
            preference: reserved for future use (accepted, currently unused).

        Returns:
            Dict with overall_score, stage, archetype, gaps/strengths,
            learning_path (days/weeks/months), projections, and meta.
        """
        overall = round(sum(scores.values()) / len(scores))
        stage = self.determine_stage(overall)
        archetype = self.determine_archetype(scores)

        # Weakest dimensions first — these are the gaps to close.
        sorted_dims = sorted(scores.items(), key=lambda x: x[1])

        # Build time-bucketed actionables.
        days = self._generate_days(sorted_dims, persona_id, hours_per_week, budget_usd)
        weeks = self._generate_weeks(sorted_dims, persona_id, hours_per_week, budget_usd, stage)
        months = self._generate_months(sorted_dims, persona_id, hours_per_week, budget_usd, stage, hardware_id)

        # Hoisted: previously this double-sum was computed twice in the meta block.
        total_hours = sum(a.get("estimated_hours", 0) for w in weeks for a in w["actions"])

        return {
            "overall_score": overall,
            "stage": stage,
            "archetype": archetype,
            "dimension_scores": scores,
            "gaps": [{"dimension": d, "score": s, "priority": i + 1} for i, (d, s) in enumerate(sorted_dims[:3])],
            "strengths": [{"dimension": d, "score": s} for d, s in sorted_dims[-2:]],
            "learning_path": {
                "days": days,
                "weeks": weeks,
                "months": months,
            },
            "projections": self._compute_projections(overall, stage, hours_per_week),
            "meta": {
                "total_hours": total_hours,
                "estimated_weeks": max(1, round(total_hours / hours_per_week)) if hours_per_week else None,
                # NOTE(review): utcnow() is deprecated in 3.12; switching to
                # datetime.now(timezone.utc) would add "+00:00" to the string —
                # confirm consumers before changing the format.
                "generated_at": datetime.utcnow().isoformat(),
            }
        }

    def _generate_days(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int) -> List[Dict]:
        """Day 1-7 granular actionables — immediate, bite-sized wins.

        Day 1 targets the weakest dimension; days 2-7 rotate through a fixed
        template of micro-practices (persona/hours/budget currently unused here).
        """
        weakest = sorted_dims[0][0]

        # Day 1: one concrete starter action per dimension.
        day1_actions = {
            "literacy": {
                "title": "Read the Anthropic Prompt Engineering Guide",
                "desc": "The highest-ROI single hour. Changes how you talk to every model.",
                "link": "https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/overview",
                "time": "45 min",
                "type": "reading",
            },
            "tooling": {
                "title": "Try Google AI Studio with a real work task",
                "desc": "Open AI Studio. Paste any work email or doc. Ask: 'What am I missing?'",
                "link": "https://aistudio.google.com",
                "time": "15 min",
                "type": "hands_on",
            },
            "strategy": {
                "title": "List 3 weekly tasks you hate",
                "desc": "Open Notes. Write the 3 most repetitive things you did this week.",
                "link": None,
                "time": "10 min",
                "type": "worksheet",
            },
            "implementation": {
                "title": "Install Claude Code or Cursor",
                "desc": "One terminal command. You'll have an AI pair programmer before lunch.",
                "link": "https://docs.anthropic.com/en/docs/claude-code",
                "time": "10 min",
                "type": "setup",
            },
            "governance": {
                "title": "Skim the NIST AI RMF index",
                "desc": "Five minutes tells you what you don't know. The framework is free.",
                "link": "https://www.nist.gov/itl/ai-risk-management-framework",
                "time": "15 min",
                "type": "reading",
            },
            "data": {
                "title": "Ask Gemini about your spreadsheet",
                "desc": "Open any Google Sheet. Use the side panel: 'Summarize this for me.'",
                "link": "https://workspace.google.com/products/gemini/",
                "time": "5 min",
                "type": "hands_on",
            },
        }

        days = []

        # Day 1: close the biggest gap (literacy action as a safe default for
        # any dimension id not in the table).
        action = day1_actions.get(weakest, day1_actions["literacy"])
        days.append({
            "day": 1,
            "focus": f"Close your {weakest} gap",
            "title": action["title"],
            "description": action["desc"],
            "action_type": action["type"],
            "estimated_time": action["time"],
            "resource_link": action["link"],
            "why": f"Your {weakest} score is lowest. A small win here unlocks everything else.",
            "quick_win": True,
        })

        # Days 2-7: rotate through all six dimensions, one per day.
        day_templates = [
            ("tooling", "Daily AI tool practice", "Use an AI tool for one real work task today.", "15 min"),
            ("literacy", "Watch one AI explainer", "Pick a 10-min video on YouTube about LLMs, RAG, or agents.", "15 min"),
            ("implementation", "Build something tiny", "Create a prompt template or simple automation.", "30 min"),
            ("strategy", "Map one AI opportunity", "Pick a work process. Ask: how could AI help?", "20 min"),
            ("governance", "Review one AI risk", "Read about one AI failure case. What went wrong?", "15 min"),
            ("data", "Explore your data", "Open a dataset you use. What patterns could AI find?", "20 min"),
        ]

        for i, (dim, title, desc, time) in enumerate(day_templates, start=2):
            days.append({
                "day": i,
                "focus": dim,
                "title": title,
                "description": desc,
                "action_type": "practice",
                "estimated_time": time,
                "resource_link": None,
                "why": f"Building muscle memory in {dim} through consistent micro-practice.",
                "quick_win": False,
            })

        return days

    def _generate_weeks(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int, stage: Dict) -> List[Dict]:
        """Week-by-week structured plan with measurable milestones.

        Week count scales with the gap to the next stage and the weekly hour
        budget (clamped to 2-8 weeks; defaults to 4 when hours is falsy).
        """
        gap_to_next = max(0, self._next_stage_threshold(stage) - round(sum(s for _, s in sorted_dims) / len(sorted_dims)))
        estimated_weeks = max(2, min(8, math.ceil(gap_to_next / max(5, hours * 0.3)))) if hours else 4

        weeks = []
        for week_num in range(1, estimated_weeks + 1):
            # Alternate focus between the two weakest dimensions.
            focus_dims = [d for d, _ in sorted_dims[:2]]
            focus = focus_dims[(week_num - 1) % len(focus_dims)] if focus_dims else "literacy"

            actions = self._week_actions(week_num, focus, persona_id, hours, budget, stage)

            weeks.append({
                "week": week_num,
                "focus_dimension": focus,
                "theme": self._week_theme(week_num, stage),
                "milestone": self._week_milestone(week_num, focus, stage),
                "actions": actions,
                "estimated_hours": sum(a.get("estimated_hours", 0) for a in actions),
                "checkpoint": f"Score {min(95, 20 + week_num * 10)}% in {focus} dimension",
            })

        return weeks

    def _week_actions(self, week: int, focus: str, persona_id: str, hours: int, budget: int, stage: Dict) -> List[Dict]:
        """Generate specific actions for a week: one core learning block keyed
        on the focus dimension, plus a reflection action every week."""

        actions = []

        # Core learning block — one branch per known dimension id; an unknown
        # focus yields only the reflection action.
        if focus == "literacy":
            actions.append({
                "title": f"Week {week}: Deep-dive into AI fundamentals",
                "description": "Study transformer architecture, attention mechanisms, and model families.",
                "type": "course",
                "resource": "HuggingFace NLP Course",
                "link": "https://huggingface.co/learn/nlp-course",
                "estimated_hours": 2,
                "deliverable": "Complete 2 chapters + quiz",
                "cost": "$0",
            })
        elif focus == "tooling":
            actions.append({
                "title": f"Week {week}: Master one new AI tool",
                "description": "Deep exploration of one tool: Claude, Cursor, or a local model runner.",
                "type": "lab",
                "resource": "Tool documentation + 3 real tasks",
                "link": None,
                "estimated_hours": 2,
                "deliverable": "Complete 3 real work tasks using the tool",
                "cost": "$0" if budget == 0 else "$0-20",
            })
        elif focus == "strategy":
            actions.append({
                "title": f"Week {week}: Evaluate 2 AI opportunities",
                "description": "Map processes at work. Score by feasibility × impact. Present to one colleague.",
                "type": "workshop",
                "resource": "AI Use Case Canvas",
                "link": "https://aiusecase.io",
                "estimated_hours": 2,
                "deliverable": "One-page opportunity brief",
                "cost": "$0",
            })
        elif focus == "implementation":
            actions.append({
                "title": f"Week {week}: Build a working prototype",
                "description": "Create a RAG pipeline, agent, or API integration. Ship to a friend for feedback.",
                "type": "lab",
                "resource": "Dify or Flowise for no-code; LangChain for code",
                "link": "https://dify.ai",
                "estimated_hours": 3,
                "deliverable": "Working prototype + demo video",
                "cost": "$0",
            })
        elif focus == "governance":
            actions.append({
                "title": f"Week {week}: Draft your AI policy",
                "description": "Cover approved tools, data classification, review requirements.",
                "type": "workshop",
                "resource": "NIST AI RMF Template",
                "link": "https://www.nist.gov/artificial-intelligence/ai-risk-management-framework",
                "estimated_hours": 2,
                "deliverable": "1-page team AI policy draft",
                "cost": "$0",
            })
        elif focus == "data":
            actions.append({
                "title": f"Week {week}: Data pipeline practice",
                "description": "Clean a dataset, build embeddings, or set up a vector DB.",
                "type": "lab",
                "resource": "ChromaDB or Weaviate tutorials",
                "link": "https://docs.trychroma.com",
                "estimated_hours": 2,
                "deliverable": "Working vector search over your documents",
                "cost": "$0",
            })

        # Reflection action (every week).
        actions.append({
            "title": f"Week {week} reflection",
            "description": "Review what worked. Note one thing that surprised you. Adjust next week's plan.",
            "type": "reflection",
            "resource": "Personal learning journal",
            "link": None,
            "estimated_hours": 0.5,
            "deliverable": "3 bullet journal entries",
            "cost": "$0",
        })

        return actions

    def _week_theme(self, week: int, stage: Dict) -> str:
        """Rotating 8-theme cycle keyed only on week number (stage unused)."""
        themes = [
            "Foundation & Discovery",
            "Building Core Skills",
            "Expanding Your Toolkit",
            "Applying to Real Work",
            "Deepening Specialization",
            "Integration & Scale",
            "Governance & Safety",
            "Mastery & Teaching",
        ]
        return themes[(week - 1) % len(themes)]

    def _week_milestone(self, week: int, focus: str, stage: Dict) -> str:
        """Simple templated milestone string (stage unused for now)."""
        return f"Complete {week} week(s) of focused practice in {focus}"

    def _next_stage_threshold(self, current_stage: Dict) -> int:
        """Return the first threshold strictly above the current stage's (100 at mastery)."""
        thresholds = [20, 40, 60, 75, 90, 100]
        current = current_stage["threshold"]
        for t in thresholds:
            if t > current:
                return t
        return 100

    def _generate_months(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int, stage: Dict, hardware_id: Optional[str]) -> List[Dict]:
        """Month-level strategic goals with outcomes over a fixed 3-month horizon.

        Extra goals are appended for technical personas (month 1), local
        hardware owners (month 2), and already-strong learners (month 3).
        """
        months = []
        for month_num in range(1, 4):  # 3-month horizon
            goals = []

            if month_num == 1:
                goals = [
                    {"title": "Close weakest gap to 50%", "metric": f"{sorted_dims[0][0]} >= 50%", "tactics": ["Daily micro-practice", "One course completion", "Peer discussion"]}
                ]
                if persona_id in ["ml-eng", "swe", "data-sci"]:
                    goals.append({"title": "Ship one AI-assisted code project", "metric": "1 repo with AI integration", "tactics": ["Cursor/Claude Code", "API integration", "Document your approach"]})

            elif month_num == 2:
                goals = [
                    {"title": "Build cross-dimensional fluency", "metric": "All dimensions >= 45%", "tactics": ["Rotate weekly focus", "Interdisciplinary projects", "Teach a colleague"]}
                ]
                if hardware_id:
                    goals.append({"title": "Run local models for 50% of AI tasks", "metric": "Local inference usage >= 50%", "tactics": ["Ollama setup", "Model comparison", "Latency optimization"]})

            else:  # month 3
                goals = [
                    {"title": "Lead an AI initiative", "metric": "One shipped AI project or team workshop", "tactics": ["Identify opportunity", "Build consensus", "Execute with metrics"]}
                ]
                if sum(s for _, s in sorted_dims) / len(sorted_dims) >= 60:
                    goals.append({"title": "Mentor 2 colleagues into AI fluency", "metric": "2 people show measurable improvement", "tactics": ["Weekly office hours", "Curated resources", "Accountability check-ins"]})

            months.append({
                "month": month_num,
                "theme": ["Build Foundation", "Expand & Integrate", "Lead & Scale"][month_num - 1],
                "strategic_goals": goals,
                "checkpoint": f"Overall score target: {min(95, stage['threshold'] + month_num * 10)}%",
                "review_questions": [
                    "What was the biggest surprise this month?",
                    "Which action had the highest ROI?",
                    "What gap still feels hardest to close?",
                    "Who can you teach what you learned?",
                ],
            })

        return months

    def _compute_projections(self, overall: int, stage: Dict, hours_per_week: int) -> Dict:
        """Project the timeline to the next stage.

        Uses a rough heuristic of 1 point of improvement per 2 focused hours;
        estimated_weeks/projected_reach_date are None when there is no hour
        budget or no gap to close.
        """
        next_threshold = self._next_stage_threshold(stage)
        gap = max(0, next_threshold - overall)
        next_label = self.stages[min(self.stages.index(stage) + 1, len(self.stages) - 1)]["label"]

        # Single base dict replaces the two near-duplicate return literals.
        projection = {
            "current_stage": stage["label"],
            "next_stage": next_label,
            "gap_to_next": gap,
            "estimated_weeks": None,
            "at_hours_per_week": hours_per_week,
            "projected_reach_date": None,
        }

        if hours_per_week and gap > 0:
            hours_needed = gap * 2
            weeks_needed = max(1, math.ceil(hours_needed / hours_per_week))
            target_date = datetime.utcnow() + timedelta(weeks=weeks_needed)
            projection["estimated_weeks"] = weeks_needed
            projection["projected_reach_date"] = target_date.strftime("%b %d, %Y")

        return projection


# ============================================================================
# MAIN ENGINE — Orchestrator
# ============================================================================

class AdaptiveAssessmentEngine:
    """
    Main orchestrator:
    - Manages sessions
    - Adaptive question selection via IRT
    - Bayesian knowledge tracing
    - Generates learning paths
    """

    def __init__(self):
        self.question_bank = build_question_bank()
        # Index for O(1) lookups — replaces repeated linear scans of the bank.
        self._questions_by_id = {q.id: q for q in self.question_bank}
        self.irt = IRTEngine()
        self.selector = AdaptiveSelector(min_per_dimension=1, max_total=12)
        self.tracer = KnowledgeTracer()
        self.path_gen = LearningPathGenerator()
        # In-memory session store; not persisted across restarts.
        self.sessions: Dict[str, StudentState] = {}

    def start_session(self) -> Dict:
        """Initialize a new assessment session and return the first question."""
        session_id = str(uuid.uuid4())[:12]
        state = StudentState(
            session_id=session_id,
            theta={d: 0.0 for d in Dimension},
            theta_variance={d: 1.0 for d in Dimension},
        )
        self.sessions[session_id] = state

        # Select first question (highest info at theta=0).
        first_q = self.selector.select_next(state, self.question_bank)

        return {
            "session_id": session_id,
            "question": self._question_to_dict(first_q) if first_q else None,
            "progress": {"asked": 0, "total": 12, "dimensions_covered": []},
            "status": "in_progress",
        }

    def submit_answer(self, session_id: str, question_id: str, option_index: int) -> Dict:
        """Submit an answer and get the next question or final results.

        Returns an error payload (not an exception) for unknown session or
        question ids, matching the rest of this API's error convention.
        """
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}

        question = self._questions_by_id.get(question_id)
        if not question:
            return {"error": "Question not found", "status": "error"}

        # Update knowledge state.
        state = self.tracer.update(state, question, option_index)

        # Stop early once the selector says measurement is sufficient.
        if self.selector.should_stop(state):
            return self._finalize(state)

        # Select next question.
        next_q = self.selector.select_next(state, self.question_bank)

        # Calculate which dimensions have been covered so far.
        dim_coverage = set()
        for qid in state.asked_questions:
            q = self._questions_by_id.get(qid)
            if q:
                dim_coverage.add(q.dimension.value)

        return {
            "session_id": session_id,
            "question": self._question_to_dict(next_q) if next_q else None,
            "progress": {
                "asked": len(state.asked_questions),
                "total": 12,
                "dimensions_covered": list(dim_coverage),
                "current_dimension": next_q.dimension.value if next_q else None,
            },
            "interim_scores": self.tracer.get_dimension_scores(state),
            "status": "in_progress" if next_q else "complete",
        }

    def get_results(self, session_id: str) -> Dict:
        """Get final assessment results for an existing session."""
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}
        return self._finalize(state)

    def generate_path(self, session_id: str, persona_id: str, hours_per_week: int, budget_usd: int, hardware_id: Optional[str] = None, preference: Optional[str] = None) -> Dict:
        """Generate a learning path from this session's assessment results."""
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}

        scores = self.tracer.get_dimension_scores(state)
        path = self.path_gen.generate_learning_path(
            scores, persona_id, hours_per_week, budget_usd, hardware_id, preference
        )
        path["session_id"] = session_id
        return path

    def _finalize(self, state: StudentState) -> Dict:
        """Generate the final assessment report for a finished session."""
        scores = self.tracer.get_dimension_scores(state)
        overall = self.tracer.get_overall_score(state)
        stage = self.path_gen.determine_stage(overall)
        archetype = self.path_gen.determine_archetype(scores)

        # Strengths (top 2) and gaps (bottom 2), by ascending score.
        sorted_scores = sorted(scores.items(), key=lambda x: x[1])

        # Percentile estimation (simplified — can be calibrated with population data).
        # Assumes overall ~ Normal(50, 20). The standard-normal CDF is computed
        # with the stdlib identity Phi(z) = 0.5 * (1 + erf(z / sqrt(2))),
        # replacing the former in-function scipy import for a single value.
        z = (overall - 50) / 20
        percentile = int(round(100 * 0.5 * (1.0 + math.erf(z / math.sqrt(2.0)))))
        percentile = max(1, min(99, percentile))

        return {
            "session_id": state.session_id,
            "status": "complete",
            "overall_score": overall,
            "dimension_scores": scores,
            "stage": stage,
            "archetype": archetype,
            "strengths": [
                {"dimension": d, "label": DIMENSION_LABELS.get(Dimension(d), d), "score": s, "color": DIMENSION_COLORS.get(d, "#14B8A6")}
                for d, s in sorted_scores[-2:]
            ],
            "gaps": [
                {"dimension": d, "label": DIMENSION_LABELS.get(Dimension(d), d), "score": s, "color": DIMENSION_COLORS.get(d, "#F43F5E")}
                for d, s in sorted_scores[:2]
            ],
            "percentile": percentile,
            "questions_answered": len(state.asked_questions),
            "response_history": state.response_history,
            "latent_abilities": {d.value: round(t, 2) for d, t in state.theta.items()},
            "measurement_precision": {d.value: round(v, 3) for d, v in state.theta_variance.items()},
        }

    def _question_to_dict(self, q: Optional[Question]) -> Optional[Dict]:
        """Serialize a Question for the API response (None passes through)."""
        if not q:
            return None
        return {
            "id": q.id,
            "dimension": q.dimension.value,
            "dimension_label": DIMENSION_LABELS.get(q.dimension, q.dimension.value),
            "text": q.text,
            "options": q.options,
            "difficulty": round(q.difficulty, 2),
            "discrimination": round(q.discrimination, 2),
            "concept_tags": q.concept_tags,
        }


# Module-level singleton: importers share one engine instance — and therefore
# one in-memory `sessions` dict (not persisted across process restarts).
engine = AdaptiveAssessmentEngine()