File size: 45,973 Bytes
3054503
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
"""
ARCHAI Adaptive AI Assessment Engine
===================================
SOTA: 2PL-IRT adaptive selection + Bayesian knowledge tracing + LLM learning paths

Plug-and-play backend for your-ai-arch.netlify.app
Replaces static question bank with adaptive, intelligent assessment.
"""

import json
import math
import random
import uuid
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, field
from enum import Enum
import numpy as np
from scipy.optimize import minimize_scalar

# ============================================================================
# DATA MODELS — compatible with existing archai frontend
# ============================================================================

class Dimension(Enum):
    """The six assessment dimensions scored by the engine.

    Enum values are the lowercase string keys used in serialized
    response history and as keys into DIMENSION_COLORS / the scores
    dict sent to the frontend.
    """
    LITERACY = "literacy"
    TOOLING = "tooling"
    STRATEGY = "strategy"
    IMPLEMENTATION = "implementation"
    GOVERNANCE = "governance"
    DATA = "data"

# Human-readable display label per dimension, keyed by the Dimension enum.
DIMENSION_LABELS = {
    Dimension.LITERACY: "AI Literacy",
    Dimension.TOOLING: "Tool Proficiency",
    Dimension.STRATEGY: "Strategic Thinking",
    Dimension.IMPLEMENTATION: "Implementation",
    Dimension.GOVERNANCE: "Governance & Ethics",
    Dimension.DATA: "Data Fluency",
}

# Hex color per dimension, keyed by the enum *value* (string) rather than
# the enum member — presumably to match the frontend's JSON keys; verify
# against the archai client before changing.
DIMENSION_COLORS = {
    "literacy": "#FB7185",
    "tooling": "#10B981",
    "strategy": "#F97316",
    "implementation": "#14B8A6",
    "governance": "#F43F5E",
    "data": "#34D399",
}

@dataclass
class Question:
    """A single assessment item with its 2PL-IRT calibration parameters."""
    id: str  # stable question identifier, e.g. "lit_1"
    dimension: Dimension  # which of the six dimensions this item measures
    text: str  # prompt shown to the user
    options: List[str]  # ordered answer options, lowest ability (index 0) to highest
    difficulty: float  # b parameter in IRT (higher = harder)
    discrimination: float  # a parameter in IRT
    concept_tags: List[str] = field(default_factory=list)  # concept keys for learning-path mapping

@dataclass
class StudentState:
    """Bayesian knowledge state per dimension.

    Holds the running MAP ability estimate (theta) and its standard error
    for each dimension, plus the full question/response history for the
    session.
    """
    session_id: str
    theta: Dict[Dimension, float]  # latent ability estimate per dimension
    theta_variance: Dict[Dimension, float]  # standard error of each theta estimate
    asked_questions: List[str] = field(default_factory=list)  # question ids, in ask order
    responses: Dict[str, int] = field(default_factory=dict)  # question_id -> option_index
    response_history: List[Dict] = field(default_factory=list)  # per-response audit records
    # NOTE: datetime.utcnow() is deprecated (3.12+) but kept so the emitted
    # ISO string stays naive (no "+00:00" suffix), matching existing records.
    start_time: str = field(default_factory=lambda: datetime.utcnow().isoformat())

    def get_unasked(self, question_bank: List[Question]) -> List[Question]:
        """Return bank questions not yet asked, preserving bank order.

        Builds the asked-id set once so membership tests are O(1) instead
        of re-scanning asked_questions for every bank question.
        """
        asked = set(self.asked_questions)
        return [q for q in question_bank if q.id not in asked]


# ============================================================================
# QUESTION BANK — Calibrated with IRT parameters
# ============================================================================

def build_question_bank() -> List[Question]:
    """Calibrated question bank mapped to archai's 6 dimensions.

    Returns 24 Question items, 4 per dimension. The positional arguments
    after the option list follow the Question dataclass field order:
    difficulty (b) first, then discrimination (a), then concept tags.
    Within each dimension the four items step upward in difficulty
    (roughly b = -2 .. +2) so the adaptive selector can probe a wide
    ability range. NOTE(review): parameter values appear hand-assigned
    rather than fitted from response data — confirm calibration source.
    """
    bank = []
    
    # --- LITERACY ---
    bank.extend([
        Question("lit_1", Dimension.LITERACY,
            "How well can you explain the difference between machine learning, deep learning, and generative AI?",
            ["Not at all", "Basic overview", "Clearly with examples", "Could teach a workshop"],
            -2.0, 1.2, ["ml_basics", "dl_vs_ml", "gen_ai"]),
        Question("lit_2", Dimension.LITERACY,
            "How familiar are you with concepts like tokens, context windows, fine-tuning, and RAG?",
            ["Never heard of them", "Heard the terms", "Understand conceptually", "Use them in practice"],
            -1.0, 1.5, ["tokens", "rag", "fine_tuning"]),
        Question("lit_3", Dimension.LITERACY,
            "Can you explain what a transformer architecture is and how attention mechanisms work?",
            ["No idea", "Vague understanding", "Can explain to a peer", "Can implement from scratch"],
            0.5, 1.8, ["transformers", "attention", "architecture"]),
        Question("lit_4", Dimension.LITERACY,
            "How well do you understand the scaling laws that govern LLM performance?",
            ["Never heard", "Basic awareness", "Can discuss tradeoffs", "Can apply to model selection"],
            1.5, 2.0, ["scaling_laws", "compute", "model_selection"]),
    ])
    
    # --- TOOLING ---
    bank.extend([
        Question("tool_1", Dimension.TOOLING,
            "How frequently do you use AI tools (ChatGPT, Copilot, Claude, etc.) in your work?",
            ["Never", "Occasionally", "Weekly", "Daily, core to workflow"],
            -2.0, 1.0, ["chatgpt", "copilot", "claude", "usage_frequency"]),
        Question("tool_2", Dimension.TOOLING,
            "Can you chain multiple AI tools or prompts to complete a complex task end-to-end?",
            ["No", "Tried once or twice", "Sometimes successfully", "Regularly with custom workflows"],
            -0.5, 1.3, ["prompt_chaining", "tool_orchestration", "workflows"]),
        Question("tool_3", Dimension.TOOLING,
            "Have you set up API integrations with LLM providers (OpenAI, Anthropic, local models)?",
            ["Never", "Used a no-code tool", "Wrote code for it", "Built production integrations"],
            0.5, 1.5, ["api_integration", "openai_api", "local_models"]),
        Question("tool_4", Dimension.TOOLING,
            "How comfortable are you running open-source models locally with Ollama, LM Studio, or vLLM?",
            ["Don't know what those are", "Installed one once", "Run models regularly", "Optimize inference for production"],
            1.5, 1.8, ["ollama", "lm_studio", "vllm", "local_inference"]),
    ])
    
    # --- STRATEGY ---
    bank.extend([
        Question("strat_1", Dimension.STRATEGY,
            "When evaluating a new project, do you assess where AI could add value or reduce effort?",
            ["Never consider it", "Occasionally think about it", "Systematically evaluate", "Lead AI-first ideation"],
            -1.5, 1.1, ["ai_opportunity", "value_assessment", "project_evaluation"]),
        Question("strat_2", Dimension.STRATEGY,
            "Can you articulate the ROI or business case for an AI initiative to stakeholders?",
            ["Wouldn't know where to start", "Could outline rough benefits", "Can build a structured case", "Have done this successfully"],
            -0.5, 1.2, ["roi", "business_case", "stakeholder_communication"]),
        Question("strat_3", Dimension.STRATEGY,
            "Do you have a framework for prioritizing AI initiatives by feasibility vs impact?",
            ["No framework", "Informal mental model", "Structured scoring system", "Organization-wide prioritization process"],
            0.8, 1.6, ["prioritization", "feasibility", "impact_matrix"]),
        Question("strat_4", Dimension.STRATEGY,
            "Can you identify competitive moats and differentiation through AI capabilities?",
            ["Not applicable to my role", "Basic understanding", "Can analyze for my industry", "Have built AI-driven differentiation"],
            1.8, 2.0, ["competitive_moat", "differentiation", "ai_strategy"]),
    ])
    
    # --- IMPLEMENTATION ---
    bank.extend([
        Question("impl_1", Dimension.IMPLEMENTATION,
            "Have you built, deployed, or significantly configured an AI-powered solution?",
            ["Never", "Followed a tutorial", "Built a working prototype", "Deployed to production"],
            -1.5, 1.2, ["deployment", "prototype", "production"]),
        Question("impl_2", Dimension.IMPLEMENTATION,
            "How comfortable are you with prompt engineering, API integration, or model evaluation?",
            ["Not at all", "Basic awareness", "Can do with guidance", "Highly proficient"],
            -0.5, 1.4, ["prompt_engineering", "api_integration", "model_eval"]),
        Question("impl_3", Dimension.IMPLEMENTATION,
            "Have you built a RAG system or fine-tuned a model for a specific domain?",
            ["Don't know what RAG is", "Used a no-code RAG tool", "Built custom RAG pipeline", "Fine-tuned and deployed domain model"],
            0.8, 1.6, ["rag", "fine_tuning", "domain_adaptation"]),
        Question("impl_4", Dimension.IMPLEMENTATION,
            "Can you architect a multi-agent system or design LLM orchestration workflows?",
            ["No idea", "Understand conceptually", "Built a simple agent", "Production multi-agent system"],
            1.8, 1.9, ["agents", "orchestration", "langgraph", "crewai"]),
    ])
    
    # --- GOVERNANCE ---
    bank.extend([
        Question("gov_1", Dimension.GOVERNANCE,
            "How well do you understand AI risks like hallucination, bias, data privacy, and IP exposure?",
            ["Not aware", "Heard about them", "Understand key risks", "Can design mitigations"],
            -1.5, 1.0, ["hallucination", "bias", "privacy", "ip_risk"]),
        Question("gov_2", Dimension.GOVERNANCE,
            "Does your workflow include checks for AI output accuracy, fairness, or compliance?",
            ["No checks", "Occasional review", "Standard process", "Systematic governance framework"],
            -0.3, 1.2, ["accuracy_checks", "fairness", "compliance"]),
        Question("gov_3", Dimension.GOVERNANCE,
            "Are you familiar with AI regulations (EU AI Act, NIST AI RMF, ISO 42001)?",
            ["Never heard", "Aware they exist", "Can navigate requirements", "Implemented compliance program"],
            0.8, 1.5, ["eu_ai_act", "nist_rmf", "iso_42001", "regulation"]),
        Question("gov_4", Dimension.GOVERNANCE,
            "Can you design an AI governance framework covering data lineage, model cards, and audit trails?",
            ["Not my area", "Understand components", "Can design for a team", "Enterprise-wide implementation"],
            1.8, 1.8, ["governance_framework", "model_cards", "audit_trail", "data_lineage"]),
    ])
    
    # --- DATA ---
    bank.extend([
        Question("data_1", Dimension.DATA,
            "How comfortable are you working with structured and unstructured data for AI use cases?",
            ["Uncomfortable", "Can read simple reports", "Can clean and prep data", "Can architect data pipelines"],
            -1.5, 1.1, ["structured_data", "unstructured_data", "data_prep"]),
        Question("data_2", Dimension.DATA,
            "Can you evaluate whether data is sufficient and appropriate for training or prompting an AI system?",
            ["No", "Vaguely", "With guidance", "Yes, independently"],
            -0.3, 1.3, ["data_quality", "data_sufficiency", "training_data"]),
        Question("data_3", Dimension.DATA,
            "Have you worked with embeddings, vector databases, or data augmentation for AI?",
            ["No experience", "Used a vector DB via UI", "Built embedding pipelines", "Optimized retrieval systems"],
            0.8, 1.5, ["embeddings", "vector_db", "data_augmentation", "retrieval"]),
        Question("data_4", Dimension.DATA,
            "Can you design data collection strategies and evaluate dataset bias for model training?",
            ["Not applicable", "Basic awareness", "Can assess existing datasets", "Design collection from scratch"],
            1.6, 1.7, ["data_collection", "dataset_bias", "training_strategy"]),
    ])
    
    return bank


# ============================================================================
# IRT ENGINE — 2PL Model with Fisher Information
# ============================================================================

class IRTEngine:
    """
    Two-Parameter Logistic (2PL) IRT model.
    P(correct|theta) = sigmoid(a * (theta - b))

    a = discrimination (slope), b = difficulty (location),
    theta = latent ability.
    """

    @staticmethod
    def sigmoid(z: float) -> float:
        """Numerically stable logistic function.

        The naive 1/(1 + exp(-z)) raises OverflowError for z below about
        -710; branching on the sign keeps the exp() argument non-positive
        so both tails saturate cleanly to 0.0 / 1.0 instead.
        """
        if z >= 0:
            return 1.0 / (1.0 + math.exp(-z))
        ez = math.exp(z)
        return ez / (1.0 + ez)

    @staticmethod
    def probability(theta: float, a: float, b: float) -> float:
        """Probability of a correct (high-score) response."""
        return IRTEngine.sigmoid(a * (theta - b))

    @staticmethod
    def fisher_information(theta: float, a: float, b: float) -> float:
        """Fisher information — how precisely this item measures ability at theta.

        For the 2PL model: I(theta) = a^2 * p * (1 - p); maximal when
        p = 0.5, i.e. when item difficulty matches the ability estimate.
        """
        p = IRTEngine.probability(theta, a, b)
        return (a ** 2) * p * (1 - p)

    @staticmethod
    def likelihood(theta: float, responses: List[Tuple[float, float, int]], max_option: int = 3) -> float:
        """
        Compute the LOG-likelihood of theta given responses.
        responses: list of (a, b, option_index) tuples.
        option_index 0 = lowest, max_option = highest.
        We model this as a graded response model approximation.
        """
        log_lik = 0.0
        for a, b, opt_idx in responses:
            # Map option to a "correctness weight" 0.0 to 1.0
            weight = opt_idx / max_option
            p = IRTEngine.probability(theta, a, b)
            # Blend of correct and incorrect likelihoods:
            # higher option → dominated by p, lower option → by (1 - p).
            expected = weight * p + (1 - weight) * (1 - p)
            expected = max(expected, 1e-10)  # avoid log(0)
            log_lik += math.log(expected)
        return log_lik

    @staticmethod
    def estimate_theta(responses: List[Tuple[float, float, int]], prior_mean: float = 0.0, prior_std: float = 1.0) -> Tuple[float, float]:
        """
        MAP estimate of theta given responses, under a Gaussian prior
        N(prior_mean, prior_std^2). Returns (theta_estimate, standard_error).
        With no responses, falls back to the prior (mean, std).
        """
        if not responses:
            return prior_mean, prior_std

        def neg_log_posterior(theta):
            # Gaussian log-prior (up to an additive constant) + log-likelihood.
            log_prior = -0.5 * ((theta - prior_mean) / prior_std) ** 2
            log_lik = IRTEngine.likelihood(theta, responses)
            return -(log_prior + log_lik)

        # Bounded scalar optimization over the plausible ability range.
        result = minimize_scalar(neg_log_posterior, bounds=(-4.0, 4.0), method='bounded')
        theta_hat = result.x

        # Approximate SE from item Fisher information at the MAP plus the
        # prior precision 1/prior_std^2 (Gaussian approximation).
        fisher = sum(IRTEngine.fisher_information(theta_hat, a, b) for a, b, _ in responses)
        se = 1.0 / math.sqrt(fisher + 1.0 / (prior_std ** 2))

        return theta_hat, se


# ============================================================================
# ADAPTIVE SELECTOR — Fisher Information Maximization
# ============================================================================

class AdaptiveSelector:
    """
    Selects next question maximizing Fisher information at current ability estimate.
    Implements content balancing (ensures all dimensions are covered).
    """

    def __init__(self, min_per_dimension: int = 1, max_total: int = 12, target_precision: float = 0.3):
        self.min_per_dimension = min_per_dimension  # minimum items required per dimension
        self.max_total = max_total  # hard cap on assessment length
        self.target_precision = target_precision  # SE below this counts as "precise enough"
        # Cache an id -> Question index of the static bank once; the previous
        # implementation rebuilt the entire bank for EVERY asked question
        # inside should_stop().
        self._bank_by_id = {q.id: q for q in build_question_bank()}

    def select_next(
        self,
        state: StudentState,
        question_bank: List[Question],
        balance_penalty: float = 2.0
    ) -> Optional[Question]:
        """
        Select next question using Fisher information with content balancing.

        Returns None when every bank question has been asked. Ties keep the
        earliest candidate in bank order (same as the previous stable sort).
        """
        unasked = state.get_unasked(question_bank)
        if not unasked:
            return None

        # Count questions per dimension already asked (O(1) id lookups
        # instead of a linear bank scan per asked id).
        by_id = {q.id: q for q in question_bank}
        dim_counts = {d: 0 for d in Dimension}
        for qid in state.asked_questions:
            q = by_id.get(qid)
            if q:
                dim_counts[q.dimension] += 1

        # Single-pass argmax over information scores.
        best: Optional[Question] = None
        best_info = float('-inf')
        for q in unasked:
            theta = state.theta.get(q.dimension, 0.0)
            info = IRTEngine.fisher_information(theta, q.discrimination, q.difficulty)

            # Content balancing: boost under-represented dimensions.
            count = dim_counts[q.dimension]
            if count < self.min_per_dimension:
                info *= balance_penalty * (self.min_per_dimension - count + 1)

            # Precision stopping: if SE is already good, slightly deprioritize.
            se = state.theta_variance.get(q.dimension, 1.0)
            if se < self.target_precision:
                info *= 0.7

            if info > best_info:
                best_info = info
                best = q

        return best

    def should_stop(self, state: StudentState) -> bool:
        """Stop when max questions reached or all dimensions have sufficient precision.

        Early stop requires every dimension covered at least
        min_per_dimension times, every measured SE below target_precision,
        and at least 6 questions asked.
        """
        if len(state.asked_questions) >= self.max_total:
            return True

        dim_coverage = {d: 0 for d in Dimension}
        dim_precision = {d: float('inf') for d in Dimension}

        for qid in state.asked_questions:
            q = self._bank_by_id.get(qid)
            if q:
                dim_coverage[q.dimension] += 1
                dim_precision[q.dimension] = min(
                    dim_precision[q.dimension],
                    state.theta_variance.get(q.dimension, 1.0)
                )

        all_covered = all(c >= self.min_per_dimension for c in dim_coverage.values())
        all_precise = all(se < self.target_precision for se in dim_precision.values() if se != float('inf'))

        return all_covered and all_precise and len(state.asked_questions) >= 6


# ============================================================================
# KNOWLEDGE TRACING — Bayesian Update
# ============================================================================

class KnowledgeTracer:
    """
    Bayesian knowledge tracing per dimension.
    Updates latent ability (theta) after each response.
    """

    def __init__(self, prior_mean: float = 0.0, prior_std: float = 1.0):
        self.prior_mean = prior_mean  # mean of the Gaussian prior on theta
        self.prior_std = prior_std  # std dev of the Gaussian prior on theta
        self.irt = IRTEngine()
        # Index the (static) question bank once per tracer; the previous
        # implementation rebuilt the entire bank for every recorded
        # response on every update() call.
        self._bank_by_id = {q.id: q for q in build_question_bank()}

    def update(
        self,
        state: StudentState,
        question: Question,
        option_index: int,
        max_option: int = 3
    ) -> StudentState:
        """Update student state with new response using Bayesian IRT.

        Records the response in the state's history structures, then
        re-estimates theta and its SE for the question's dimension from
        ALL responses in that dimension. Mutates and returns `state`.
        """
        dim = question.dimension

        # Record the response in every history structure.
        state.asked_questions.append(question.id)
        state.responses[question.id] = option_index
        state.response_history.append({
            "question_id": question.id,
            "dimension": dim.value,
            "option_index": option_index,
            "timestamp": datetime.utcnow().isoformat(),
        })

        # Gather all responses for this dimension (O(1) lookup per id).
        dim_responses = []
        for qid, opt in state.responses.items():
            q = self._bank_by_id.get(qid)
            if q and q.dimension == dim:
                dim_responses.append((q.discrimination, q.difficulty, opt))

        # Re-estimate theta for this dimension via MAP.
        theta, se = self.irt.estimate_theta(dim_responses, self.prior_mean, self.prior_std)
        state.theta[dim] = theta
        state.theta_variance[dim] = se

        return state

    def get_dimension_scores(self, state: StudentState) -> Dict[str, int]:
        """Convert latent theta to 0-100 scores (archai-compatible).

        Affine-shifted sigmoid mapping: with this formula theta=0 → ~58,
        theta=2 → ~93, theta=-2 → ~20, clamped to [5, 95] so scores never
        hit the extremes. Dimensions with no responses score from the
        default theta of 0.0.
        """
        scores = {}
        for dim in Dimension:
            theta = state.theta.get(dim, 0.0)
            score = int(round(100 * self.irt.sigmoid(theta * 0.8 + 0.1) * 1.1))
            score = max(5, min(95, score))
            scores[dim.value] = score
        return scores

    def get_overall_score(self, state: StudentState) -> int:
        """Unweighted mean of the six dimension scores, rounded to int."""
        scores = self.get_dimension_scores(state)
        return round(sum(scores.values()) / len(scores))


# ============================================================================
# LEARNING PATH GENERATOR — Structured day/week/month actionables
# ============================================================================

class LearningPathGenerator:
    """
    Generates granular learning paths with day/week/month actionables.
    Uses rule-based logic aligned with archai's action plan structure.

    Public surface:
    - determine_stage(overall_score)   -> stage dict
    - determine_archetype(scores)      -> archetype summary dict
    - generate_learning_path(...)      -> full path payload (days/weeks/months)
    """

    def __init__(self):
        # Maturity stages, ordered ascending by threshold (0-100 overall score).
        self.stages = [
            {"id": "awareness", "label": "Awareness", "threshold": 20, "desc": "You recognize AI's potential"},
            {"id": "understanding", "label": "Understanding", "threshold": 40, "desc": "You grasp core concepts"},
            {"id": "application", "label": "Application", "threshold": 60, "desc": "You use AI daily"},
            {"id": "integration", "label": "Integration", "threshold": 75, "desc": "AI is embedded in your work"},
            {"id": "mastery", "label": "Mastery", "threshold": 90, "desc": "You architect AI systems"},
        ]

        # Archetype rules are evaluated in list order; the first condition that
        # returns True wins. The final "apprentice" entry is an always-true
        # fallback so determine_archetype never comes up empty.
        self.archetypes = [
            {"id": "pioneer", "label": "The Pioneer", "desc": "High across the board — charting new territory",
             "condition": lambda s: all(v >= 70 for v in s.values())},
            {"id": "responsible-builder", "label": "The Responsible Builder", "desc": "Balances capability with caution",
             "condition": lambda s: s.get("governance", 0) >= 60 and s.get("implementation", 0) >= 50},
            {"id": "data-craftsman", "label": "The Data Craftsman", "desc": "Data-first, builds from evidence",
             "condition": lambda s: s.get("data", 0) >= 60 and s.get("implementation", 0) >= 50},
            {"id": "power-user", "label": "The Power User", "desc": "Fluent with tools, ready to strategize next",
             "condition": lambda s: s.get("tooling", 0) >= 60 and s.get("strategy", 0) < 50},
            {"id": "vision-caster", "label": "The Vision Caster", "desc": "Strategic thinker — hands-on comes next",
             "condition": lambda s: s.get("strategy", 0) >= 60 and s.get("implementation", 0) < 50},
            {"id": "integrator", "label": "The Integrator", "desc": "Well-rounded across every dimension",
             "condition": self._is_integrator},
            {"id": "explorer", "label": "The Explorer", "desc": "Curious and ready to dive in",
             "condition": lambda s: all(v < 45 for v in s.values())},
            {"id": "apprentice", "label": "The Apprentice", "desc": "Building foundational fluency",
             "condition": lambda s: True},  # fallback
        ]

    @staticmethod
    def _is_integrator(scores: Dict[str, int]) -> bool:
        """True when scores are uniformly solid: population std-dev < 18 and mean >= 50.

        Extracted from an unreadable walrus-in-tuple lambda; math is unchanged.
        Raises ZeroDivisionError on an empty dict (caught by determine_archetype).
        """
        values = list(scores.values())
        mean = sum(values) / len(values)
        std = (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
        return std < 18 and mean >= 50

    def determine_stage(self, overall_score: int) -> Dict:
        """Return the highest stage whose threshold the overall score meets."""
        stage = self.stages[0]
        for s in self.stages:
            if overall_score >= s["threshold"]:
                stage = s
        return stage

    def determine_archetype(self, scores: Dict[str, int]) -> Dict:
        """Return the first matching archetype (id/label/desc only — no lambda).

        A condition that raises (e.g. on an empty score dict) is skipped rather
        than propagated, since the last rule is an unconditional fallback.
        """
        for arch in self.archetypes:
            try:
                if arch["condition"](scores):
                    return {"id": arch["id"], "label": arch["label"], "desc": arch["desc"]}
            except Exception:  # narrow from bare except: don't swallow SystemExit/KeyboardInterrupt
                continue
        return {"id": "apprentice", "label": "The Apprentice", "desc": "Building foundational fluency"}

    def generate_learning_path(
        self,
        scores: Dict[str, int],
        persona_id: str,
        hours_per_week: int,
        budget_usd: int,
        hardware_id: Optional[str] = None,
        preference: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive learning path with day/week/month granularity.

        Args:
            scores: per-dimension 0-100 scores keyed by dimension id.
            persona_id: caller-supplied persona key (e.g. "swe", "ml-eng").
            hours_per_week: weekly study budget; 0/None disables week estimates.
            budget_usd: spending budget, used to pick free vs. paid resources.
            hardware_id: optional hardware key; unlocks local-model goals.
            preference: reserved for future use (accepted, currently unused).

        Returns:
            Dict with overall_score, stage, archetype, gaps/strengths,
            learning_path (days/weeks/months), projections, and meta.
        """
        overall = round(sum(scores.values()) / len(scores))
        stage = self.determine_stage(overall)
        archetype = self.determine_archetype(scores)

        # Weakest dimensions first — these are the gaps to close.
        sorted_dims = sorted(scores.items(), key=lambda x: x[1])

        # Build time-bucketed actionables.
        days = self._generate_days(sorted_dims, persona_id, hours_per_week, budget_usd)
        weeks = self._generate_weeks(sorted_dims, persona_id, hours_per_week, budget_usd, stage)
        months = self._generate_months(sorted_dims, persona_id, hours_per_week, budget_usd, stage, hardware_id)

        # Hoisted: previously this double-sum was computed twice in the meta block.
        total_hours = sum(a.get("estimated_hours", 0) for w in weeks for a in w["actions"])

        return {
            "overall_score": overall,
            "stage": stage,
            "archetype": archetype,
            "dimension_scores": scores,
            "gaps": [{"dimension": d, "score": s, "priority": i + 1} for i, (d, s) in enumerate(sorted_dims[:3])],
            "strengths": [{"dimension": d, "score": s} for d, s in sorted_dims[-2:]],
            "learning_path": {
                "days": days,
                "weeks": weeks,
                "months": months,
            },
            "projections": self._compute_projections(overall, stage, hours_per_week),
            "meta": {
                "total_hours": total_hours,
                "estimated_weeks": max(1, round(total_hours / hours_per_week)) if hours_per_week else None,
                # NOTE(review): utcnow() is deprecated in 3.12; switching to
                # datetime.now(timezone.utc) would add "+00:00" to the string —
                # confirm consumers before changing the format.
                "generated_at": datetime.utcnow().isoformat(),
            }
        }

    def _generate_days(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int) -> List[Dict]:
        """Day 1-7 granular actionables — immediate, bite-sized wins.

        Day 1 targets the weakest dimension; days 2-7 rotate through a fixed
        template of micro-practices (persona/hours/budget currently unused here).
        """
        weakest = sorted_dims[0][0]

        # Day 1: one concrete starter action per dimension.
        day1_actions = {
            "literacy": {
                "title": "Read the Anthropic Prompt Engineering Guide",
                "desc": "The highest-ROI single hour. Changes how you talk to every model.",
                "link": "https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/overview",
                "time": "45 min",
                "type": "reading",
            },
            "tooling": {
                "title": "Try Google AI Studio with a real work task",
                "desc": "Open AI Studio. Paste any work email or doc. Ask: 'What am I missing?'",
                "link": "https://aistudio.google.com",
                "time": "15 min",
                "type": "hands_on",
            },
            "strategy": {
                "title": "List 3 weekly tasks you hate",
                "desc": "Open Notes. Write the 3 most repetitive things you did this week.",
                "link": None,
                "time": "10 min",
                "type": "worksheet",
            },
            "implementation": {
                "title": "Install Claude Code or Cursor",
                "desc": "One terminal command. You'll have an AI pair programmer before lunch.",
                "link": "https://docs.anthropic.com/en/docs/claude-code",
                "time": "10 min",
                "type": "setup",
            },
            "governance": {
                "title": "Skim the NIST AI RMF index",
                "desc": "Five minutes tells you what you don't know. The framework is free.",
                "link": "https://www.nist.gov/itl/ai-risk-management-framework",
                "time": "15 min",
                "type": "reading",
            },
            "data": {
                "title": "Ask Gemini about your spreadsheet",
                "desc": "Open any Google Sheet. Use the side panel: 'Summarize this for me.'",
                "link": "https://workspace.google.com/products/gemini/",
                "time": "5 min",
                "type": "hands_on",
            },
        }

        days = []

        # Day 1: close the biggest gap (literacy action as a safe default for
        # any dimension id not in the table).
        action = day1_actions.get(weakest, day1_actions["literacy"])
        days.append({
            "day": 1,
            "focus": f"Close your {weakest} gap",
            "title": action["title"],
            "description": action["desc"],
            "action_type": action["type"],
            "estimated_time": action["time"],
            "resource_link": action["link"],
            "why": f"Your {weakest} score is lowest. A small win here unlocks everything else.",
            "quick_win": True,
        })

        # Days 2-7: rotate through all six dimensions, one per day.
        day_templates = [
            ("tooling", "Daily AI tool practice", "Use an AI tool for one real work task today.", "15 min"),
            ("literacy", "Watch one AI explainer", "Pick a 10-min video on YouTube about LLMs, RAG, or agents.", "15 min"),
            ("implementation", "Build something tiny", "Create a prompt template or simple automation.", "30 min"),
            ("strategy", "Map one AI opportunity", "Pick a work process. Ask: how could AI help?", "20 min"),
            ("governance", "Review one AI risk", "Read about one AI failure case. What went wrong?", "15 min"),
            ("data", "Explore your data", "Open a dataset you use. What patterns could AI find?", "20 min"),
        ]

        for i, (dim, title, desc, time) in enumerate(day_templates, start=2):
            days.append({
                "day": i,
                "focus": dim,
                "title": title,
                "description": desc,
                "action_type": "practice",
                "estimated_time": time,
                "resource_link": None,
                "why": f"Building muscle memory in {dim} through consistent micro-practice.",
                "quick_win": False,
            })

        return days

    def _generate_weeks(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int, stage: Dict) -> List[Dict]:
        """Week-by-week structured plan with measurable milestones.

        Week count scales with the gap to the next stage and the weekly hour
        budget (clamped to 2-8 weeks; defaults to 4 when hours is falsy).
        """
        gap_to_next = max(0, self._next_stage_threshold(stage) - round(sum(s for _, s in sorted_dims) / len(sorted_dims)))
        estimated_weeks = max(2, min(8, math.ceil(gap_to_next / max(5, hours * 0.3)))) if hours else 4

        weeks = []
        for week_num in range(1, estimated_weeks + 1):
            # Alternate focus between the two weakest dimensions.
            focus_dims = [d for d, _ in sorted_dims[:2]]
            focus = focus_dims[(week_num - 1) % len(focus_dims)] if focus_dims else "literacy"

            actions = self._week_actions(week_num, focus, persona_id, hours, budget, stage)

            weeks.append({
                "week": week_num,
                "focus_dimension": focus,
                "theme": self._week_theme(week_num, stage),
                "milestone": self._week_milestone(week_num, focus, stage),
                "actions": actions,
                "estimated_hours": sum(a.get("estimated_hours", 0) for a in actions),
                "checkpoint": f"Score {min(95, 20 + week_num * 10)}% in {focus} dimension",
            })

        return weeks

    def _week_actions(self, week: int, focus: str, persona_id: str, hours: int, budget: int, stage: Dict) -> List[Dict]:
        """Generate specific actions for a week: one core learning block keyed
        on the focus dimension, plus a reflection action every week."""

        actions = []

        # Core learning block — one branch per known dimension id; an unknown
        # focus yields only the reflection action.
        if focus == "literacy":
            actions.append({
                "title": f"Week {week}: Deep-dive into AI fundamentals",
                "description": "Study transformer architecture, attention mechanisms, and model families.",
                "type": "course",
                "resource": "HuggingFace NLP Course",
                "link": "https://huggingface.co/learn/nlp-course",
                "estimated_hours": 2,
                "deliverable": "Complete 2 chapters + quiz",
                "cost": "$0",
            })
        elif focus == "tooling":
            actions.append({
                "title": f"Week {week}: Master one new AI tool",
                "description": "Deep exploration of one tool: Claude, Cursor, or a local model runner.",
                "type": "lab",
                "resource": "Tool documentation + 3 real tasks",
                "link": None,
                "estimated_hours": 2,
                "deliverable": "Complete 3 real work tasks using the tool",
                "cost": "$0" if budget == 0 else "$0-20",
            })
        elif focus == "strategy":
            actions.append({
                "title": f"Week {week}: Evaluate 2 AI opportunities",
                "description": "Map processes at work. Score by feasibility × impact. Present to one colleague.",
                "type": "workshop",
                "resource": "AI Use Case Canvas",
                "link": "https://aiusecase.io",
                "estimated_hours": 2,
                "deliverable": "One-page opportunity brief",
                "cost": "$0",
            })
        elif focus == "implementation":
            actions.append({
                "title": f"Week {week}: Build a working prototype",
                "description": "Create a RAG pipeline, agent, or API integration. Ship to a friend for feedback.",
                "type": "lab",
                "resource": "Dify or Flowise for no-code; LangChain for code",
                "link": "https://dify.ai",
                "estimated_hours": 3,
                "deliverable": "Working prototype + demo video",
                "cost": "$0",
            })
        elif focus == "governance":
            actions.append({
                "title": f"Week {week}: Draft your AI policy",
                "description": "Cover approved tools, data classification, review requirements.",
                "type": "workshop",
                "resource": "NIST AI RMF Template",
                "link": "https://www.nist.gov/artificial-intelligence/ai-risk-management-framework",
                "estimated_hours": 2,
                "deliverable": "1-page team AI policy draft",
                "cost": "$0",
            })
        elif focus == "data":
            actions.append({
                "title": f"Week {week}: Data pipeline practice",
                "description": "Clean a dataset, build embeddings, or set up a vector DB.",
                "type": "lab",
                "resource": "ChromaDB or Weaviate tutorials",
                "link": "https://docs.trychroma.com",
                "estimated_hours": 2,
                "deliverable": "Working vector search over your documents",
                "cost": "$0",
            })

        # Reflection action (every week).
        actions.append({
            "title": f"Week {week} reflection",
            "description": "Review what worked. Note one thing that surprised you. Adjust next week's plan.",
            "type": "reflection",
            "resource": "Personal learning journal",
            "link": None,
            "estimated_hours": 0.5,
            "deliverable": "3 bullet journal entries",
            "cost": "$0",
        })

        return actions

    def _week_theme(self, week: int, stage: Dict) -> str:
        """Rotating 8-theme cycle keyed only on week number (stage unused)."""
        themes = [
            "Foundation & Discovery",
            "Building Core Skills",
            "Expanding Your Toolkit",
            "Applying to Real Work",
            "Deepening Specialization",
            "Integration & Scale",
            "Governance & Safety",
            "Mastery & Teaching",
        ]
        return themes[(week - 1) % len(themes)]

    def _week_milestone(self, week: int, focus: str, stage: Dict) -> str:
        """Simple templated milestone string (stage unused for now)."""
        return f"Complete {week} week(s) of focused practice in {focus}"

    def _next_stage_threshold(self, current_stage: Dict) -> int:
        """Return the first threshold strictly above the current stage's (100 at mastery)."""
        thresholds = [20, 40, 60, 75, 90, 100]
        current = current_stage["threshold"]
        for t in thresholds:
            if t > current:
                return t
        return 100

    def _generate_months(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int, stage: Dict, hardware_id: Optional[str]) -> List[Dict]:
        """Month-level strategic goals with outcomes over a fixed 3-month horizon.

        Extra goals are appended for technical personas (month 1), local
        hardware owners (month 2), and already-strong learners (month 3).
        """
        months = []
        for month_num in range(1, 4):  # 3-month horizon
            goals = []

            if month_num == 1:
                goals = [
                    {"title": "Close weakest gap to 50%", "metric": f"{sorted_dims[0][0]} >= 50%", "tactics": ["Daily micro-practice", "One course completion", "Peer discussion"]}
                ]
                if persona_id in ["ml-eng", "swe", "data-sci"]:
                    goals.append({"title": "Ship one AI-assisted code project", "metric": "1 repo with AI integration", "tactics": ["Cursor/Claude Code", "API integration", "Document your approach"]})

            elif month_num == 2:
                goals = [
                    {"title": "Build cross-dimensional fluency", "metric": "All dimensions >= 45%", "tactics": ["Rotate weekly focus", "Interdisciplinary projects", "Teach a colleague"]}
                ]
                if hardware_id:
                    goals.append({"title": "Run local models for 50% of AI tasks", "metric": "Local inference usage >= 50%", "tactics": ["Ollama setup", "Model comparison", "Latency optimization"]})

            else:  # month 3
                goals = [
                    {"title": "Lead an AI initiative", "metric": "One shipped AI project or team workshop", "tactics": ["Identify opportunity", "Build consensus", "Execute with metrics"]}
                ]
                if sum(s for _, s in sorted_dims) / len(sorted_dims) >= 60:
                    goals.append({"title": "Mentor 2 colleagues into AI fluency", "metric": "2 people show measurable improvement", "tactics": ["Weekly office hours", "Curated resources", "Accountability check-ins"]})

            months.append({
                "month": month_num,
                "theme": ["Build Foundation", "Expand & Integrate", "Lead & Scale"][month_num - 1],
                "strategic_goals": goals,
                "checkpoint": f"Overall score target: {min(95, stage['threshold'] + month_num * 10)}%",
                "review_questions": [
                    "What was the biggest surprise this month?",
                    "Which action had the highest ROI?",
                    "What gap still feels hardest to close?",
                    "Who can you teach what you learned?",
                ],
            })

        return months

    def _compute_projections(self, overall: int, stage: Dict, hours_per_week: int) -> Dict:
        """Project the timeline to the next stage.

        Uses a rough heuristic of 1 point of improvement per 2 focused hours;
        estimated_weeks/projected_reach_date are None when there is no hour
        budget or no gap to close.
        """
        next_threshold = self._next_stage_threshold(stage)
        gap = max(0, next_threshold - overall)
        next_label = self.stages[min(self.stages.index(stage) + 1, len(self.stages) - 1)]["label"]

        # Single base dict replaces the two near-duplicate return literals.
        projection = {
            "current_stage": stage["label"],
            "next_stage": next_label,
            "gap_to_next": gap,
            "estimated_weeks": None,
            "at_hours_per_week": hours_per_week,
            "projected_reach_date": None,
        }

        if hours_per_week and gap > 0:
            hours_needed = gap * 2
            weeks_needed = max(1, math.ceil(hours_needed / hours_per_week))
            target_date = datetime.utcnow() + timedelta(weeks=weeks_needed)
            projection["estimated_weeks"] = weeks_needed
            projection["projected_reach_date"] = target_date.strftime("%b %d, %Y")

        return projection


# ============================================================================
# MAIN ENGINE — Orchestrator
# ============================================================================

class AdaptiveAssessmentEngine:
    """
    Main orchestrator:
    - Manages sessions
    - Adaptive question selection via IRT
    - Bayesian knowledge tracing
    - Generates learning paths
    """

    def __init__(self):
        self.question_bank = build_question_bank()
        # Index for O(1) lookups — replaces repeated linear scans of the bank.
        self._questions_by_id = {q.id: q for q in self.question_bank}
        self.irt = IRTEngine()
        self.selector = AdaptiveSelector(min_per_dimension=1, max_total=12)
        self.tracer = KnowledgeTracer()
        self.path_gen = LearningPathGenerator()
        # In-memory session store; not persisted across restarts.
        self.sessions: Dict[str, StudentState] = {}

    def start_session(self) -> Dict:
        """Initialize a new assessment session and return the first question."""
        session_id = str(uuid.uuid4())[:12]
        state = StudentState(
            session_id=session_id,
            theta={d: 0.0 for d in Dimension},
            theta_variance={d: 1.0 for d in Dimension},
        )
        self.sessions[session_id] = state

        # Select first question (highest info at theta=0).
        first_q = self.selector.select_next(state, self.question_bank)

        return {
            "session_id": session_id,
            "question": self._question_to_dict(first_q) if first_q else None,
            "progress": {"asked": 0, "total": 12, "dimensions_covered": []},
            "status": "in_progress",
        }

    def submit_answer(self, session_id: str, question_id: str, option_index: int) -> Dict:
        """Submit an answer and get the next question or final results.

        Returns an error payload (not an exception) for unknown session or
        question ids, matching the rest of this API's error convention.
        """
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}

        question = self._questions_by_id.get(question_id)
        if not question:
            return {"error": "Question not found", "status": "error"}

        # Update knowledge state.
        state = self.tracer.update(state, question, option_index)

        # Stop early once the selector says measurement is sufficient.
        if self.selector.should_stop(state):
            return self._finalize(state)

        # Select next question.
        next_q = self.selector.select_next(state, self.question_bank)

        # Calculate which dimensions have been covered so far.
        dim_coverage = set()
        for qid in state.asked_questions:
            q = self._questions_by_id.get(qid)
            if q:
                dim_coverage.add(q.dimension.value)

        return {
            "session_id": session_id,
            "question": self._question_to_dict(next_q) if next_q else None,
            "progress": {
                "asked": len(state.asked_questions),
                "total": 12,
                "dimensions_covered": list(dim_coverage),
                "current_dimension": next_q.dimension.value if next_q else None,
            },
            "interim_scores": self.tracer.get_dimension_scores(state),
            "status": "in_progress" if next_q else "complete",
        }

    def get_results(self, session_id: str) -> Dict:
        """Get final assessment results for an existing session."""
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}
        return self._finalize(state)

    def generate_path(self, session_id: str, persona_id: str, hours_per_week: int, budget_usd: int, hardware_id: Optional[str] = None, preference: Optional[str] = None) -> Dict:
        """Generate a learning path from this session's assessment results."""
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}

        scores = self.tracer.get_dimension_scores(state)
        path = self.path_gen.generate_learning_path(
            scores, persona_id, hours_per_week, budget_usd, hardware_id, preference
        )
        path["session_id"] = session_id
        return path

    def _finalize(self, state: StudentState) -> Dict:
        """Generate the final assessment report for a finished session."""
        scores = self.tracer.get_dimension_scores(state)
        overall = self.tracer.get_overall_score(state)
        stage = self.path_gen.determine_stage(overall)
        archetype = self.path_gen.determine_archetype(scores)

        # Strengths (top 2) and gaps (bottom 2), by ascending score.
        sorted_scores = sorted(scores.items(), key=lambda x: x[1])

        # Percentile estimation (simplified — can be calibrated with population data).
        # Assumes overall ~ Normal(50, 20). The standard-normal CDF is computed
        # with the stdlib identity Phi(z) = 0.5 * (1 + erf(z / sqrt(2))),
        # replacing the former in-function scipy import for a single value.
        z = (overall - 50) / 20
        percentile = int(round(100 * 0.5 * (1.0 + math.erf(z / math.sqrt(2.0)))))
        percentile = max(1, min(99, percentile))

        return {
            "session_id": state.session_id,
            "status": "complete",
            "overall_score": overall,
            "dimension_scores": scores,
            "stage": stage,
            "archetype": archetype,
            "strengths": [
                {"dimension": d, "label": DIMENSION_LABELS.get(Dimension(d), d), "score": s, "color": DIMENSION_COLORS.get(d, "#14B8A6")}
                for d, s in sorted_scores[-2:]
            ],
            "gaps": [
                {"dimension": d, "label": DIMENSION_LABELS.get(Dimension(d), d), "score": s, "color": DIMENSION_COLORS.get(d, "#F43F5E")}
                for d, s in sorted_scores[:2]
            ],
            "percentile": percentile,
            "questions_answered": len(state.asked_questions),
            "response_history": state.response_history,
            "latent_abilities": {d.value: round(t, 2) for d, t in state.theta.items()},
            "measurement_precision": {d.value: round(v, 3) for d, v in state.theta_variance.items()},
        }

    def _question_to_dict(self, q: Optional[Question]) -> Optional[Dict]:
        """Serialize a Question for the API response (None passes through)."""
        if not q:
            return None
        return {
            "id": q.id,
            "dimension": q.dimension.value,
            "dimension_label": DIMENSION_LABELS.get(q.dimension, q.dimension.value),
            "text": q.text,
            "options": q.options,
            "difficulty": round(q.difficulty, 2),
            "discrimination": round(q.discrimination, 2),
            "concept_tags": q.concept_tags,
        }


# Module-level singleton: importers share one engine instance — and therefore
# one in-memory `sessions` dict (not persisted across process restarts).
engine = AdaptiveAssessmentEngine()