"""
ARCHAI Adaptive AI Assessment Engine
===================================
SOTA: 2PL-IRT adaptive selection + Bayesian knowledge tracing + LLM learning paths
Plug-and-play backend for your-ai-arch.netlify.app
Replaces static question bank with adaptive, intelligent assessment.
"""
import json
import math
import random
import uuid
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, field
from enum import Enum
import numpy as np
from scipy.optimize import minimize_scalar
# ============================================================================
# DATA MODELS — compatible with existing archai frontend
# ============================================================================
class Dimension(Enum):
    """The six assessment dimensions scored by the archai frontend."""
    LITERACY = "literacy"
    TOOLING = "tooling"
    STRATEGY = "strategy"
    IMPLEMENTATION = "implementation"
    GOVERNANCE = "governance"
    DATA = "data"
# Human-readable labels per dimension, keyed by Dimension member.
DIMENSION_LABELS = {
    Dimension.LITERACY: "AI Literacy",
    Dimension.TOOLING: "Tool Proficiency",
    Dimension.STRATEGY: "Strategic Thinking",
    Dimension.IMPLEMENTATION: "Implementation",
    Dimension.GOVERNANCE: "Governance & Ethics",
    Dimension.DATA: "Data Fluency",
}
# Hex accent colors per dimension value string (presumably matches the
# frontend palette — confirm against the archai UI theme).
DIMENSION_COLORS = {
    "literacy": "#FB7185",
    "tooling": "#10B981",
    "strategy": "#F97316",
    "implementation": "#14B8A6",
    "governance": "#F43F5E",
    "data": "#34D399",
}
@dataclass
class Question:
    """A single assessment item with 2PL-IRT calibration parameters."""
    id: str                 # stable question identifier, e.g. "lit_1"
    dimension: Dimension    # dimension this item measures
    text: str               # question text shown to the user
    options: List[str]      # answer options ordered lowest -> highest proficiency
    difficulty: float  # b parameter in IRT (higher = harder)
    discrimination: float  # a parameter in IRT
    concept_tags: List[str] = field(default_factory=list)  # concepts covered by the item
@dataclass
class StudentState:
    """Bayesian knowledge state per dimension."""
    session_id: str
    theta: Dict[Dimension, float]  # latent ability estimate per dimension
    theta_variance: Dict[Dimension, float]  # standard error of each theta estimate
    asked_questions: List[str] = field(default_factory=list)  # question ids, in ask order
    responses: Dict[str, int] = field(default_factory=dict)  # question_id -> option_index
    response_history: List[Dict] = field(default_factory=list)  # per-response audit records
    # NOTE(review): datetime.utcnow() is deprecated in Python 3.12 and its
    # isoformat carries no timezone offset — confirm downstream expectations.
    start_time: str = field(default_factory=lambda: datetime.utcnow().isoformat())

    def get_unasked(self, question_bank: List[Question]) -> List[Question]:
        """Return the bank questions not yet asked in this session."""
        return [q for q in question_bank if q.id not in self.asked_questions]
# ============================================================================
# QUESTION BANK — Calibrated with IRT parameters
# ============================================================================
def build_question_bank() -> List[Question]:
    """Calibrated question bank mapped to archai's 6 dimensions.

    Each Question is built positionally:
    (id, dimension, text, options, difficulty b, discrimination a, concept_tags).
    Options are ordered lowest -> highest proficiency; four items per
    dimension spanning roughly b = -2.0 .. +2.0.

    NOTE(review): the bank is rebuilt from scratch on every call — callers
    that look items up repeatedly should index the result once by id.
    """
    bank = []
    # --- LITERACY ---
    bank.extend([
        Question("lit_1", Dimension.LITERACY,
                 "How well can you explain the difference between machine learning, deep learning, and generative AI?",
                 ["Not at all", "Basic overview", "Clearly with examples", "Could teach a workshop"],
                 -2.0, 1.2, ["ml_basics", "dl_vs_ml", "gen_ai"]),
        Question("lit_2", Dimension.LITERACY,
                 "How familiar are you with concepts like tokens, context windows, fine-tuning, and RAG?",
                 ["Never heard of them", "Heard the terms", "Understand conceptually", "Use them in practice"],
                 -1.0, 1.5, ["tokens", "rag", "fine_tuning"]),
        Question("lit_3", Dimension.LITERACY,
                 "Can you explain what a transformer architecture is and how attention mechanisms work?",
                 ["No idea", "Vague understanding", "Can explain to a peer", "Can implement from scratch"],
                 0.5, 1.8, ["transformers", "attention", "architecture"]),
        Question("lit_4", Dimension.LITERACY,
                 "How well do you understand the scaling laws that govern LLM performance?",
                 ["Never heard", "Basic awareness", "Can discuss tradeoffs", "Can apply to model selection"],
                 1.5, 2.0, ["scaling_laws", "compute", "model_selection"]),
    ])
    # --- TOOLING ---
    bank.extend([
        Question("tool_1", Dimension.TOOLING,
                 "How frequently do you use AI tools (ChatGPT, Copilot, Claude, etc.) in your work?",
                 ["Never", "Occasionally", "Weekly", "Daily, core to workflow"],
                 -2.0, 1.0, ["chatgpt", "copilot", "claude", "usage_frequency"]),
        Question("tool_2", Dimension.TOOLING,
                 "Can you chain multiple AI tools or prompts to complete a complex task end-to-end?",
                 ["No", "Tried once or twice", "Sometimes successfully", "Regularly with custom workflows"],
                 -0.5, 1.3, ["prompt_chaining", "tool_orchestration", "workflows"]),
        Question("tool_3", Dimension.TOOLING,
                 "Have you set up API integrations with LLM providers (OpenAI, Anthropic, local models)?",
                 ["Never", "Used a no-code tool", "Wrote code for it", "Built production integrations"],
                 0.5, 1.5, ["api_integration", "openai_api", "local_models"]),
        Question("tool_4", Dimension.TOOLING,
                 "How comfortable are you running open-source models locally with Ollama, LM Studio, or vLLM?",
                 ["Don't know what those are", "Installed one once", "Run models regularly", "Optimize inference for production"],
                 1.5, 1.8, ["ollama", "lm_studio", "vllm", "local_inference"]),
    ])
    # --- STRATEGY ---
    bank.extend([
        Question("strat_1", Dimension.STRATEGY,
                 "When evaluating a new project, do you assess where AI could add value or reduce effort?",
                 ["Never consider it", "Occasionally think about it", "Systematically evaluate", "Lead AI-first ideation"],
                 -1.5, 1.1, ["ai_opportunity", "value_assessment", "project_evaluation"]),
        Question("strat_2", Dimension.STRATEGY,
                 "Can you articulate the ROI or business case for an AI initiative to stakeholders?",
                 ["Wouldn't know where to start", "Could outline rough benefits", "Can build a structured case", "Have done this successfully"],
                 -0.5, 1.2, ["roi", "business_case", "stakeholder_communication"]),
        Question("strat_3", Dimension.STRATEGY,
                 "Do you have a framework for prioritizing AI initiatives by feasibility vs impact?",
                 ["No framework", "Informal mental model", "Structured scoring system", "Organization-wide prioritization process"],
                 0.8, 1.6, ["prioritization", "feasibility", "impact_matrix"]),
        Question("strat_4", Dimension.STRATEGY,
                 "Can you identify competitive moats and differentiation through AI capabilities?",
                 ["Not applicable to my role", "Basic understanding", "Can analyze for my industry", "Have built AI-driven differentiation"],
                 1.8, 2.0, ["competitive_moat", "differentiation", "ai_strategy"]),
    ])
    # --- IMPLEMENTATION ---
    bank.extend([
        Question("impl_1", Dimension.IMPLEMENTATION,
                 "Have you built, deployed, or significantly configured an AI-powered solution?",
                 ["Never", "Followed a tutorial", "Built a working prototype", "Deployed to production"],
                 -1.5, 1.2, ["deployment", "prototype", "production"]),
        Question("impl_2", Dimension.IMPLEMENTATION,
                 "How comfortable are you with prompt engineering, API integration, or model evaluation?",
                 ["Not at all", "Basic awareness", "Can do with guidance", "Highly proficient"],
                 -0.5, 1.4, ["prompt_engineering", "api_integration", "model_eval"]),
        Question("impl_3", Dimension.IMPLEMENTATION,
                 "Have you built a RAG system or fine-tuned a model for a specific domain?",
                 ["Don't know what RAG is", "Used a no-code RAG tool", "Built custom RAG pipeline", "Fine-tuned and deployed domain model"],
                 0.8, 1.6, ["rag", "fine_tuning", "domain_adaptation"]),
        Question("impl_4", Dimension.IMPLEMENTATION,
                 "Can you architect a multi-agent system or design LLM orchestration workflows?",
                 ["No idea", "Understand conceptually", "Built a simple agent", "Production multi-agent system"],
                 1.8, 1.9, ["agents", "orchestration", "langgraph", "crewai"]),
    ])
    # --- GOVERNANCE ---
    bank.extend([
        Question("gov_1", Dimension.GOVERNANCE,
                 "How well do you understand AI risks like hallucination, bias, data privacy, and IP exposure?",
                 ["Not aware", "Heard about them", "Understand key risks", "Can design mitigations"],
                 -1.5, 1.0, ["hallucination", "bias", "privacy", "ip_risk"]),
        Question("gov_2", Dimension.GOVERNANCE,
                 "Does your workflow include checks for AI output accuracy, fairness, or compliance?",
                 ["No checks", "Occasional review", "Standard process", "Systematic governance framework"],
                 -0.3, 1.2, ["accuracy_checks", "fairness", "compliance"]),
        Question("gov_3", Dimension.GOVERNANCE,
                 "Are you familiar with AI regulations (EU AI Act, NIST AI RMF, ISO 42001)?",
                 ["Never heard", "Aware they exist", "Can navigate requirements", "Implemented compliance program"],
                 0.8, 1.5, ["eu_ai_act", "nist_rmf", "iso_42001", "regulation"]),
        Question("gov_4", Dimension.GOVERNANCE,
                 "Can you design an AI governance framework covering data lineage, model cards, and audit trails?",
                 ["Not my area", "Understand components", "Can design for a team", "Enterprise-wide implementation"],
                 1.8, 1.8, ["governance_framework", "model_cards", "audit_trail", "data_lineage"]),
    ])
    # --- DATA ---
    bank.extend([
        Question("data_1", Dimension.DATA,
                 "How comfortable are you working with structured and unstructured data for AI use cases?",
                 ["Uncomfortable", "Can read simple reports", "Can clean and prep data", "Can architect data pipelines"],
                 -1.5, 1.1, ["structured_data", "unstructured_data", "data_prep"]),
        Question("data_2", Dimension.DATA,
                 "Can you evaluate whether data is sufficient and appropriate for training or prompting an AI system?",
                 ["No", "Vaguely", "With guidance", "Yes, independently"],
                 -0.3, 1.3, ["data_quality", "data_sufficiency", "training_data"]),
        Question("data_3", Dimension.DATA,
                 "Have you worked with embeddings, vector databases, or data augmentation for AI?",
                 ["No experience", "Used a vector DB via UI", "Built embedding pipelines", "Optimized retrieval systems"],
                 0.8, 1.5, ["embeddings", "vector_db", "data_augmentation", "retrieval"]),
        Question("data_4", Dimension.DATA,
                 "Can you design data collection strategies and evaluate dataset bias for model training?",
                 ["Not applicable", "Basic awareness", "Can assess existing datasets", "Design collection from scratch"],
                 1.6, 1.7, ["data_collection", "dataset_bias", "training_strategy"]),
    ])
    return bank
# ============================================================================
# IRT ENGINE — 2PL Model with Fisher Information
# ============================================================================
class IRTEngine:
    """
    Two-Parameter Logistic (2PL) IRT model.
    P(correct|theta) = sigmoid(a * (theta - b))
    where a is item discrimination and b is item difficulty.
    """
    @staticmethod
    def sigmoid(z: float) -> float:
        """Numerically stable logistic function.

        BUG FIX: the naive 1/(1+exp(-z)) raises OverflowError once -z
        exceeds ~709 (float exp overflow).  Evaluate the algebraically
        equivalent form on each half-line so exp() is only ever called
        with a non-positive argument.
        """
        if z >= 0:
            return 1.0 / (1.0 + math.exp(-z))
        ez = math.exp(z)  # safe: z < 0 so exp(z) <= 1, may underflow to 0.0
        return ez / (1.0 + ez)

    @staticmethod
    def probability(theta: float, a: float, b: float) -> float:
        """Probability of a correct (high-score) response at ability theta."""
        return IRTEngine.sigmoid(a * (theta - b))

    @staticmethod
    def fisher_information(theta: float, a: float, b: float) -> float:
        """Fisher information — how precisely this item measures ability at theta."""
        p = IRTEngine.probability(theta, a, b)
        return (a ** 2) * p * (1 - p)

    @staticmethod
    def likelihood(theta: float, responses: List[Tuple[float, float, int]], max_option: int = 3) -> float:
        """
        Log-likelihood of theta given responses.

        responses: list of (a, b, option_index) tuples.
        option_index 0 = lowest option, max_option = highest.
        Each graded response is approximated by blending the "correct" and
        "incorrect" probabilities with a weight proportional to the option.
        """
        log_lik = 0.0
        for a, b, opt_idx in responses:
            # Map the chosen option to a correctness weight in [0, 1].
            weight = opt_idx / max_option
            p = IRTEngine.probability(theta, a, b)
            # Higher option -> expected mass near p; lower -> near (1 - p).
            expected = weight * p + (1 - weight) * (1 - p)
            expected = max(expected, 1e-10)  # avoid log(0)
            log_lik += math.log(expected)
        return log_lik

    @staticmethod
    def estimate_theta(responses: List[Tuple[float, float, int]], prior_mean: float = 0.0, prior_std: float = 1.0) -> Tuple[float, float]:
        """
        MAP estimate of theta under a Normal(prior_mean, prior_std) prior.

        Returns (theta_estimate, standard_error).  With no responses the
        prior parameters are returned unchanged.
        """
        if not responses:
            return prior_mean, prior_std

        def neg_log_posterior(theta):
            # Negative log-posterior = -(log prior + log likelihood).
            log_prior = -0.5 * ((theta - prior_mean) / prior_std) ** 2
            log_lik = IRTEngine.likelihood(theta, responses)
            return -(log_prior + log_lik)

        result = minimize_scalar(neg_log_posterior, bounds=(-4.0, 4.0), method='bounded')
        theta_hat = result.x
        # Approximate SE from total Fisher information at the MAP estimate,
        # including the prior's contribution 1/prior_std^2.
        fisher = sum(IRTEngine.fisher_information(theta_hat, a, b) for a, b, _ in responses)
        se = 1.0 / math.sqrt(fisher + 1.0 / (prior_std ** 2))
        return theta_hat, se
# ============================================================================
# ADAPTIVE SELECTOR — Fisher Information Maximization
# ============================================================================
class AdaptiveSelector:
    """
    Selects the next question maximizing Fisher information at the current
    ability estimate, with content balancing so every dimension gets covered.
    """
    def __init__(self, min_per_dimension: int = 1, max_total: int = 12, target_precision: float = 0.3):
        self.min_per_dimension = min_per_dimension  # minimum items per dimension
        self.max_total = max_total                  # hard cap on assessment length
        self.target_precision = target_precision    # target SE per dimension

    def select_next(
        self,
        state: StudentState,
        question_bank: List[Question],
        balance_penalty: float = 2.0
    ) -> Optional[Question]:
        """
        Return the unasked question with the highest balance-adjusted Fisher
        information at the student's current theta, or None when exhausted.
        """
        unasked = state.get_unasked(question_bank)
        if not unasked:
            return None
        # PERF FIX: index the bank by id once instead of a linear scan per
        # asked question.
        by_id = {q.id: q for q in question_bank}
        dim_counts = {d: 0 for d in Dimension}
        for qid in state.asked_questions:
            q = by_id.get(qid)
            if q:
                dim_counts[q.dimension] += 1
        # Track the best candidate directly (no need to sort all scores).
        best_info = float('-inf')
        best_q = None
        for q in unasked:
            theta = state.theta.get(q.dimension, 0.0)
            info = IRTEngine.fisher_information(theta, q.discrimination, q.difficulty)
            # Content balancing: boost under-represented dimensions.
            count = dim_counts[q.dimension]
            if count < self.min_per_dimension:
                info *= balance_penalty * (self.min_per_dimension - count + 1)
            # Precision stopping: if SE is already good, slightly deprioritize.
            se = state.theta_variance.get(q.dimension, 1.0)
            if se < self.target_precision:
                info *= 0.7
            if info > best_info:
                best_info, best_q = info, q
        return best_q

    def should_stop(self, state: StudentState) -> bool:
        """Stop at the question cap, or early once all dimensions are both
        covered (min_per_dimension) and precise (SE < target), after >= 6 items."""
        if len(state.asked_questions) >= self.max_total:
            return True
        # PERF FIX: the full bank was previously rebuilt inside the lookup
        # generator for EVERY asked question; build one id index up front.
        bank_by_id = {q.id: q for q in build_question_bank()}
        dim_coverage = {d: 0 for d in Dimension}
        dim_precision = {d: float('inf') for d in Dimension}
        for qid in state.asked_questions:
            q = bank_by_id.get(qid)
            if q:
                dim_coverage[q.dimension] += 1
                dim_precision[q.dimension] = min(
                    dim_precision[q.dimension],
                    state.theta_variance.get(q.dimension, 1.0)
                )
        all_covered = all(c >= self.min_per_dimension for c in dim_coverage.values())
        # Dimensions never asked stay at inf and are excluded here; the
        # coverage gate above already handles them.
        all_precise = all(se < self.target_precision for se in dim_precision.values() if se != float('inf'))
        return all_covered and all_precise and len(state.asked_questions) >= 6
# ============================================================================
# KNOWLEDGE TRACING — Bayesian Update
# ============================================================================
class KnowledgeTracer:
    """
    Bayesian knowledge tracing per dimension.
    Re-estimates the latent ability (theta) for a dimension after each response.
    """
    def __init__(self, prior_mean: float = 0.0, prior_std: float = 1.0):
        self.prior_mean = prior_mean  # Normal prior mean over theta
        self.prior_std = prior_std    # Normal prior std over theta
        self.irt = IRTEngine()

    def update(
        self,
        state: StudentState,
        question: Question,
        option_index: int,
        max_option: int = 3
    ) -> StudentState:
        """Record a response and re-estimate theta for the question's dimension."""
        dim = question.dimension
        # Append to session history before re-estimating.
        state.asked_questions.append(question.id)
        state.responses[question.id] = option_index
        state.response_history.append({
            "question_id": question.id,
            "dimension": dim.value,
            "option_index": option_index,
            "timestamp": datetime.utcnow().isoformat(),
        })
        # PERF FIX: the full question bank was previously rebuilt once per
        # stored response in the loop below; build one id index per update.
        bank_by_id = {q.id: q for q in build_question_bank()}
        dim_responses = []
        for qid, opt in state.responses.items():
            q = bank_by_id.get(qid)
            if q and q.dimension == dim:
                dim_responses.append((q.discrimination, q.difficulty, opt))
        # MAP re-estimate of theta for this dimension only.
        theta, se = self.irt.estimate_theta(dim_responses, self.prior_mean, self.prior_std)
        state.theta[dim] = theta
        state.theta_variance[dim] = se
        return state

    def get_dimension_scores(self, state: StudentState) -> Dict[str, int]:
        """Convert latent theta to 0-100 scores (archai-compatible)."""
        scores = {}
        for dim in Dimension:
            theta = state.theta.get(dim, 0.0)
            # Squash theta through a shifted/stretched sigmoid, then clamp.
            # With theta=0 this yields ~58 after the 1.1 stretch; bounds
            # keep all scores inside [5, 95].
            score = int(round(100 * self.irt.sigmoid(theta * 0.8 + 0.1) * 1.1))
            score = max(5, min(95, score))
            scores[dim.value] = score
        return scores

    def get_overall_score(self, state: StudentState) -> int:
        """Mean of the dimension scores, rounded to the nearest int."""
        scores = self.get_dimension_scores(state)
        return round(sum(scores.values()) / len(scores))
# ============================================================================
# LEARNING PATH GENERATOR — Structured day/week/month actionables
# ============================================================================
class LearningPathGenerator:
"""
Generates granular learning paths with day/week/month actionables.
Uses rule-based logic aligned with archai's action plan structure.
"""
    def __init__(self):
        # Maturity stages, ascending by threshold (0-100 overall score).
        self.stages = [
            {"id": "awareness", "label": "Awareness", "threshold": 20, "desc": "You recognize AI's potential"},
            {"id": "understanding", "label": "Understanding", "threshold": 40, "desc": "You grasp core concepts"},
            {"id": "application", "label": "Application", "threshold": 60, "desc": "You use AI daily"},
            {"id": "integration", "label": "Integration", "threshold": 75, "desc": "AI is embedded in your work"},
            {"id": "mastery", "label": "Mastery", "threshold": 90, "desc": "You architect AI systems"},
        ]
        # Archetypes checked in list order by determine_archetype; the first
        # whose condition(scores) is truthy wins.  Conditions receive the
        # dimension-score dict (dimension value -> 0-100 int).
        self.archetypes = [
            {"id": "pioneer", "label": "The Pioneer", "desc": "High across the board — charting new territory",
             "condition": lambda s: all(v >= 70 for v in s.values())},
            {"id": "responsible-builder", "label": "The Responsible Builder", "desc": "Balances capability with caution",
             "condition": lambda s: s.get("governance", 0) >= 60 and s.get("implementation", 0) >= 50},
            {"id": "data-craftsman", "label": "The Data Craftsman", "desc": "Data-first, builds from evidence",
             "condition": lambda s: s.get("data", 0) >= 60 and s.get("implementation", 0) >= 50},
            {"id": "power-user", "label": "The Power User", "desc": "Fluent with tools, ready to strategize next",
             "condition": lambda s: s.get("tooling", 0) >= 60 and s.get("strategy", 0) < 50},
            {"id": "vision-caster", "label": "The Vision Caster", "desc": "Strategic thinker — hands-on comes next",
             "condition": lambda s: s.get("strategy", 0) >= 60 and s.get("implementation", 0) < 50},
            # Integrator: low spread (population std dev < 18) around a decent
            # mean (>= 50).  The lambda packs (avg, sd, verdict) into a tuple
            # and indexes [2] so the walrus bindings can feed the final check.
            {"id": "integrator", "label": "The Integrator", "desc": "Well-rounded across every dimension",
             "condition": lambda s: (avg := sum(s.values())/len(s.values()), sd := (sum((v-avg)**2 for v in s.values())/len(s.values()))**0.5, sd < 18 and avg >= 50)[2]},
            {"id": "explorer", "label": "The Explorer", "desc": "Curious and ready to dive in",
             "condition": lambda s: all(v < 45 for v in s.values())},
            {"id": "apprentice", "label": "The Apprentice", "desc": "Building foundational fluency",
             "condition": lambda s: True},  # fallback: always matches
        ]
def determine_stage(self, overall_score: int) -> Dict:
stage = self.stages[0]
for s in self.stages:
if overall_score >= s["threshold"]:
stage = s
return stage
def determine_archetype(self, scores: Dict[str, int]) -> Dict:
for arch in self.archetypes:
try:
if arch["condition"](scores):
return {"id": arch["id"], "label": arch["label"], "desc": arch["desc"]}
except:
continue
return {"id": "apprentice", "label": "The Apprentice", "desc": "Building foundational fluency"}
    def generate_learning_path(
        self,
        scores: Dict[str, int],
        persona_id: str,
        hours_per_week: int,
        budget_usd: int,
        hardware_id: Optional[str] = None,
        preference: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Generate a comprehensive learning path with day/week/month granularity.

        scores: dimension value -> 0-100 score (as produced by KnowledgeTracer).
        Returns a dict with overall score, stage, archetype, top-3 gaps,
        top-2 strengths, the day/week/month plan, projections, and meta totals.

        NOTE(review): `preference` is not referenced in this body — presumably
        reserved for callers; confirm before removing.
        """
        overall = round(sum(scores.values()) / len(scores))
        stage = self.determine_stage(overall)
        archetype = self.determine_archetype(scores)
        # Identify weakest dimensions (gaps to close) — ascending by score.
        sorted_dims = sorted(scores.items(), key=lambda x: x[1])
        # Build time-bucketed actionables.
        days = self._generate_days(sorted_dims, persona_id, hours_per_week, budget_usd)
        weeks = self._generate_weeks(sorted_dims, persona_id, hours_per_week, budget_usd, stage)
        months = self._generate_months(sorted_dims, persona_id, hours_per_week, budget_usd, stage, hardware_id)
        return {
            "overall_score": overall,
            "stage": stage,
            "archetype": archetype,
            "dimension_scores": scores,
            "gaps": [{"dimension": d, "score": s, "priority": i+1} for i, (d, s) in enumerate(sorted_dims[:3])],
            "strengths": [{"dimension": d, "score": s} for d, s in sorted_dims[-2:]],
            "learning_path": {
                "days": days,
                "weeks": weeks,
                "months": months,
            },
            # _compute_projections is defined elsewhere in this file (not
            # visible in this chunk).
            "projections": self._compute_projections(overall, stage, hours_per_week),
            "meta": {
                # NOTE(review): total hours are summed twice below; fine for
                # small plans, could be hoisted to a local.
                "total_hours": sum(a.get("estimated_hours", 0) for w in weeks for a in w["actions"]),
                "estimated_weeks": max(1, round(sum(a.get("estimated_hours", 0) for w in weeks for a in w["actions"]) / hours_per_week)) if hours_per_week else None,
                "generated_at": datetime.utcnow().isoformat(),
            }
        }
    def _generate_days(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int) -> List[Dict]:
        """Day 1-7 granular actionables — immediate, bite-sized wins.

        Day 1 targets the weakest dimension with a curated quick win; days
        2-7 rotate one fixed micro-task per dimension.  `persona_id`, `hours`
        and `budget` are accepted for signature symmetry with the other
        generators but are not used here.
        """
        weakest = sorted_dims[0][0]
        # Day 1: Always start with the weakest dimension.
        day1_actions = {
            "literacy": {
                "title": "Read the Anthropic Prompt Engineering Guide",
                "desc": "The highest-ROI single hour. Changes how you talk to every model.",
                "link": "https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/overview",
                "time": "45 min",
                "type": "reading",
            },
            "tooling": {
                "title": "Try Google AI Studio with a real work task",
                "desc": "Open AI Studio. Paste any work email or doc. Ask: 'What am I missing?'",
                "link": "https://aistudio.google.com",
                "time": "15 min",
                "type": "hands_on",
            },
            "strategy": {
                "title": "List 3 weekly tasks you hate",
                "desc": "Open Notes. Write the 3 most repetitive things you did this week.",
                "link": None,
                "time": "10 min",
                "type": "worksheet",
            },
            "implementation": {
                "title": "Install Claude Code or Cursor",
                "desc": "One terminal command. You'll have an AI pair programmer before lunch.",
                "link": "https://docs.anthropic.com/en/docs/claude-code",
                "time": "10 min",
                "type": "setup",
            },
            "governance": {
                "title": "Skim the NIST AI RMF index",
                "desc": "Five minutes tells you what you don't know. The framework is free.",
                "link": "https://www.nist.gov/itl/ai-risk-management-framework",
                "time": "15 min",
                "type": "reading",
            },
            "data": {
                "title": "Ask Gemini about your spreadsheet",
                "desc": "Open any Google Sheet. Use the side panel: 'Summarize this for me.'",
                "link": "https://workspace.google.com/products/gemini/",
                "time": "5 min",
                "type": "hands_on",
            },
        }
        days = []
        # Day 1: Close biggest gap (fall back to the literacy action if the
        # weakest dimension has no curated entry).
        action = day1_actions.get(weakest, day1_actions["literacy"])
        days.append({
            "day": 1,
            "focus": f"Close your {weakest} gap",
            "title": action["title"],
            "description": action["desc"],
            "action_type": action["type"],
            "estimated_time": action["time"],
            "resource_link": action["link"],
            "why": f"Your {weakest} score is lowest. A small win here unlocks everything else.",
            "quick_win": True,
        })
        # Day 2-7: Rotating through dimensions.
        day_templates = [
            ("tooling", "Daily AI tool practice", "Use an AI tool for one real work task today.", "15 min"),
            ("literacy", "Watch one AI explainer", "Pick a 10-min video on YouTube about LLMs, RAG, or agents.", "15 min"),
            ("implementation", "Build something tiny", "Create a prompt template or simple automation.", "30 min"),
            ("strategy", "Map one AI opportunity", "Pick a work process. Ask: how could AI help?", "20 min"),
            ("governance", "Review one AI risk", "Read about one AI failure case. What went wrong?", "15 min"),
            ("data", "Explore your data", "Open a dataset you use. What patterns could AI find?", "20 min"),
        ]
        for i, (dim, title, desc, time) in enumerate(day_templates, start=2):
            days.append({
                "day": i,
                "focus": dim,
                "title": title,
                "description": desc,
                "action_type": "practice",
                "estimated_time": time,
                "resource_link": None,
                "why": f"Building muscle memory in {dim} through consistent micro-practice.",
                "quick_win": False,
            })
        return days
def _generate_weeks(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int, stage: Dict) -> List[Dict]:
"""Week-by-week structured plan with measurable milestones."""
# Determine how many weeks based on hours and stage
gap_to_next = max(0, self._next_stage_threshold(stage) - round(sum(s for _, s in sorted_dims)/len(sorted_dims)))
estimated_weeks = max(2, min(8, math.ceil(gap_to_next / max(5, hours * 0.3)))) if hours else 4
weeks = []
for week_num in range(1, estimated_weeks + 1):
# Rotate focus dimensions
focus_dims = [d for d, _ in sorted_dims[:2]]
focus = focus_dims[(week_num - 1) % len(focus_dims)] if focus_dims else "literacy"
actions = self._week_actions(week_num, focus, persona_id, hours, budget, stage)
weeks.append({
"week": week_num,
"focus_dimension": focus,
"theme": self._week_theme(week_num, stage),
"milestone": self._week_milestone(week_num, focus, stage),
"actions": actions,
"estimated_hours": sum(a.get("estimated_hours", 0) for a in actions),
"checkpoint": f"Score {min(95, 20 + week_num * 10)}% in {focus} dimension",
})
return weeks
    def _week_actions(self, week: int, focus: str, persona_id: str, hours: int, budget: int, stage: Dict) -> List[Dict]:
        """Generate specific actions for a week.

        One core learning block chosen by the focus dimension (no action is
        added for an unknown focus), plus a fixed weekly reflection action.
        `persona_id`, `hours` and `stage` are accepted but not used here;
        `budget` only affects the tooling cost estimate.
        """
        actions = []
        # Core learning block (always present for a known focus dimension).
        if focus == "literacy":
            actions.append({
                "title": f"Week {week}: Deep-dive into AI fundamentals",
                "description": "Study transformer architecture, attention mechanisms, and model families.",
                "type": "course",
                "resource": "HuggingFace NLP Course",
                "link": "https://huggingface.co/learn/nlp-course",
                "estimated_hours": 2,
                "deliverable": "Complete 2 chapters + quiz",
                "cost": "$0",
            })
        elif focus == "tooling":
            actions.append({
                "title": f"Week {week}: Master one new AI tool",
                "description": "Deep exploration of one tool: Claude, Cursor, or a local model runner.",
                "type": "lab",
                "resource": "Tool documentation + 3 real tasks",
                "link": None,
                "estimated_hours": 2,
                "deliverable": "Complete 3 real work tasks using the tool",
                "cost": "$0" if budget == 0 else "$0-20",
            })
        elif focus == "strategy":
            actions.append({
                "title": f"Week {week}: Evaluate 2 AI opportunities",
                "description": "Map processes at work. Score by feasibility × impact. Present to one colleague.",
                "type": "workshop",
                "resource": "AI Use Case Canvas",
                "link": "https://aiusecase.io",
                "estimated_hours": 2,
                "deliverable": "One-page opportunity brief",
                "cost": "$0",
            })
        elif focus == "implementation":
            actions.append({
                "title": f"Week {week}: Build a working prototype",
                "description": "Create a RAG pipeline, agent, or API integration. Ship to a friend for feedback.",
                "type": "lab",
                "resource": "Dify or Flowise for no-code; LangChain for code",
                "link": "https://dify.ai",
                "estimated_hours": 3,
                "deliverable": "Working prototype + demo video",
                "cost": "$0",
            })
        elif focus == "governance":
            actions.append({
                "title": f"Week {week}: Draft your AI policy",
                "description": "Cover approved tools, data classification, review requirements.",
                "type": "workshop",
                "resource": "NIST AI RMF Template",
                "link": "https://www.nist.gov/artificial-intelligence/ai-risk-management-framework",
                "estimated_hours": 2,
                "deliverable": "1-page team AI policy draft",
                "cost": "$0",
            })
        elif focus == "data":
            actions.append({
                "title": f"Week {week}: Data pipeline practice",
                "description": "Clean a dataset, build embeddings, or set up a vector DB.",
                "type": "lab",
                "resource": "ChromaDB or Weaviate tutorials",
                "link": "https://docs.trychroma.com",
                "estimated_hours": 2,
                "deliverable": "Working vector search over your documents",
                "cost": "$0",
            })
        # Reflection action (every week).
        actions.append({
            "title": f"Week {week} reflection",
            "description": "Review what worked. Note one thing that surprised you. Adjust next week's plan.",
            "type": "reflection",
            "resource": "Personal learning journal",
            "link": None,
            "estimated_hours": 0.5,
            "deliverable": "3 bullet journal entries",
            "cost": "$0",
        })
        return actions
def _week_theme(self, week: int, stage: Dict) -> str:
themes = [
"Foundation & Discovery",
"Building Core Skills",
"Expanding Your Toolkit",
"Applying to Real Work",
"Deepening Specialization",
"Integration & Scale",
"Governance & Safety",
"Mastery & Teaching",
]
return themes[(week - 1) % len(themes)]
def _week_milestone(self, week: int, focus: str, stage: Dict) -> str:
return f"Complete {week} week(s) of focused practice in {focus}"
def _next_stage_threshold(self, current_stage: Dict) -> int:
thresholds = [20, 40, 60, 75, 90, 100]
current = current_stage["threshold"]
for t in thresholds:
if t > current:
return t
return 100
def _generate_months(self, sorted_dims: List[Tuple[str, int]], persona_id: str, hours: int, budget: int, stage: Dict, hardware_id: Optional[str]) -> List[Dict]:
"""Month-level strategic goals with outcomes."""
months = []
for month_num in range(1, 4): # 3-month horizon
goals = []
if month_num == 1:
goals = [
{"title": "Close weakest gap to 50%", "metric": f"{sorted_dims[0][0]} >= 50%", "tactics": ["Daily micro-practice", "One course completion", "Peer discussion"]}
]
if persona_id in ["ml-eng", "swe", "data-sci"]:
goals.append({"title": "Ship one AI-assisted code project", "metric": "1 repo with AI integration", "tactics": ["Cursor/Claude Code", "API integration", "Document your approach"]})
elif month_num == 2:
goals = [
{"title": "Build cross-dimensional fluency", "metric": "All dimensions >= 45%", "tactics": ["Rotate weekly focus", "Interdisciplinary projects", "Teach a colleague"]}
]
if hardware_id:
goals.append({"title": "Run local models for 50% of AI tasks", "metric": "Local inference usage >= 50%", "tactics": ["Ollama setup", "Model comparison", "Latency optimization"]})
else: # month 3
goals = [
{"title": "Lead an AI initiative", "metric": "One shipped AI project or team workshop", "tactics": ["Identify opportunity", "Build consensus", "Execute with metrics"]}
]
if sum(s for _, s in sorted_dims) / len(sorted_dims) >= 60:
goals.append({"title": "Mentor 2 colleagues into AI fluency", "metric": "2 people show measurable improvement", "tactics": ["Weekly office hours", "Curated resources", "Accountability check-ins"]})
months.append({
"month": month_num,
"theme": ["Build Foundation", "Expand & Integrate", "Lead & Scale"][month_num - 1],
"strategic_goals": goals,
"checkpoint": f"Overall score target: {min(95, stage['threshold'] + month_num * 10)}%",
"review_questions": [
"What was the biggest surprise this month?",
"Which action had the highest ROI?",
"What gap still feels hardest to close?",
"Who can you teach what you learned?",
],
})
return months
def _compute_projections(self, overall: int, stage: Dict, hours_per_week: int) -> Dict:
"""Project timeline to next stage."""
next_threshold = self._next_stage_threshold(stage)
gap = max(0, next_threshold - overall)
if hours_per_week and gap > 0:
# Rough estimate: 1 point improvement per 2 focused hours
hours_needed = gap * 2
weeks_needed = max(1, math.ceil(hours_needed / hours_per_week))
target_date = datetime.utcnow() + timedelta(weeks=weeks_needed)
return {
"current_stage": stage["label"],
"next_stage": self.stages[min(self.stages.index(stage) + 1, len(self.stages) - 1)]["label"],
"gap_to_next": gap,
"estimated_weeks": weeks_needed,
"at_hours_per_week": hours_per_week,
"projected_reach_date": target_date.strftime("%b %d, %Y"),
}
return {
"current_stage": stage["label"],
"next_stage": self.stages[min(self.stages.index(stage) + 1, len(self.stages) - 1)]["label"],
"gap_to_next": gap,
"estimated_weeks": None,
"at_hours_per_week": hours_per_week,
"projected_reach_date": None,
}
# ============================================================================
# MAIN ENGINE — Orchestrator
# ============================================================================
class AdaptiveAssessmentEngine:
    """
    Main orchestrator:
    - Manages sessions
    - Adaptive question selection via IRT
    - Bayesian knowledge tracing
    - Generates learning paths
    """
    def __init__(self):
        # Shared question pool used by every session.
        self.question_bank = build_question_bank()
        # Item Response Theory engine (ability/difficulty model).
        self.irt = IRTEngine()
        # Picks the next question; configured for at most 12 questions,
        # with at least one per dimension.
        self.selector = AdaptiveSelector(min_per_dimension=1, max_total=12)
        # Updates the latent knowledge state after each answer.
        self.tracer = KnowledgeTracer()
        # Turns final scores into a personalized learning path.
        self.path_gen = LearningPathGenerator()
        # In-memory session store: session_id -> StudentState.
        # NOTE(review): entries are never evicted, so memory grows with
        # the number of sessions — confirm this is acceptable.
        self.sessions: Dict[str, StudentState] = {}
    def start_session(self) -> Dict:
        """Initialize a new assessment session."""
        # Compact id: first 12 characters of a UUID4 string.
        session_id = str(uuid.uuid4())[:12]
        state = StudentState(
            session_id=session_id,
            # Latent ability starts at 0.0 with unit variance for every
            # dimension (maximum prior uncertainty).
            theta={d: 0.0 for d in Dimension},
            theta_variance={d: 1.0 for d in Dimension},
        )
        self.sessions[session_id] = state
        # Select first question (highest info at theta=0)
        first_q = self.selector.select_next(state, self.question_bank)
        return {
            "session_id": session_id,
            "question": self._question_to_dict(first_q) if first_q else None,
            "progress": {"asked": 0, "total": 12, "dimensions_covered": []},
            "status": "in_progress",
        }
    def submit_answer(self, session_id: str, question_id: str, option_index: int) -> Dict:
        """Submit an answer and get the next question or results.

        Returns an error payload for unknown session/question ids;
        otherwise either the next question (status "in_progress") or the
        final report (status "complete").
        """
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}
        # Linear scan of the bank; acceptable at this bank size.
        question = next((q for q in self.question_bank if q.id == question_id), None)
        if not question:
            return {"error": "Question not found", "status": "error"}
        # Update knowledge state
        # NOTE(review): the tracer's return value is rebound here —
        # presumably it returns the (possibly new) state object; confirm
        # against KnowledgeTracer.update.
        state = self.tracer.update(state, question, option_index)
        # Check if we should stop
        if self.selector.should_stop(state):
            return self._finalize(state)
        # Select next question
        next_q = self.selector.select_next(state, self.question_bank)
        # Calculate progress
        # Re-derive the set of dimensions touched so far from the asked
        # question ids.
        dim_coverage = set()
        for qid in state.asked_questions:
            q = next((qq for qq in self.question_bank if qq.id == qid), None)
            if q:
                dim_coverage.add(q.dimension.value)
        return {
            "session_id": session_id,
            "question": self._question_to_dict(next_q) if next_q else None,
            "progress": {
                "asked": len(state.asked_questions),
                "total": 12,
                "dimensions_covered": list(dim_coverage),
                "current_dimension": next_q.dimension.value if next_q else None,
            },
            "interim_scores": self.tracer.get_dimension_scores(state),
            "status": "in_progress" if next_q else "complete",
        }
    def get_results(self, session_id: str) -> Dict:
        """Get final assessment results."""
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}
        return self._finalize(state)
    def generate_path(self, session_id: str, persona_id: str, hours_per_week: int, budget_usd: int, hardware_id: Optional[str] = None, preference: Optional[str] = None) -> Dict:
        """Generate learning path from assessment results."""
        state = self.sessions.get(session_id)
        if not state:
            return {"error": "Session not found", "status": "error"}
        scores = self.tracer.get_dimension_scores(state)
        path = self.path_gen.generate_learning_path(
            scores, persona_id, hours_per_week, budget_usd, hardware_id, preference
        )
        # Tag the path with the originating session for the client.
        path["session_id"] = session_id
        return path
    def _finalize(self, state: StudentState) -> Dict:
        """Generate final assessment report."""
        scores = self.tracer.get_dimension_scores(state)
        overall = self.tracer.get_overall_score(state)
        stage = self.path_gen.determine_stage(overall)
        archetype = self.path_gen.determine_archetype(scores)
        # Strengths and gaps
        # Ascending by score: first entries are gaps, last are strengths.
        sorted_scores = sorted(scores.items(), key=lambda x: x[1])
        # Percentile estimation (simplified — can be calibrated with population data)
        # Based on normal distribution assumption
        # Local import so scipy is only required when a report is built;
        # model: population scores ~ N(mean=50, sd=20), clamped to
        # [1, 99] so the UI never shows a 0th/100th percentile.
        import scipy.stats as stats
        percentile = int(round(100 * stats.norm.cdf((overall - 50) / 20)))
        percentile = max(1, min(99, percentile))
        # NOTE(review): labels look up Dimension(d) while colors look up
        # the raw key d — confirm DIMENSION_COLORS is keyed by the
        # dimension's string value, not the enum member.
        return {
            "session_id": state.session_id,
            "status": "complete",
            "overall_score": overall,
            "dimension_scores": scores,
            "stage": stage,
            "archetype": archetype,
            "strengths": [
                {"dimension": d, "label": DIMENSION_LABELS.get(Dimension(d), d), "score": s, "color": DIMENSION_COLORS.get(d, "#14B8A6")}
                for d, s in sorted_scores[-2:]
            ],
            "gaps": [
                {"dimension": d, "label": DIMENSION_LABELS.get(Dimension(d), d), "score": s, "color": DIMENSION_COLORS.get(d, "#F43F5E")}
                for d, s in sorted_scores[:2]
            ],
            "percentile": percentile,
            "questions_answered": len(state.asked_questions),
            "response_history": state.response_history,
            "latent_abilities": {d.value: round(t, 2) for d, t in state.theta.items()},
            "measurement_precision": {d.value: round(v, 3) for d, v in state.theta_variance.items()},
        }
    def _question_to_dict(self, q: Optional[Question]) -> Optional[Dict]:
        """Serialize a Question for the API response; None passes through."""
        if not q:
            return None
        # Difficulty/discrimination are exposed (rounded) to the client
        # alongside the question content.
        return {
            "id": q.id,
            "dimension": q.dimension.value,
            "dimension_label": DIMENSION_LABELS.get(q.dimension, q.dimension.value),
            "text": q.text,
            "options": q.options,
            "difficulty": round(q.difficulty, 2),
            "discrimination": round(q.discrimination, 2),
            "concept_tags": q.concept_tags,
        }
# Singleton instance
# Module-level engine shared by the whole application; all session state
# lives in memory on this one instance (lost on process restart).
engine = AdaptiveAssessmentEngine()
|