Add adaptive engine (Elo + BKT + Thompson Sampling orchestrator)

e34f815 verified 4 days ago

22.2 kB

	"""
	MathLingua — Adaptive Engine

	Hybrid adaptive algorithm combining:
	1. Elo Rating — overall ability tracking with hint-weighted outcomes
	2. Bayesian Knowledge Tracing (BKT) — per-topic mastery estimation
	3. Thompson Sampling — intelligent question-level selection with ZPD windowing

	The orchestrator combines all three to produce progression decisions:
	SKIP (+2), INCREASE (+1), MAINTAIN (0), DECREASE (-1), RAPID_DECREASE (-2)

	Reference: MathLingua Technical Specification §6
	"""

	from __future__ import annotations

	import math
	import random
	from dataclasses import dataclass, field
	from typing import Optional

	from feature_engineering import (
	FeatureEngineer,
	EngineeredFeatures,
	InteractionSignals,
	)


	# ────────────────────────────────────────────────────────
	# Constants
	# ────────────────────────────────────────────────────────

	# Sub-levels "1.1" .. "3.5": three tiers of five steps each, easiest first.
	LEVELS = [f"{tier}.{step}" for tier in range(1, 4) for step in range(1, 6)]

	# Nominal question Elo per sub-level: 820 for "1.1", +50 for every later step.
	LEVEL_TO_ELO: dict[str, int] = {
	    level: 820 + 50 * position for position, level in enumerate(LEVELS)
	}

	# (level, elo) pairs ordered by ascending Elo.
	ELO_TO_LEVEL = sorted(LEVEL_TO_ELO.items(), key=lambda pair: pair[1])

	# Math topics tracked by default.
	TOPICS = [
	    "arithmetic",
	    "fractions",
	    "percentages",
	    "algebra",
	    "geometry",
	    "statistics",
	]

	# Elo rating assigned to a student before any interaction.
	INITIAL_STUDENT_ELO = 1000


	# ────────────────────────────────────────────────────────
	# Elo Engine
	# ────────────────────────────────────────────────────────

	class EloEngine:
	    """
	    Elo rating system adapted for education with hint-weighted outcomes.

	    Weighted outcomes: 1.00 (no hint), 0.75 (L1), 0.50 (L2), 0.25 (L3),
	    0.00 (L4/incorrect).
	    Student K-factor schedule: 48 (first 10 interactions), 32 (11–30), 24 (31+).
	    Question K-factor schedule: 8 (first 10 interactions), 6 (11–30), 4 (31+).
	    """

	    @staticmethod
	    def expected_score(student_elo: float, question_elo: float) -> float:
	        """Return E_s = 1 / (1 + 10^((R_q - R_s) / 400))."""
	        return 1.0 / (1.0 + math.pow(10.0, (question_elo - student_elo) / 400.0))

	    @staticmethod
	    def k_factor_student(interaction_count: int) -> float:
	        """Return the student K-factor for the given interaction count."""
	        if interaction_count <= 10:
	            return 48.0
	        if interaction_count <= 30:
	            return 32.0
	        return 24.0

	    @staticmethod
	    def k_factor_question(interaction_count: int) -> float:
	        """Return the question K-factor for the given interaction count."""
	        if interaction_count <= 10:
	            return 8.0
	        if interaction_count <= 30:
	            return 6.0
	        return 4.0

	    def update(
	        self,
	        student_elo: float,
	        question_elo: float,
	        weighted_outcome: float,
	        student_interactions: int,
	        question_interactions: Optional[int] = None,
	    ) -> tuple[float, float]:
	        """
	        Update student and question Elo ratings after one interaction.

	        Args:
	            student_elo: Current student rating.
	            question_elo: Current question rating.
	            weighted_outcome: Hint-weighted outcome of the interaction.
	            student_interactions: Interaction count used for the student K-factor.
	            question_interactions: Interaction count used for the question
	                K-factor. When omitted, ``student_interactions`` is used, which
	                is what this method did before the parameter existed.

	        Returns: (new_student_elo, new_question_elo), each rounded to 1 decimal.
	        """
	        expected = self.expected_score(student_elo, question_elo)

	        # A question's rating stability depends on how often *it* was seen, so
	        # callers that track that count can pass it; otherwise fall back.
	        question_count = (
	            student_interactions
	            if question_interactions is None
	            else question_interactions
	        )
	        ks = self.k_factor_student(student_interactions)
	        kq = self.k_factor_question(question_count)

	        # The two ratings move in opposite directions by the same surprise term.
	        new_student = student_elo + ks * (weighted_outcome - expected)
	        new_question = question_elo + kq * (expected - weighted_outcome)

	        return round(new_student, 1), round(new_question, 1)


	# ────────────────────────────────────────────────────────
	# Bayesian Knowledge Tracing (BKT)
	# ────────────────────────────────────────────────────────

	@dataclass
	class BKTParams:
	    """Bayesian Knowledge Tracing parameter set for a single topic."""

	    # P(L): probability the skill is known; the default is the prior P(L_0).
	    p_know: float = 0.1
	    # P(T): probability of learning the skill on one opportunity.
	    p_learn: float = 0.15
	    # P(S): probability of answering wrong despite knowing the skill.
	    p_slip: float = 0.1
	    # P(G): probability of answering right without knowing the skill.
	    p_guess: float = 0.25


	class BKTEngine:
	    """
	    Per-topic Bayesian Knowledge Tracing with a hint-dependent slip term.

	    The slip probability used for each update is
	        P(S)_adj = min(P(S) * (1 + 0.5 * hint_depth_normalized), 0.5)
	    so a correct answer reached with deeper hints raises P(know) less than an
	    unassisted one.
	    """

	    def __init__(self, topics: Optional[list[str]] = None):
	        # An empty or missing topic list falls back to the module default.
	        self.topics = topics if topics else TOPICS
	        self.params: dict[str, BKTParams] = {}
	        for name in self.topics:
	            self.params[name] = BKTParams()

	    def get_mastery(self, topic: str) -> float:
	        """Return P(know) for *topic*; unknown topics report the default prior."""
	        entry = self.params.get(topic)
	        if entry is None:
	            entry = BKTParams()
	        return entry.p_know

	    def update(
	        self,
	        topic: str,
	        weighted_outcome: float,
	        hint_depth_normalized: float,
	    ) -> float:
	        """
	        Fold one interaction outcome into P(know) for *topic*.

	        Args:
	            topic: Math topic string; an unseen topic is registered on the fly.
	            weighted_outcome: 0.0–1.0 hint-weighted outcome; values >= 0.5 are
	                treated as a correct observation.
	            hint_depth_normalized: h_i / 4 (0.0–1.0).

	        Returns: The new P(know), clamped to [0.01, 0.99] and rounded to 4 places.
	        """
	        try:
	            state = self.params[topic]
	        except KeyError:
	            state = self.params[topic] = BKTParams()

	        # Hint-adjusted slip, never allowed above 0.5.
	        slip = min(state.p_slip * (1.0 + 0.5 * hint_depth_normalized), 0.5)

	        if weighted_outcome >= 0.5:
	            # Correct: P(L|obs) = P(L)(1-S) / [P(L)(1-S) + (1-P(L))G]
	            known_term = state.p_know * (1.0 - slip)
	            total = known_term + (1.0 - state.p_know) * state.p_guess
	        else:
	            # Incorrect: P(L|obs) = P(L)S / [P(L)S + (1-P(L))(1-G)]
	            known_term = state.p_know * slip
	            total = known_term + (1.0 - state.p_know) * (1.0 - state.p_guess)

	        # A zero denominator carries no evidence; keep the previous estimate.
	        posterior = known_term / total if total > 0 else state.p_know

	        # Learning transition: P(L_n) = P(L|obs) + (1 - P(L|obs)) * P(T)
	        learned = posterior + (1.0 - posterior) * state.p_learn
	        learned = max(0.01, min(0.99, learned))

	        state.p_know = round(learned, 4)
	        return state.p_know


	# ────────────────────────────────────────────────────────
	# Thompson Sampling
	# ────────────────────────────────────────────────────────

	@dataclass
	class BetaPrior:
	    """Shape parameters of a Beta distribution; the defaults give Beta(1, 1)."""

	    alpha: float = 1.0  # first shape parameter
	    beta: float = 1.0  # second shape parameter


	class ThompsonSampler:
	    """
	    Beta-Bernoulli Thompson Sampling over question levels.

	    Candidates are restricted to the ZPD window
	    [current_level - 2, current_level + 3] (asymmetric upward), and each sampled
	    value is multiplied by a Gaussian proximity weight centered on the student
	    Elo with sigma = 100.
	    """

	    def __init__(self):
	        # One independent prior per known level.
	        self.priors: dict[str, BetaPrior] = {}
	        for level in LEVELS:
	            self.priors[level] = BetaPrior()

	    def update(self, level: str, weighted_outcome: float) -> None:
	        """Add *weighted_outcome* to alpha and its complement to beta for *level*."""
	        try:
	            prior = self.priors[level]
	        except KeyError:
	            prior = self.priors[level] = BetaPrior()
	        prior.alpha += weighted_outcome
	        prior.beta += 1.0 - weighted_outcome

	    def select(self, current_level: str, student_elo: float) -> str:
	        """
	        Pick the next question level by Thompson Sampling inside the ZPD window.

	        An unrecognised *current_level* is treated as index 5 of LEVELS.
	        """
	        try:
	            center = LEVELS.index(current_level)
	        except ValueError:
	            center = 5

	        # Two levels below through three above; the slice end is exclusive.
	        window_start = max(0, center - 2)
	        window_end = min(len(LEVELS), center + 4)

	        chosen = current_level
	        top_score = -1.0

	        for candidate in LEVELS[window_start:window_end]:
	            prior = self.priors.get(candidate)
	            if prior is None:
	                prior = BetaPrior()

	            # One draw from the candidate's Beta; parameters floored at 0.01.
	            theta = random.betavariate(
	                max(prior.alpha, 0.01),
	                max(prior.beta, 0.01),
	            )

	            # Gaussian weight on the distance between level Elo and student Elo.
	            candidate_elo = LEVEL_TO_ELO.get(candidate, 1000)
	            weight = math.exp(
	                -0.5 * ((candidate_elo - student_elo) / 100.0) ** 2
	            )

	            candidate_score = theta * weight
	            if candidate_score > top_score:
	                top_score = candidate_score
	                chosen = candidate

	        return chosen


	# ────────────────────────────────────────────────────────
	# Feature Predictor (for P(isSolved))
	# ────────────────────────────────────────────────────────

	class FeaturePredictor:
	    """
	    Simple logistic model predicting P(isSolved) from features.
	    Weights from spec §5.6 (logistic regression on simulated data).
	    """

	    # Feature importance weights (from spec)
	    W_MCS: float = 0.42
	    W_ELO_GAP: float = 0.28
	    W_LDS: float = -0.18
	    W_BKT: float = 0.15
	    W_STREAK: float = 0.08
	    BIAS: float = -0.30

	    @staticmethod
	    def _sigmoid(x: float) -> float:
	        """Return 1 / (1 + e^-x) without overflowing for large-magnitude x."""
	        if x >= 0:
	            return 1.0 / (1.0 + math.exp(-x))
	        # For negative x, exp(-x) can exceed the float range and raise
	        # OverflowError; the equivalent e^x / (1 + e^x) only ever underflows to 0.
	        exp_x = math.exp(x)
	        return exp_x / (1.0 + exp_x)

	    def predict(
	        self,
	        mcs_avg: float,
	        elo_gap: float,  # student_elo - question_elo (normalized by /400)
	        lds_avg: float,
	        p_know: float,
	        streak: int,
	    ) -> float:
	        """
	        Predict probability that the student solves the next problem without L4.

	        Args:
	            mcs_avg: Average MCS feature value.
	            elo_gap: student_elo - question_elo, normalized by /400.
	            lds_avg: Average LDS feature value.
	            p_know: BKT mastery estimate.
	            streak: Current streak length; values above 5 count as 5.

	        Returns: The probability, rounded to 4 decimal places.
	        """
	        z = (
	            self.BIAS
	            + self.W_MCS * mcs_avg
	            + self.W_ELO_GAP * elo_gap
	            + self.W_LDS * lds_avg
	            + self.W_BKT * p_know
	            # Streak contribution saturates at 5 and is scaled into [.., 1].
	            + self.W_STREAK * min(streak, 5) / 5.0
	        )
	        return round(self._sigmoid(z), 4)


	# ────────────────────────────────────────────────────────
	# Adaptive Engine (Orchestrator)
	# ────────────────────────────────────────────────────────

	@dataclass
	class AdaptiveState:
	    """Mutable adaptive-learning state tracked for a single student."""

	    # Current Elo estimate of the student.
	    student_elo: float = INITIAL_STUDENT_ELO
	    # Sub-level the next question is drawn from; "2.1" is the starting point.
	    current_level: str = "2.1"
	    # Number of interactions processed so far.
	    total_interactions: int = 0
	    # Consecutive interactions with weighted_outcome >= 0.75.
	    streak_correct: int = 0
	    # Consecutive interactions with weighted_outcome < 0.40.
	    streak_wrong: int = 0
	    # Rolling window (last 5) of LDS feature values.
	    recent_lds: list[float] = field(default_factory=list)
	    # Rolling window (last 5) of MCS feature values.
	    recent_mcs: list[float] = field(default_factory=list)
	    # Whether enhanced scaffolding is currently switched on.
	    enhanced_scaffold: bool = False


	class AdaptiveEngine:
	    """
	    Main orchestrator combining Elo, BKT, Thompson Sampling, and feature
	    engineering for one student, whose state lives in ``self.state``.

	    Progression rules, evaluated in this order (first match wins):
	        weighted_outcome >= 0.85 AND streak_correct >= 3 -> SKIP (+2)
	        weighted_outcome >= 0.75 AND P(know) >= 0.70     -> INCREASE (+1)
	        weighted_outcome >= 0.40                         -> MAINTAIN (0)
	        weighted_outcome >= 0.25 OR streak_wrong < 2     -> DECREASE (-1)
	        otherwise (outcome < 0.25 AND streak_wrong >= 2) -> RAPID_DECREASE (-2)

	    NOTE(review): the rule table is attributed to spec §6.5. An earlier version
	    of this docstring described RAPID_DECREASE as also requiring
	    P(know) < 0.30; the code does not check that — confirm against the spec.
	    """

	    def __init__(self, seed: Optional[int] = None):
	        self.elo_engine = EloEngine()
	        self.bkt_engine = BKTEngine()
	        self.thompson = ThompsonSampler()
	        self.feature_eng = FeatureEngineer()
	        self.predictor = FeaturePredictor()
	        self.state = AdaptiveState()

	        # Seeds the module-level `random` generator (a process-wide effect).
	        if seed is not None:
	            random.seed(seed)

	    @staticmethod
	    def _level_index(level: str) -> int:
	        """Return the position of *level* in LEVELS, or 5 when it is unknown."""
	        return LEVELS.index(level) if level in LEVELS else 5

	    def _elo_to_level(self, elo: float) -> str:
	        """Map an Elo rating to the nearest sub-level (lowest level wins ties)."""
	        nearest_level, _ = min(ELO_TO_LEVEL, key=lambda pair: abs(elo - pair[1]))
	        return nearest_level

	    def _shift_level(self, level: str, delta: int) -> str:
	        """Shift a level by delta sub-levels, clamped to valid range."""
	        shifted = self._level_index(level) + delta
	        return LEVELS[max(0, min(len(LEVELS) - 1, shifted))]

	    def _update_rolling(self, lst: list[float], value: float, window: int = 5):
	        """Append *value* to *lst* in place, dropping the oldest entry past *window*."""
	        lst.append(value)
	        if len(lst) > window:
	            lst.pop(0)

	    def _update_streaks(self, weighted_outcome: float) -> None:
	        """Advance the correct/wrong streak counters for one outcome."""
	        s = self.state
	        if weighted_outcome >= 0.75:
	            s.streak_correct += 1
	            s.streak_wrong = 0
	        elif weighted_outcome < 0.40:
	            s.streak_wrong += 1
	            s.streak_correct = 0
	        else:
	            # Middling outcomes break both streaks.
	            s.streak_correct = 0
	            s.streak_wrong = 0

	    @staticmethod
	    def _decide_progression(
	        weighted_outcome: float,
	        streak_correct: int,
	        streak_wrong: int,
	        p_know: float,
	    ) -> tuple[str, int]:
	        """Return (decision, level_delta) per the rule table in the class docstring."""
	        if weighted_outcome >= 0.85 and streak_correct >= 3:
	            return "SKIP", 2
	        if weighted_outcome >= 0.75 and p_know >= 0.70:
	            return "INCREASE", 1
	        if weighted_outcome >= 0.40:
	            return "MAINTAIN", 0
	        if weighted_outcome >= 0.25 or streak_wrong < 2:
	            return "DECREASE", -1
	        return "RAPID_DECREASE", -2

	    def process_interaction(
	        self,
	        signals: InteractionSignals,
	        question_elo: float,
	        topic: str,
	    ) -> dict:
	        """
	        Process a single student-question interaction and update ``self.state``.

	        Args:
	            signals: Raw interaction signals; ``is_correct``, ``max_hint_level``
	                and ``question_level`` are read here, the rest by the feature
	                engineer.
	            question_elo: Elo rating of the question that was answered.
	            topic: Math topic of the question, used for the BKT update.

	        Returns a dict with:
	            - features: EngineeredFeatures
	            - weighted_outcome: float
	            - new_student_elo: float
	            - new_question_elo: float (updated question rating; callers that
	              persist question ratings should store it)
	            - new_p_know: float
	            - decision: str
	            - decision_level: str (level after applying the decision delta)
	            - next_level: str (level actually selected for the next question)
	            - enhanced_scaffold: bool
	            - avg_lds / avg_mcs: rolling averages, rounded to 4 places
	            - quadrant: taken from ``features.quadrant``
	        """
	        s = self.state

	        # 1. Compute engineered features
	        features = self.feature_eng.compute(signals)
	        weighted_outcome = self.feature_eng.compute_weighted_outcome(
	            signals.is_correct, signals.max_hint_level
	        )

	        # 2. Update Elo (the interaction count includes the current interaction)
	        s.total_interactions += 1
	        new_student_elo, new_question_elo = self.elo_engine.update(
	            s.student_elo, question_elo, weighted_outcome, s.total_interactions
	        )
	        s.student_elo = new_student_elo

	        # 3. Update BKT; hint depth is normalized by the 4 hint levels
	        hint_depth = signals.max_hint_level / 4.0
	        new_p_know = self.bkt_engine.update(topic, weighted_outcome, hint_depth)

	        # 4. Update Thompson priors
	        self.thompson.update(signals.question_level, weighted_outcome)

	        # 5. Update streaks
	        self._update_streaks(weighted_outcome)

	        # 6. Update rolling averages
	        self._update_rolling(s.recent_lds, features.lds)
	        self._update_rolling(s.recent_mcs, features.mcs)

	        # 7. Progression decision (uses the streaks updated in step 5)
	        decision, level_delta = self._decide_progression(
	            weighted_outcome, s.streak_correct, s.streak_wrong, new_p_know
	        )

	        # 8. LDS/MCS diagnostic overlay
	        avg_lds = sum(s.recent_lds) / max(len(s.recent_lds), 1)
	        avg_mcs = sum(s.recent_mcs) / max(len(s.recent_mcs), 1)
	        s.enhanced_scaffold = False

	        if avg_lds > 0.6 and avg_mcs > 0.6:
	            # Language gap: knows math, needs scaffold — don't decrease
	            if level_delta < 0:
	                decision = "MAINTAIN"
	                level_delta = 0
	            # NOTE(review): the nesting of this assignment was ambiguous in the
	            # source; it is applied to every language-gap case — confirm.
	            s.enhanced_scaffold = True

	        # 9. Apply level change
	        decision_level = self._shift_level(s.current_level, level_delta)

	        # 10. Thompson sampling for fine-grained selection
	        thompson_level = self.thompson.select(decision_level, s.student_elo)

	        # 11. Override if Thompson and decision disagree strongly
	        dec_idx = self._level_index(decision_level)
	        th_idx = self._level_index(thompson_level)

	        if level_delta < 0 and th_idx > dec_idx + 1:
	            # Decision says decrease but Thompson wants to increase significantly
	            next_level = decision_level
	        else:
	            next_level = thompson_level

	        s.current_level = next_level

	        return {
	            "features": features,
	            "weighted_outcome": weighted_outcome,
	            "new_student_elo": s.student_elo,
	            "new_question_elo": new_question_elo,
	            "new_p_know": new_p_know,
	            "decision": decision,
	            "decision_level": decision_level,
	            "next_level": next_level,
	            "enhanced_scaffold": s.enhanced_scaffold,
	            "avg_lds": round(avg_lds, 4),
	            "avg_mcs": round(avg_mcs, 4),
	            "quadrant": features.quadrant,
	        }


	# ────────────────────────────────────────────────────────
	# Simulation
	# ────────────────────────────────────────────────────────

	def simulate_student_profile(
	    profile_name: str,
	    true_level_idx: int,
	    base_p_correct: float,
	    hint_tendency: float,
	    n_interactions: int = 20,
	    seed: int = 42,
	) -> dict:
	    """
	    Simulate a student profile through n_interactions on a fresh engine.

	    Args:
	        profile_name: Label for this profile
	        true_level_idx: Index into LEVELS of the student's true ability
	        base_p_correct: Base probability of getting correct answer
	        hint_tendency: Probability of requesting hints (0=never, 1=always)
	        n_interactions: Number of practice interactions; zero or fewer runs
	            nothing and reports 0.0 for every average
	        seed: Random seed

	    Returns: A summary dict with keys profile, true_level, start_elo,
	    final_elo, final_level, avg_weighted_outcome, avg_lds, avg_mcs and
	    decisions (decision name -> count).
	    """
	    random.seed(seed)
	    engine = AdaptiveEngine(seed=seed)
	    true_elo = LEVEL_TO_ELO[LEVELS[true_level_idx]]

	    # (min, max) seconds spent at hint levels L1..L4 when that level is reached.
	    hint_time_ranges = ((3, 10), (5, 15), (8, 20), (10, 25))

	    results = []

	    for _ in range(n_interactions):
	        current_level = engine.state.current_level
	        question_elo = LEVEL_TO_ELO.get(current_level, 1000)

	        # Simulate difficulty effect on correctness: a logistic blend between
	        # base_p_correct (easy question) and 0.1 (hard question).
	        elo_diff = true_elo - question_elo
	        difficulty_modifier = 1.0 / (1.0 + math.exp(-elo_diff / 200.0))
	        p_correct = (
	            base_p_correct * difficulty_modifier
	            + 0.1 * (1 - difficulty_modifier)
	        )

	        # Simulate hint usage
	        if random.random() < hint_tendency:
	            max_hint = random.choices(
	                [1, 2, 3, 4],
	                weights=[0.3, 0.3, 0.25, 0.15],
	            )[0]
	        else:
	            max_hint = 0

	        is_correct = random.random() < p_correct
	        if max_hint == 4:
	            is_correct = False  # L4 = solution reveal

	        # Generate plausible timing
	        base_time = 30 + true_level_idx * 5
	        total_time = max(10, base_time + random.gauss(0, 10))

	        # Draw a dwell time for each hint level reached, in L1..L4 order so the
	        # random stream is consumed exactly as before.
	        t_l1, t_l2, t_l3, t_l4 = (
	            random.uniform(low, high) if max_hint >= depth else 0.0
	            for depth, (low, high) in enumerate(hint_time_ranges, start=1)
	        )
	        scaffold_total = t_l1 + t_l2 + t_l3 + t_l4

	        # Total time must cover the scaffold time plus a 5-second margin.
	        total_time = max(total_time, scaffold_total + 5)

	        topic = random.choice(TOPICS)

	        signals = InteractionSignals(
	            max_hint_level=max_hint,
	            time_before_first_hint=random.uniform(2, 15) if max_hint > 0 else 0,
	            total_time=total_time,
	            time_at_L1=t_l1,
	            time_at_L2=t_l2,
	            time_at_L3=t_l3,
	            time_at_L4=t_l4,
	            num_attempts=1 if is_correct and max_hint == 0 else random.randint(1, 3),
	            is_correct=is_correct,
	            question_level=current_level,
	        )

	        results.append(engine.process_interaction(signals, question_elo, topic))

	    # Summary. Guard the divisor so an empty run yields 0.0 averages instead of
	    # raising ZeroDivisionError.
	    divisor = max(len(results), 1)
	    avg_wo = sum(r["weighted_outcome"] for r in results) / divisor
	    avg_lds = sum(r["features"].lds for r in results) / divisor
	    avg_mcs = sum(r["features"].mcs for r in results) / divisor

	    decisions = {}
	    for r in results:
	        label = r["decision"]
	        decisions[label] = decisions.get(label, 0) + 1

	    return {
	        "profile": profile_name,
	        "true_level": LEVELS[true_level_idx],
	        "start_elo": INITIAL_STUDENT_ELO,
	        "final_elo": round(engine.state.student_elo, 1),
	        "final_level": engine.state.current_level,
	        "avg_weighted_outcome": round(avg_wo, 3),
	        "avg_lds": round(avg_lds, 3),
	        "avg_mcs": round(avg_mcs, 3),
	        "decisions": decisions,
	    }


	def _run_simulation():
	    """Run the built-in demo profiles and print a summary for each one."""
	    banner = "=" * 70
	    print(banner)
	    print("MathLingua Adaptive Engine — Simulation Results")
	    print(banner)

	    # (label, true level index, base P(correct), hint tendency)
	    profiles = [
	        ("Strong Student (true ~2.5)", 9, 0.85, 0.15),
	        ("Struggling Student (true ~1.2)", 1, 0.45, 0.70),
	        ("Average Student (true ~1.5)", 4, 0.65, 0.40),
	    ]

	    for profile_args in profiles:
	        summary = simulate_student_profile(*profile_args)
	        report_lines = (
	            f"\n{'─' * 50}",
	            f"Profile: {summary['profile']}",
	            f" True level: {summary['true_level']}",
	            f" Elo: {summary['start_elo']} → {summary['final_elo']}",
	            f" Level: 2.1 → {summary['final_level']}",
	            f" Avg weighted outcome: {summary['avg_weighted_outcome']}",
	            f" Avg LDS: {summary['avg_lds']}",
	            f" Avg MCS: {summary['avg_mcs']}",
	            f" Decisions: {summary['decisions']}",
	        )
	        for line in report_lines:
	            print(line)

	    print(f"\n{banner}")
	    print("Simulation completed successfully ✓")
	    print(banner)


	# Run the demo only when executed as a script, not on import.
	if __name__ == "__main__":
	    _run_simulation()