| """ |
| MathLingua β Feature Engineering Module |
| |
| Computes Language Dependency Score (LDS) and Math Confidence Score (MCS) |
| from student interaction data. These two engineered features disentangle |
| linguistic struggle from mathematical difficulty, enabling the adaptive |
| engine to make targeted decisions. |
| |
| Reference: MathLingua Technical Specification Β§5 |
| """ |
|
|
from __future__ import annotations

import math
from dataclasses import dataclass, field
from typing import Optional
|
|
|
|
| |
| |
| |
|
|
@dataclass
class InteractionSignals:
    """Raw signals captured from a single student-question interaction.

    Every field has a default, so an instance can be built from whichever
    signals were recorded. The field order below is also the positional
    order of the generated constructor and must not be rearranged.
    """

    # Deepest hint level the student opened. 0 means no hints were used;
    # FeatureEngineer normalizes this by 4 and treats level 4 as the reveal.
    max_hint_level: int = 0
    # Time elapsed before the first hint was requested. Compared against the
    # median-time table, so it must use the same unit
    # (presumably seconds — TODO confirm against the data producer).
    time_before_first_hint: float = 0.0
    # Total time spent on the question (same unit as above).
    total_time: float = 0.0
    # Time spent at each hint level; FeatureEngineer sums these four values
    # as "scaffold time" and divides by total_time.
    time_at_L1: float = 0.0
    time_at_L2: float = 0.0
    time_at_L3: float = 0.0
    time_at_L4: float = 0.0
    # Number of answer attempts; defaults to a single attempt.
    num_attempts: int = 1
    # Whether the interaction ended with a correct answer.
    is_correct: bool = False
    # Question level key (e.g. "1.1") used to look up the median time.
    question_level: str = "1.1"
|
|
|
|
@dataclass
class EngineeredFeatures:
    """Output of the feature engineering pipeline for one interaction.

    Holds the individual sub-features, the two composite scores built from
    them, and the diagnostic quadrant label. The field order below is also
    the positional order of the generated constructor.
    """

    # --- LDS sub-features ---
    hint_depth_normalized: float = 0.0  # D_hint: hint level / 4
    scaffold_time_ratio: float = 0.0    # R_scaffold: scaffold time / total time
    escalation_speed: float = 0.0       # E_speed: how quickly the first hint was opened
    reveal_flag: float = 0.0            # F_reveal: 1.0 when level 4 was reached

    # --- MCS sub-features ---
    correctness: float = 0.0            # C_correct: 1.0 if correct, else 0.0
    speed_factor: float = 0.0           # S_speed: median time / total time, clamped
    attempt_efficiency: float = 0.0     # A_efficiency: 1 / number of attempts

    # --- Composite scores, each clamped to [0, 1] ---
    lds: float = 0.0                    # Language Dependency Score
    mcs: float = 0.0                    # Math Confidence Score

    # --- Diagnostic quadrant ---
    # FeatureEngineer fills this with one of "thriving", "language_gap",
    # "math_struggle" or "dual_challenge"; empty until classified.
    quadrant: str = ""
|
|
|
|
| |
| |
| |
| |
|
|
# Default median time per question level, used as the baseline for the
# escalation-speed and speed-factor features when no custom table is given.
# Keys are level strings "1.1" .. "3.5"; values rise by 5.0 per level.
# The unit is not stated here (presumably seconds — TODO confirm); it must
# match the unit of the timing fields on InteractionSignals.
DEFAULT_MEDIAN_TIMES: dict[str, float] = {
    "1.1": 30.0, "1.2": 35.0, "1.3": 40.0, "1.4": 45.0, "1.5": 50.0,
    "2.1": 55.0, "2.2": 60.0, "2.3": 65.0, "2.4": 70.0, "2.5": 75.0,
    "3.1": 80.0, "3.2": 85.0, "3.3": 90.0, "3.4": 95.0, "3.5": 100.0,
}
|
|
|
|
| |
| |
| |
|
|
class FeatureEngineer:
    """
    Computes LDS and MCS from raw interaction signals.

    LDS = clamp(0.35·D_hint + 0.25·R_scaffold + 0.20·E_speed + 0.20·F_reveal, 0, 1)
    MCS = clamp(0.30·C_correct + 0.25·S_speed + 0.20·A_efficiency + 0.25·(1-LDS), 0, 1)

    The 2×2 diagnostic quadrant is derived from thresholds:
        LDS < 0.4 & MCS ≥ 0.6  → Thriving
        LDS ≥ 0.4 & MCS ≥ 0.6  → Language Gap
        LDS < 0.4 & MCS < 0.6  → Math Struggle
        LDS ≥ 0.4 & MCS < 0.6  → Dual Challenge

    Out-of-range hint levels are tolerated rather than rejected: a level
    below 0 is scored like "no hints" and a level above 4 like the reveal.
    """

    # LDS weights (sum to 1.0): hint depth, scaffold-time ratio,
    # escalation speed, reveal flag.
    W1: float = 0.35
    W2: float = 0.25
    W3: float = 0.20
    W4: float = 0.20

    # MCS weights (sum to 1.0): correctness, speed factor,
    # attempt efficiency, (1 - LDS).
    W5: float = 0.30
    W6: float = 0.25
    W7: float = 0.20
    W8: float = 0.25

    # Quadrant thresholds.
    LDS_THRESHOLD: float = 0.4
    MCS_THRESHOLD: float = 0.6

    # Highest hint level; reaching it counts as a full reveal.
    MAX_HINT_LEVEL: int = 4
    # Median time assumed for a question level missing from the table.
    FALLBACK_MEDIAN_TIME: float = 60.0
    # Hint-weighted outcome for a correct answer, keyed by deepest hint level.
    # Level 4 and any unknown level are absent on purpose and score 0.0.
    _OUTCOME_BY_HINT_LEVEL = {0: 1.0, 1: 0.75, 2: 0.50, 3: 0.25}

    def __init__(self, median_times: Optional[dict[str, float]] = None):
        """Create an engineer with a per-level median-time table.

        Args:
            median_times: Mapping from question level to median time. ``None``
                or an empty mapping selects ``DEFAULT_MEDIAN_TIMES``. The
                mapping is stored by reference, not copied.
        """
        self.median_times = median_times or DEFAULT_MEDIAN_TIMES

    @staticmethod
    def _clamp(value: float, lo: float = 0.0, hi: float = 1.0) -> float:
        """Return ``value`` limited to the closed interval [lo, hi]."""
        return max(lo, min(hi, value))

    # ----- LDS sub-features -----

    def _hint_depth_normalized(self, signals: InteractionSignals) -> float:
        """D_hint = h_i / 4, clamped to [0, 1].

        The clamp keeps an out-of-range hint level from producing a feature
        value outside the documented range.
        """
        return self._clamp(signals.max_hint_level / float(self.MAX_HINT_LEVEL))

    def _scaffold_time_ratio(self, signals: InteractionSignals) -> float:
        """R_scaffold = scaffold_time / total_time, clamped to [0, 1].

        Scaffold time is the sum of the time spent at hint levels L1-L4.
        Returns 0.0 when ``total_time`` is not positive (avoids dividing by
        zero).
        """
        scaffold_time = (
            signals.time_at_L1 + signals.time_at_L2 +
            signals.time_at_L3 + signals.time_at_L4
        )
        if signals.total_time <= 0:
            return 0.0
        return self._clamp(scaffold_time / signals.total_time)

    def _escalation_speed(self, signals: InteractionSignals) -> float:
        """E_speed = 1 - (t_pre / median_time) if hints used, else 0.

        A first hint requested early relative to the level's median time
        scores close to 1. Returns 1.0 when the median is not positive, so
        the division is skipped.
        """
        # "<= 0" rather than "== 0": a negative (invalid) hint level must not
        # be scored as if the student had escalated to hints immediately.
        if signals.max_hint_level <= 0:
            return 0.0
        median = self.median_times.get(
            signals.question_level, self.FALLBACK_MEDIAN_TIME
        )
        if median <= 0:
            return 1.0
        raw = 1.0 - (signals.time_before_first_hint / median)
        return self._clamp(raw)

    def _reveal_flag(self, signals: InteractionSignals) -> float:
        """F_reveal = 1.0 if L4 (or anything beyond it) was accessed, else 0.0."""
        return 1.0 if signals.max_hint_level >= self.MAX_HINT_LEVEL else 0.0

    # ----- MCS sub-features -----

    def _correctness(self, signals: InteractionSignals) -> float:
        """C_correct ∈ {0, 1}"""
        return 1.0 if signals.is_correct else 0.0

    def _speed_factor(self, signals: InteractionSignals) -> float:
        """S_speed = clamp(median_time / total_time, 0, 1)

        Returns 0.0 when ``total_time`` is not positive (avoids dividing by
        zero).
        """
        median = self.median_times.get(
            signals.question_level, self.FALLBACK_MEDIAN_TIME
        )
        if signals.total_time <= 0:
            return 0.0
        return self._clamp(median / signals.total_time)

    def _attempt_efficiency(self, signals: InteractionSignals) -> float:
        """A_efficiency = 1 / attempts (0.0 when attempts is not positive)."""
        if signals.num_attempts <= 0:
            return 0.0
        return 1.0 / signals.num_attempts

    # ----- Composite scores -----

    def _compute_lds(self, d_hint: float, r_scaffold: float,
                     e_speed: float, f_reveal: float) -> float:
        """Weighted sum of the four LDS sub-features, clamped to [0, 1]."""
        raw = (self.W1 * d_hint + self.W2 * r_scaffold +
               self.W3 * e_speed + self.W4 * f_reveal)
        return self._clamp(raw)

    def _compute_mcs(self, c_correct: float, s_speed: float,
                     a_efficiency: float, lds: float) -> float:
        """Weighted sum of the MCS sub-features and (1 - LDS), clamped to [0, 1]."""
        raw = (self.W5 * c_correct + self.W6 * s_speed +
               self.W7 * a_efficiency + self.W8 * (1.0 - lds))
        return self._clamp(raw)

    def _classify_quadrant(self, lds: float, mcs: float) -> str:
        """Map an (LDS, MCS) pair onto one of the four quadrant labels."""
        if lds < self.LDS_THRESHOLD and mcs >= self.MCS_THRESHOLD:
            return "thriving"
        elif lds >= self.LDS_THRESHOLD and mcs >= self.MCS_THRESHOLD:
            return "language_gap"
        elif lds < self.LDS_THRESHOLD and mcs < self.MCS_THRESHOLD:
            return "math_struggle"
        else:
            return "dual_challenge"

    # ----- Public API -----

    def compute(self, signals: InteractionSignals) -> EngineeredFeatures:
        """Compute all engineered features from raw interaction signals.

        Args:
            signals: Raw signals of one interaction.

        Returns:
            An ``EngineeredFeatures`` whose float sub-features and scores are
            rounded to 4 decimal places.
        """
        # LDS sub-features
        d_hint = self._hint_depth_normalized(signals)
        r_scaffold = self._scaffold_time_ratio(signals)
        e_speed = self._escalation_speed(signals)
        f_reveal = self._reveal_flag(signals)

        # MCS sub-features
        c_correct = self._correctness(signals)
        s_speed = self._speed_factor(signals)
        a_efficiency = self._attempt_efficiency(signals)

        # MCS depends on LDS, so LDS is computed first. The quadrant is
        # classified from the unrounded scores; rounding happens only when
        # the result object is built.
        lds = self._compute_lds(d_hint, r_scaffold, e_speed, f_reveal)
        mcs = self._compute_mcs(c_correct, s_speed, a_efficiency, lds)
        quadrant = self._classify_quadrant(lds, mcs)

        return EngineeredFeatures(
            hint_depth_normalized=round(d_hint, 4),
            scaffold_time_ratio=round(r_scaffold, 4),
            escalation_speed=round(e_speed, 4),
            reveal_flag=f_reveal,
            correctness=c_correct,
            speed_factor=round(s_speed, 4),
            attempt_efficiency=round(a_efficiency, 4),
            lds=round(lds, 4),
            mcs=round(mcs, 4),
            quadrant=quadrant,
        )

    def compute_weighted_outcome(self, is_correct: bool,
                                 max_hint_level: int) -> float:
        """
        Hint-weighted outcome for Elo/BKT updates.
            1.00 = correct, no hints
            0.75 = correct, L1 only
            0.50 = correct, L2
            0.25 = correct, L3
            0.00 = incorrect, or L4 used

        Any hint level outside 0-3 also yields 0.0.
        """
        if not is_correct:
            return 0.0
        # Looked up on the class (not ``self``) because this method never
        # needed instance state; level 4 is simply absent from the table.
        return FeatureEngineer._OUTCOME_BY_HINT_LEVEL.get(max_hint_level, 0.0)
|
|
|
|
| |
| |
| |
|
|
def _run_examples():
    """Print four worked examples of the feature pipeline to stdout.

    Each example builds an ``InteractionSignals``, runs it through a
    ``FeatureEngineer`` using the default median-time table, and prints the
    resulting LDS, MCS and quadrant next to the range the formulas yield.
    Nothing is asserted; this is a manual smoke test.
    """
    fe = FeatureEngineer()

    print("=" * 70)
    print("MathLingua Feature Engineering — Worked Examples")
    print("=" * 70)

    # Example 1: no hints, solved faster than the level median.
    signals1 = InteractionSignals(
        max_hint_level=0,
        time_before_first_hint=0.0,
        total_time=25.0,
        is_correct=True,
        num_attempts=1,
        question_level="2.1",
    )
    f1 = fe.compute(signals1)
    print("\nExample 1 — Strong student, no hints, fast solve")
    print(f"  LDS = {f1.lds:.3f}  (expected ~0.0)")
    print(f"  MCS = {f1.mcs:.3f}  (expected ~1.0)")
    print(f"  Quadrant: {f1.quadrant}")
    print(f"  Weighted outcome: {fe.compute_weighted_outcome(True, 0)}")

    # Example 2: heavy scaffold use up to L3, correct on the second attempt.
    # NOTE(review): the MCS expectation previously read "~0.3-0.4", but the
    # documented formula gives about 0.681 for these inputs; the printed
    # range was updated to match. Confirm against the specification.
    signals2 = InteractionSignals(
        max_hint_level=3,
        time_before_first_hint=5.0,
        total_time=90.0,
        time_at_L1=10.0,
        time_at_L2=15.0,
        time_at_L3=30.0,
        is_correct=True,
        num_attempts=2,
        question_level="2.3",
    )
    f2 = fe.compute(signals2)
    print("\nExample 2 — Language-dependent, used L3, correct on 2nd try")
    print(f"  LDS = {f2.lds:.3f}  (expected ~0.5-0.6)")
    print(f"  MCS = {f2.mcs:.3f}  (expected ~0.6-0.7)")
    print(f"  Quadrant: {f2.quadrant}")
    print(f"  Weighted outcome: {fe.compute_weighted_outcome(True, 3)}")

    # Example 3: the ideal case on the easiest level.
    signals3 = InteractionSignals(
        max_hint_level=0,
        total_time=15.0,
        is_correct=True,
        num_attempts=1,
        question_level="1.1",
    )
    f3 = fe.compute(signals3)
    print("\nExample 3 — Perfect interaction (very easy level)")
    print(f"  LDS = {f3.lds:.3f}  (expected 0.0)")
    print(f"  MCS = {f3.mcs:.3f}  (expected 1.0)")
    print(f"  Quadrant: {f3.quadrant}")

    # Example 4: every scaffold level opened, still incorrect after 3 tries.
    # NOTE(review): the expectations previously read LDS "~0.7-0.9" and MCS
    # "~0.05-0.15", but the documented formulas give about 0.920 and 0.253;
    # the printed ranges were updated to match. Confirm against the spec.
    signals4 = InteractionSignals(
        max_hint_level=4,
        time_before_first_hint=3.0,
        total_time=120.0,
        time_at_L1=10.0,
        time_at_L2=15.0,
        time_at_L3=20.0,
        time_at_L4=40.0,
        is_correct=False,
        num_attempts=3,
        question_level="3.1",
    )
    f4 = fe.compute(signals4)
    print("\nExample 4 — Struggling student, used all scaffolds, incorrect")
    print(f"  LDS = {f4.lds:.3f}  (expected ~0.9)")
    print(f"  MCS = {f4.mcs:.3f}  (expected ~0.2-0.3)")
    print(f"  Quadrant: {f4.quadrant}")
    print(f"  Weighted outcome: {fe.compute_weighted_outcome(False, 4)}")

    print("\n" + "=" * 70)
    print("All examples computed successfully ✓")
    print("=" * 70)
|
|
|
|
# Run the worked examples only when this file is executed as a script,
# never as a side effect of importing the module.
if __name__ == "__main__":
    _run_examples()
|
|