| """ |
| Multi-Modal Confusion Detection Module for ContextFlow |
| |
| Combines audio, biometric, and behavioral signals for comprehensive confusion detection. |
| Addresses: Multi-modal Confusion Detection requirement |
| """ |
|
|
| import numpy as np |
| from typing import Dict, List, Optional, Tuple, Any |
| from dataclasses import dataclass, field |
| from collections import deque |
| import threading |
| import time |
|
|
|
|
@dataclass
class AudioFeatures:
    """Speech-derived measurements describing one window of audio.

    Every field defaults to zero, so a freshly constructed instance
    represents "nothing measured".
    """

    speech_rate: float = 0.0
    pause_frequency: float = 0.0
    pause_duration: float = 0.0
    pitch_variation: float = 0.0
    volume_level: float = 0.0
    hesitations: int = 0
    question_markers: int = 0

    def to_vector(self) -> np.ndarray:
        """Return the seven fields as a scaled 1-D array.

        Each field is divided by a fixed constant, except ``volume_level``
        which is passed through unchanged.  The element order matches the
        field declaration order.
        """
        rate = self.speech_rate / 200
        pause_count = self.pause_frequency / 10
        pause_length = self.pause_duration / 5000
        pitch = self.pitch_variation / 50
        volume = self.volume_level  # used as-is, no scaling constant
        hesitation_count = self.hesitations / 20
        question_count = self.question_markers / 10

        components = [
            rate,
            pause_count,
            pause_length,
            pitch,
            volume,
            hesitation_count,
            question_count,
        ]
        return np.array(components)
|
|
|
|
@dataclass
class BiometricFeatures:
    """Physiological measurements used as confusion signals.

    All fields default to zero.
    """

    heart_rate: float = 0.0
    heart_rate_variability: float = 0.0
    skin_conductance: float = 0.0
    skin_temperature: float = 0.0
    eye_blink_rate: float = 0.0
    eye_open_duration: float = 0.0

    def to_vector(self) -> np.ndarray:
        """Return the six fields as a shifted-and-scaled 1-D array.

        Each element is ``(value - offset) / scale`` with fixed constants.
        Heart rate, skin temperature and blink rate use a non-zero offset;
        the remaining fields are only scaled.  Element order follows the
        field declaration order.
        """
        # (value, offset, scale) per output element.
        spec = (
            (self.heart_rate, 60, 60),
            (self.heart_rate_variability, 0, 50),
            (self.skin_conductance, 0, 20),
            (self.skin_temperature, 36, 2),
            (self.eye_blink_rate, 15, 15),
            (self.eye_open_duration, 0, 500),
        )
        return np.array([(value - offset) / scale for value, offset, scale in spec])
|
|
|
|
@dataclass
class BehavioralFeatures:
    """Interaction-derived measurements used as confusion signals.

    All fields default to zero.
    """

    mouse_hesitation: float = 0.0
    scroll_reversals: float = 0.0
    time_on_page: float = 0.0
    click_frequency: float = 0.0
    back_button: float = 0.0
    tab_switches: float = 0.0
    copy_attempts: float = 0.0
    search_usage: float = 0.0

    def to_vector(self) -> np.ndarray:
        """Return the eight fields as a scaled 1-D array.

        Every field is divided by its own fixed constant; element order
        follows the field declaration order.
        """
        values = (
            self.mouse_hesitation,
            self.scroll_reversals,
            self.time_on_page,
            self.click_frequency,
            self.back_button,
            self.tab_switches,
            self.copy_attempts,
            self.search_usage,
        )
        # Divisors line up positionally with ``values``.
        divisors = (5, 10, 300, 20, 5, 10, 5, 5)
        return np.array([value / divisor for value, divisor in zip(values, divisors)])
|
|
|
|
class MultiModalFusion:
    """
    Fuses multiple signal modalities into a unified confusion assessment.

    Supported modalities:
    - Audio: speech rate, hesitations, pause frequency
    - Biometric: heart rate, heart-rate variability, skin conductance
    - Behavioral: mouse hesitation, scroll reversals, back-button usage

    Each modality keeps a bounded history of feature records.  A
    per-modality score in [0, 1] is derived from fixed rules applied to the
    mean of the most recent records, and the fused score is the weighted
    sum of the three per-modality scores.
    """

    def __init__(
        self,
        audio_weight: float = 0.2,
        biometric_weight: float = 0.3,
        behavioral_weight: float = 0.5
    ):
        """Create a fusion engine.

        Args:
            audio_weight: Multiplier applied to the audio score.
            biometric_weight: Multiplier applied to the biometric score.
            behavioral_weight: Multiplier applied to the behavioral score.

        The weights are used exactly as given; they are not normalised.
        """
        self.audio_weight = audio_weight
        self.biometric_weight = biometric_weight
        self.behavioral_weight = behavioral_weight

        # Per-modality thresholds.  They are stored for callers but no
        # method of this class consults them.
        self.audio_threshold = 0.6
        self.biometric_threshold = 0.7
        self.behavioral_threshold = 0.5

        # Bounded histories: the oldest record is discarded once full.
        self.audio_history = deque(maxlen=30)
        self.biometric_history = deque(maxlen=60)
        self.behavioral_history = deque(maxlen=100)

        # Guards the three histories.  threading.Lock is not reentrant, so
        # no method may call another lock-taking method while holding it.
        self.lock = threading.Lock()

    def update_audio(self, features: "AudioFeatures"):
        """Append an audio feature record to the audio history."""
        with self.lock:
            self.audio_history.append(features)

    def update_biometric(self, features: "BiometricFeatures"):
        """Append a biometric feature record to the biometric history."""
        with self.lock:
            self.biometric_history.append(features)

    def update_behavioral(self, features: "BehavioralFeatures"):
        """Append a behavioral feature record to the behavioral history."""
        with self.lock:
            self.behavioral_history.append(features)

    def _snapshot_histories(self) -> Tuple[List[Any], List[Any], List[Any]]:
        """Copy all three histories under a single lock acquisition."""
        with self.lock:
            return (
                list(self.audio_history),
                list(self.biometric_history),
                list(self.behavioral_history),
            )

    def _score_audio(self, records: List[Any]) -> Tuple[float, str]:
        """Score a snapshot of audio records (no locking, no shared state)."""
        if not records:
            return 0.0, "no_audio"

        recent = records[-10:]
        speech_rate = np.mean([f.speech_rate for f in recent])
        hesitations = np.mean([f.hesitations for f in recent])
        pause_freq = np.mean([f.pause_frequency for f in recent])

        confusion = 0.0
        if speech_rate < 120:
            confusion += 0.3
        if hesitations > 5:
            confusion += 0.4
        if pause_freq > 3:
            confusion += 0.3
        confusion = min(confusion, 1.0)

        return confusion, self._get_audio_reason(hesitations, speech_rate, pause_freq)

    def _score_biometric(self, records: List[Any]) -> Tuple[float, str]:
        """Score a snapshot of biometric records (no locking, no shared state)."""
        if not records:
            return 0.0, "no_biometric"

        recent = records[-20:]
        hr = np.mean([f.heart_rate for f in recent])
        hrv = np.mean([f.heart_rate_variability for f in recent])
        gsr = np.mean([f.skin_conductance for f in recent])

        confusion = 0.0
        if hr > 85:
            confusion += 0.3
        if hrv < 30:
            confusion += 0.3
        if gsr > 10:
            confusion += 0.4
        confusion = min(confusion, 1.0)

        return confusion, self._get_biometric_reason(hr, hrv, gsr)

    def _score_behavioral(self, records: List[Any]) -> Tuple[float, str]:
        """Score a snapshot of behavioral records (no locking, no shared state)."""
        if not records:
            return 0.0, "no_behavioral"

        recent = records[-20:]
        mouse_h = np.mean([f.mouse_hesitation for f in recent])
        scrolls = np.mean([f.scroll_reversals for f in recent])
        back_btn = np.mean([f.back_button for f in recent])

        confusion = 0.0
        if mouse_h > 3:
            confusion += 0.3
        if scrolls > 5:
            confusion += 0.3
        if back_btn > 3:
            confusion += 0.2
        confusion = min(confusion, 1.0)

        return confusion, self._get_behavioral_reason(mouse_h, scrolls, back_btn)

    def get_audio_confusion(self) -> Tuple[float, str]:
        """Get the confusion score and reason derived from audio signals.

        Returns:
            ``(score, reason)``.  ``(0.0, "no_audio")`` when the audio
            history is empty; otherwise the score is built from the mean of
            the last 10 records and the reason is a comma-separated list of
            triggered rules, or ``"normal"``.
        """
        with self.lock:
            records = list(self.audio_history)
        return self._score_audio(records)

    def _get_audio_reason(self, hesitations: float, speech_rate: float, pause_freq: float) -> str:
        """Generate a human-readable audio confusion reason."""
        reasons = []
        if hesitations > 5:
            reasons.append("frequent_hesitations")
        if speech_rate < 120:
            reasons.append("slow_speech")
        if pause_freq > 3:
            reasons.append("frequent_pauses")
        return ",".join(reasons) if reasons else "normal"

    def get_biometric_confusion(self) -> Tuple[float, str]:
        """Get the confusion score and reason derived from biometric signals.

        Returns:
            ``(score, reason)``.  ``(0.0, "no_biometric")`` when the
            biometric history is empty; otherwise the score is built from
            the mean of the last 20 records.
        """
        with self.lock:
            records = list(self.biometric_history)
        return self._score_biometric(records)

    def _get_biometric_reason(self, hr: float, hrv: float, gsr: float) -> str:
        """Generate a human-readable biometric confusion reason."""
        reasons = []
        if hr > 85:
            reasons.append("elevated_heart_rate")
        if hrv < 30:
            reasons.append("low_hrv")
        if gsr > 10:
            reasons.append("high_arousal")
        return ",".join(reasons) if reasons else "normal"

    def get_behavioral_confusion(self) -> Tuple[float, str]:
        """Get the confusion score and reason derived from behavioral signals.

        Returns:
            ``(score, reason)``.  ``(0.0, "no_behavioral")`` when the
            behavioral history is empty; otherwise the score is built from
            the mean of the last 20 records.  The three rules add up to at
            most 0.8.
        """
        with self.lock:
            records = list(self.behavioral_history)
        return self._score_behavioral(records)

    def _get_behavioral_reason(self, mouse_h: float, scrolls: float, back_btn: float) -> str:
        """Generate a human-readable behavioral confusion reason."""
        reasons = []
        if mouse_h > 3:
            reasons.append("mouse_hesitation")
        if scrolls > 5:
            reasons.append("scroll_reversals")
        if back_btn > 3:
            reasons.append("back_button_usage")
        return ",".join(reasons) if reasons else "normal"

    def get_fused_confusion(self) -> Dict[str, Any]:
        """
        Get the fused multi-modal confusion assessment.

        All three histories are copied under one lock acquisition so that
        the scores, the confidence and the availability flags describe the
        same moment even while other threads keep appending.

        Returns:
            Dict with the fused score, a confidence value (fraction of the
            three modalities that have data), the primary indicator,
            per-modality scores and reasons, a suggested action and the
            availability flags.
        """
        audio_records, biometric_records, behavioral_records = self._snapshot_histories()

        audio_score, audio_reason = self._score_audio(audio_records)
        biometric_score, biometric_reason = self._score_biometric(biometric_records)
        behavioral_score, behavioral_reason = self._score_behavioral(behavioral_records)

        available = {
            'audio': len(audio_records) > 0,
            'biometric': len(biometric_records) > 0,
            'behavioral': len(behavioral_records) > 0
        }

        contributions = {
            'audio': audio_score * self.audio_weight,
            'biometric': biometric_score * self.biometric_weight,
            'behavioral': behavioral_score * self.behavioral_weight
        }

        # Weighted sum; a modality without data contributes 0.
        fused_score = (
            contributions['audio'] +
            contributions['biometric'] +
            contributions['behavioral']
        )

        n_signals = sum(available.values())
        confidence = min(n_signals / 3.0, 1.0)

        # Only a modality that actually has data can be the primary
        # indicator; otherwise an all-zero tie would always name 'audio'
        # even when no audio was ever supplied.  With no data at all the
        # first key is kept so the value is always a valid modality name.
        candidates = [name for name, present in available.items() if present]
        if not candidates:
            candidates = list(contributions)
        primary_indicator = max(candidates, key=contributions.get)

        return {
            'confusion_score': fused_score,
            'confidence': confidence,
            'primary_indicator': primary_indicator,
            'audio_score': audio_score,
            'biometric_score': biometric_score,
            'behavioral_score': behavioral_score,
            'audio_reason': audio_reason,
            'biometric_reason': biometric_reason,
            'behavioral_reason': behavioral_reason,
            'suggested_action': self._get_suggested_action(fused_score, primary_indicator),
            'available_modalities': available
        }

    def _get_suggested_action(self, score: float, primary: str) -> str:
        """Map a fused score to an intervention name.

        ``primary`` is accepted for interface compatibility but does not
        influence the result.
        """
        if score < 0.3:
            return "continue_learning"
        elif score < 0.5:
            return "offer_hint"
        elif score < 0.7:
            return "trigger_ai_explanation"
        else:
            return "pause_and_assess"

    def reset(self):
        """Clear all three histories."""
        with self.lock:
            self.audio_history.clear()
            self.biometric_history.clear()
            self.behavioral_history.clear()
|
|
|
|
class AudioAnalyzer:
    """
    Real-time audio analysis for confusion detection.

    Requires: microphone input (simulated for now)

    Samples are only buffered while recording is active.  ``analyze`` fills
    in the pause and volume fields; ``speech_rate``, ``pitch_variation``,
    ``hesitations`` and ``question_markers`` are not computed here and keep
    their default values.
    """

    def __init__(self):
        # Bounded buffer of {'amplitude', 'timestamp'} dicts.
        self.sample_buffer = deque(maxlen=1000)
        self.is_recording = False
        self.sample_rate = 16000

    def start_recording(self):
        """Start audio capture and discard any previously buffered samples."""
        self.is_recording = True
        self.sample_buffer.clear()

    def stop_recording(self):
        """Stop audio capture; buffered samples are kept."""
        self.is_recording = False

    def add_audio_sample(self, amplitude: float):
        """Buffer one amplitude sample; ignored unless recording is active."""
        if self.is_recording:
            self.sample_buffer.append({
                'amplitude': amplitude,
                'timestamp': time.time()
            })

    def analyze(self) -> "AudioFeatures":
        """Analyze the audio buffer and extract pause and volume features.

        A sample counts as a pause when its amplitude is below 30% of the
        mean amplitude of the buffer.  Consecutive pause samples form one
        pause run; a run that is still open at the end of the buffer is
        counted as well.

        Returns:
            Default ``AudioFeatures()`` when fewer than 100 samples are
            buffered.  Otherwise a record with:
            - ``pause_frequency``: pause runs per minute of buffered audio,
            - ``pause_duration``: mean run length converted to milliseconds,
            - ``volume_level``: mean amplitude.
            The time conversions assume one buffered entry per sample at
            ``sample_rate``.
        """
        if len(self.sample_buffer) < 100:
            return AudioFeatures()

        # Explicit array so the threshold comparison below is an ordinary
        # element-wise NumPy comparison rather than list-vs-scalar.
        samples = np.asarray(
            [s['amplitude'] for s in self.sample_buffer], dtype=float
        )
        mean_amplitude = samples.mean()

        features = AudioFeatures()

        is_pause = samples < mean_amplitude * 0.3

        # Run-length detection: pad with False on both sides so that every
        # run, including one touching either end of the buffer, has both a
        # rising (+1) and a falling (-1) edge.
        padded = np.concatenate(([False], is_pause, [False])).astype(np.int8)
        edges = np.diff(padded)
        run_starts = np.flatnonzero(edges == 1)
        run_ends = np.flatnonzero(edges == -1)
        pause_durations = run_ends - run_starts  # lengths in samples

        buffer_seconds = len(samples) / self.sample_rate
        features.pause_frequency = len(pause_durations) / buffer_seconds * 60
        features.pause_duration = (
            np.mean(pause_durations) * 1000 / self.sample_rate
            if len(pause_durations) else 0
        )

        features.volume_level = mean_amplitude

        return features
|
|
|
|
class BiometricProcessor:
    """
    Processes biometric data for confusion detection.

    Supports: heart rate monitors, GSR sensors, eye trackers

    Readings are kept in a bounded buffer; ``analyze`` averages each
    channel over the readings that actually supplied a value for it.
    """

    def __init__(self):
        # Holds the 60 most recent readings.
        self.data_buffer = deque(maxlen=60)

    def add_reading(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Buffer one biometric reading.

        Any channel may be omitted (``None``); the reading is timestamped
        with ``time.time()``.
        """
        self.data_buffer.append({
            'heart_rate': heart_rate,
            'hrv': hrv,
            'gsr': gsr,
            'skin_temp': skin_temp,
            'blink_rate': blink_rate,
            'eye_open': eye_open,
            'timestamp': time.time()
        })

    def analyze(self) -> "BiometricFeatures":
        """Average the buffered readings into a feature record.

        Returns:
            Default ``BiometricFeatures()`` when fewer than 5 readings are
            buffered.  Otherwise each feature is the mean of the non-``None``
            values of its channel; a channel with no values keeps the
            feature's default.
        """
        if len(self.data_buffer) < 5:
            return BiometricFeatures()

        features = BiometricFeatures()

        # Buffer key -> BiometricFeatures attribute.
        channels = (
            ('heart_rate', 'heart_rate'),
            ('hrv', 'heart_rate_variability'),
            ('gsr', 'skin_conductance'),
            ('skin_temp', 'skin_temperature'),
            ('blink_rate', 'eye_blink_rate'),
            ('eye_open', 'eye_open_duration'),
        )
        for key, attribute in channels:
            # Compare against None explicitly: a truthiness test would also
            # throw away legitimate 0.0 readings.
            values = [d[key] for d in self.data_buffer if d[key] is not None]
            if values:
                setattr(features, attribute, np.mean(values))

        return features
|
|
|
|
| |
class MultiModalAPI:
    """Entry point for multi-modal confusion detection.

    Routes raw audio, biometric and behavioral inputs through their
    analyzers into a shared ``MultiModalFusion`` instance.  The class
    itself contains no HTTP handling.
    """

    def __init__(self, fusion: "MultiModalFusion"):
        """Wire the analyzers to ``fusion``.

        Audio capture is started immediately: the analyzer ignores samples
        while it is not recording, so without this every sample passed to
        ``process_audio`` would be dropped.
        """
        self.fusion = fusion
        self.audio_analyzer = AudioAnalyzer()
        self.audio_analyzer.start_recording()
        self.biometric_processor = BiometricProcessor()

    def process_audio(self, amplitude: float):
        """Buffer one audio sample, re-analyze, and push the result to fusion.

        Returns:
            The audio features produced by the analyzer for the current
            buffer.
        """
        self.audio_analyzer.add_audio_sample(amplitude)
        features = self.audio_analyzer.analyze()
        self.fusion.update_audio(features)
        return features

    def process_biometric(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Buffer one biometric reading, re-analyze, and push the result to fusion.

        Every channel is optional; omitted channels are recorded as ``None``.

        Returns:
            The biometric features produced by the processor for the
            current buffer.
        """
        self.biometric_processor.add_reading(
            heart_rate=heart_rate,
            hrv=hrv,
            gsr=gsr,
            skin_temp=skin_temp,
            blink_rate=blink_rate,
            eye_open=eye_open
        )
        features = self.biometric_processor.analyze()
        self.fusion.update_biometric(features)
        return features

    def process_behavioral(
        self,
        mouse_hesitation: float = 0,
        scroll_reversals: float = 0,
        time_on_page: float = 0,
        click_frequency: float = 0.0,
        back_button: float = 0.0,
        tab_switches: float = 0.0,
        copy_attempts: float = 0.0,
        search_usage: float = 0.0
    ):
        """Build a behavioral feature record and push it to fusion.

        ``back_button`` is one of the signals the fusion rules score, so it
        must be suppliable here; the remaining fields are forwarded for
        completeness.  All parameters default to zero.

        Returns:
            The ``BehavioralFeatures`` record that was added.
        """
        features = BehavioralFeatures(
            mouse_hesitation=mouse_hesitation,
            scroll_reversals=scroll_reversals,
            time_on_page=time_on_page,
            click_frequency=click_frequency,
            back_button=back_button,
            tab_switches=tab_switches,
            copy_attempts=copy_attempts,
            search_usage=search_usage
        )
        self.fusion.update_behavioral(features)
        return features

    def get_confusion_assessment(self) -> Dict:
        """Return the fused multi-modal confusion assessment."""
        return self.fusion.get_fused_confusion()
|
|
|
|
| |
if __name__ == "__main__":
    # Demo: feed 20 synthetic steps through every modality, then print the
    # fused assessment.
    fusion = MultiModalFusion()
    api = MultiModalAPI(fusion)

    print("Multi-Modal Confusion Detection Demo")
    print("=" * 40)

    for step in range(20):
        # Amplitude drops after the first ten steps.
        amplitude = 0.3 if step < 10 else 0.1
        api.process_audio(amplitude=amplitude)

        # Heart rate and skin conductance drift up while HRV drifts down.
        api.process_biometric(
            heart_rate=75 + step * 0.5,
            hrv=40 - step * 0.3,
            gsr=8 + step * 0.2,
        )

        # Interaction signals grow steadily.
        api.process_behavioral(
            mouse_hesitation=2 + step * 0.2,
            scroll_reversals=3 + step * 0.3,
            time_on_page=60 + step * 3,
        )

    result = api.get_confusion_assessment()

    report = [
        f"Confusion Score: {result['confusion_score']:.2f}",
        f"Confidence: {result['confidence']:.2f}",
        f"Primary Indicator: {result['primary_indicator']}",
        f"Biometric Score: {result['biometric_score']:.2f}",
        f"Behavioral Score: {result['behavioral_score']:.2f}",
        f"Suggested Action: {result['suggested_action']}",
    ]
    for line in report:
        print(line)
|
|