namish10
/

contextflow-rl

+"""
+Multi-Modal Confusion Detection Module for ContextFlow
+Combines audio, biometric, and behavioral signals for comprehensive confusion detection.
+Addresses: Multi-modal Confusion Detection requirement
+"""
+import numpy as np
+from typing import Dict, List, Optional, Tuple, Any
+from dataclasses import dataclass, field
+from collections import deque
+import threading
+import time
@dataclass
class AudioFeatures:
    """Speech-derived measurements used as confusion signals.

    All fields default to zero, so a freshly constructed instance
    represents "nothing measured yet".
    """

    speech_rate: float = 0.0       # words per minute
    pause_frequency: float = 0.0   # pauses per minute
    pause_duration: float = 0.0    # mean pause length, milliseconds
    pitch_variation: float = 0.0   # standard deviation of pitch
    volume_level: float = 0.0      # mean volume, nominally 0-1
    hesitations: int = 0           # filler-word count ("uh", "um", ...)
    question_markers: int = 0      # rising-intonation count

    def to_vector(self) -> np.ndarray:
        """Return the seven fields as a scaled feature vector.

        Each field is divided by a fixed nominal maximum so typical values
        land roughly in 0-1; ``volume_level`` is passed through unscaled.
        No clipping is applied.
        """
        raw_values = np.array([
            self.speech_rate,
            self.pause_frequency,
            self.pause_duration,
            self.pitch_variation,
            self.volume_level,
            self.hesitations,
            self.question_markers,
        ])
        # Divisors line up one-to-one with raw_values; 1 leaves volume as-is.
        divisors = np.array([200, 10, 5000, 50, 1, 20, 10])
        return raw_values / divisors
@dataclass
class BiometricFeatures:
    """Physiological measurements used as confusion signals.

    All fields default to zero, so a freshly constructed instance
    represents "nothing measured yet".
    """

    heart_rate: float = 0.0              # beats per minute
    heart_rate_variability: float = 0.0  # HRV metric
    skin_conductance: float = 0.0        # GSR, microsiemens
    skin_temperature: float = 0.0        # degrees Celsius
    eye_blink_rate: float = 0.0          # blinks per minute
    eye_open_duration: float = 0.0       # mean eye-open time, milliseconds

    def to_vector(self) -> np.ndarray:
        """Return the six fields as a centred and scaled feature vector.

        Heart rate, skin temperature and blink rate are shifted by a fixed
        baseline (60 BPM, 36 C, 15 blinks/min) before scaling; the other
        fields are only scaled. No clipping is applied, so an all-default
        instance yields negative entries for the centred fields.
        """
        readings = np.array([
            self.heart_rate,
            self.heart_rate_variability,
            self.skin_conductance,
            self.skin_temperature,
            self.eye_blink_rate,
            self.eye_open_duration,
        ])
        # Baselines and spans line up one-to-one with `readings`;
        # a baseline of 0 means "scale only".
        baselines = np.array([60, 0, 0, 36, 15, 0])
        spans = np.array([60, 50, 20, 2, 15, 500])
        return (readings - baselines) / spans
@dataclass
class BehavioralFeatures:
    """Interaction-derived confusion signals (mouse, scrolling, navigation).

    All fields default to zero. Units are not stated in this file; the
    divisors in ``to_vector`` are the only indication of expected ranges.
    """

    mouse_hesitation: float = 0.0
    scroll_reversals: float = 0.0
    time_on_page: float = 0.0
    click_frequency: float = 0.0
    back_button: float = 0.0
    tab_switches: float = 0.0
    copy_attempts: float = 0.0
    search_usage: float = 0.0

    def to_vector(self) -> np.ndarray:
        """Return the eight fields as a scaled feature vector.

        Each field is divided by a fixed nominal maximum; no clipping is
        applied.
        """
        signals = np.array([
            self.mouse_hesitation,
            self.scroll_reversals,
            self.time_on_page,
            self.click_frequency,
            self.back_button,
            self.tab_switches,
            self.copy_attempts,
            self.search_usage,
        ])
        # Divisors line up one-to-one with `signals`.
        nominal_max = np.array([5, 10, 300, 20, 5, 10, 5, 5])
        return signals / nominal_max
class MultiModalFusion:
    """
    Fuses multiple signal modalities into unified confusion assessment.

    Supported modalities:
    - Audio: Speech patterns, hesitations
    - Biometric: Heart rate, GSR, eye tracking
    - Behavioral: Mouse, keyboard, scrolling patterns

    Each modality keeps a bounded history of feature samples. A modality's
    score is the capped sum of the weights of its triggered indicators, and
    the fused score is the weighted sum of the three modality scores.

    Notes:
    - Weights are NOT renormalised when a modality has no data; a missing
      modality contributes 0 to the fused score. Use the returned
      ``confidence`` / ``available_modalities`` to judge how much data
      backed the score.
    - Readings that are zero (the dataclass defaults) for speech rate,
      heart rate, HRV and skin conductance are treated as "not measured"
      and excluded from the averages, so an empty feature sample never
      reads as slow speech or low HRV.
    """

    # Indicator thresholds, shared by scoring and reason generation so the
    # two can never disagree.
    _HESITATION_LIMIT = 5
    _SLOW_SPEECH_WPM = 120
    _PAUSE_FREQ_LIMIT = 3
    _ELEVATED_HR_BPM = 85
    _LOW_HRV_LIMIT = 30
    _HIGH_GSR_LIMIT = 10
    _MOUSE_HESITATION_LIMIT = 3
    _SCROLL_REVERSAL_LIMIT = 5
    _BACK_BUTTON_LIMIT = 3

    def __init__(
        self,
        audio_weight: float = 0.2,
        biometric_weight: float = 0.3,
        behavioral_weight: float = 0.5
    ):
        """Create a fusion engine with the given per-modality weights.

        Args:
            audio_weight: Multiplier applied to the audio score.
            biometric_weight: Multiplier applied to the biometric score.
            behavioral_weight: Multiplier applied to the behavioral score.
        """
        self.audio_weight = audio_weight
        self.biometric_weight = biometric_weight
        self.behavioral_weight = behavioral_weight
        # Modality-specific thresholds (kept as public attributes; the
        # scoring in this class does not read them).
        self.audio_threshold = 0.6
        self.biometric_threshold = 0.7
        self.behavioral_threshold = 0.5
        # Bounded history buffers; the oldest sample is dropped when full.
        self.audio_history = deque(maxlen=30)
        self.biometric_history = deque(maxlen=60)
        self.behavioral_history = deque(maxlen=100)
        # Guards the three history buffers. Not re-entrant: never call a
        # public getter while holding it.
        self.lock = threading.Lock()

    # ------------------------------------------------------------------
    # Buffer updates
    # ------------------------------------------------------------------
    def update_audio(self, features: "AudioFeatures"):
        """Append one audio feature sample to the audio history."""
        with self.lock:
            self.audio_history.append(features)

    def update_biometric(self, features: "BiometricFeatures"):
        """Append one biometric feature sample to the biometric history."""
        with self.lock:
            self.biometric_history.append(features)

    def update_behavioral(self, features: "BehavioralFeatures"):
        """Append one behavioral feature sample to the behavioral history."""
        with self.lock:
            self.behavioral_history.append(features)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _mean_measured(values) -> Optional[float]:
        """Return the mean of the strictly positive values, or None if none.

        Zero is the "not measured" default of the feature dataclasses, so
        non-positive (and None/NaN) entries are skipped rather than averaged.
        """
        measured = [v for v in values if v is not None and v > 0]
        return float(np.mean(measured)) if measured else None

    @staticmethod
    def _combine(indicators) -> Tuple[float, str]:
        """Fold ``(triggered, weight, label)`` triples into (score, reason).

        The score is the sum of triggered weights capped at 1.0; the reason
        is the comma-joined labels of triggered indicators, or "normal".
        """
        triggered = [(weight, label) for hit, weight, label in indicators if hit]
        score = min(sum((weight for weight, _ in triggered), 0.0), 1.0)
        reason = ",".join(label for _, label in triggered) if triggered else "normal"
        return score, reason

    def _audio_indicators(self, hesitations, speech_rate, pause_freq):
        """Build audio indicator triples; ``speech_rate`` None means unknown."""
        return [
            (hesitations > self._HESITATION_LIMIT, 0.4, "frequent_hesitations"),
            (speech_rate is not None and speech_rate < self._SLOW_SPEECH_WPM,
             0.3, "slow_speech"),
            (pause_freq > self._PAUSE_FREQ_LIMIT, 0.3, "frequent_pauses"),
        ]

    def _biometric_indicators(self, hr, hrv, gsr):
        """Build biometric indicator triples; None means unknown."""
        return [
            (hr is not None and hr > self._ELEVATED_HR_BPM,
             0.3, "elevated_heart_rate"),
            (hrv is not None and hrv < self._LOW_HRV_LIMIT, 0.3, "low_hrv"),
            (gsr is not None and gsr > self._HIGH_GSR_LIMIT, 0.4, "high_arousal"),
        ]

    def _behavioral_indicators(self, mouse_h, scrolls, back_btn):
        """Build behavioral indicator triples (weights sum to 0.8)."""
        return [
            (mouse_h > self._MOUSE_HESITATION_LIMIT, 0.3, "mouse_hesitation"),
            (scrolls > self._SCROLL_REVERSAL_LIMIT, 0.3, "scroll_reversals"),
            (back_btn > self._BACK_BUTTON_LIMIT, 0.2, "back_button_usage"),
        ]

    # ------------------------------------------------------------------
    # Scoring on snapshots (no locking here; callers pass a copied list)
    # ------------------------------------------------------------------
    def _score_audio(self, samples: List[Any]) -> Tuple[float, str]:
        """Score a snapshot of the audio history (last 10 samples)."""
        if not samples:
            return 0.0, "no_audio"
        recent = samples[-10:]
        speech_rate = self._mean_measured(f.speech_rate for f in recent)
        hesitations = float(np.mean([f.hesitations for f in recent]))
        pause_freq = float(np.mean([f.pause_frequency for f in recent]))
        return self._combine(
            self._audio_indicators(hesitations, speech_rate, pause_freq)
        )

    def _score_biometric(self, samples: List[Any]) -> Tuple[float, str]:
        """Score a snapshot of the biometric history (last 20 samples)."""
        if not samples:
            return 0.0, "no_biometric"
        recent = samples[-20:]
        hr = self._mean_measured(f.heart_rate for f in recent)
        hrv = self._mean_measured(f.heart_rate_variability for f in recent)
        gsr = self._mean_measured(f.skin_conductance for f in recent)
        return self._combine(self._biometric_indicators(hr, hrv, gsr))

    def _score_behavioral(self, samples: List[Any]) -> Tuple[float, str]:
        """Score a snapshot of the behavioral history (last 20 samples)."""
        if not samples:
            return 0.0, "no_behavioral"
        recent = samples[-20:]
        mouse_h = float(np.mean([f.mouse_hesitation for f in recent]))
        scrolls = float(np.mean([f.scroll_reversals for f in recent]))
        back_btn = float(np.mean([f.back_button for f in recent]))
        return self._combine(
            self._behavioral_indicators(mouse_h, scrolls, back_btn)
        )

    # ------------------------------------------------------------------
    # Public per-modality scores
    # ------------------------------------------------------------------
    def get_audio_confusion(self) -> Tuple[float, str]:
        """Return ``(score, reason)`` for audio; ``(0.0, "no_audio")`` if empty."""
        with self.lock:
            snapshot = list(self.audio_history)
        return self._score_audio(snapshot)

    def _get_audio_reason(
        self,
        hesitations: float,
        speech_rate: Optional[float],
        pause_freq: float
    ) -> str:
        """Return the comma-joined audio reason labels, or "normal"."""
        return self._combine(
            self._audio_indicators(hesitations, speech_rate, pause_freq)
        )[1]

    def get_biometric_confusion(self) -> Tuple[float, str]:
        """Return ``(score, reason)`` for biometrics; ``(0.0, "no_biometric")`` if empty."""
        with self.lock:
            snapshot = list(self.biometric_history)
        return self._score_biometric(snapshot)

    def _get_biometric_reason(
        self,
        hr: Optional[float],
        hrv: Optional[float],
        gsr: Optional[float]
    ) -> str:
        """Return the comma-joined biometric reason labels, or "normal"."""
        return self._combine(self._biometric_indicators(hr, hrv, gsr))[1]

    def get_behavioral_confusion(self) -> Tuple[float, str]:
        """Return ``(score, reason)`` for behavior; ``(0.0, "no_behavioral")`` if empty."""
        with self.lock:
            snapshot = list(self.behavioral_history)
        return self._score_behavioral(snapshot)

    def _get_behavioral_reason(self, mouse_h: float, scrolls: float, back_btn: float) -> str:
        """Return the comma-joined behavioral reason labels, or "normal"."""
        return self._combine(
            self._behavioral_indicators(mouse_h, scrolls, back_btn)
        )[1]

    # ------------------------------------------------------------------
    # Fusion
    # ------------------------------------------------------------------
    def get_fused_confusion(self) -> Dict[str, Any]:
        """
        Get fused multi-modal confusion assessment.

        All three histories are copied under a single lock acquisition so
        the scores, availability flags and confidence describe the same
        instant even while other threads keep appending.

        Returns:
            Dict with keys ``confusion_score``, ``confidence``,
            ``primary_indicator``, ``audio_score``, ``biometric_score``,
            ``behavioral_score``, ``audio_reason``, ``biometric_reason``,
            ``behavioral_reason``, ``suggested_action`` and
            ``available_modalities``.
        """
        with self.lock:
            audio_samples = list(self.audio_history)
            biometric_samples = list(self.biometric_history)
            behavioral_samples = list(self.behavioral_history)

        audio_score, audio_reason = self._score_audio(audio_samples)
        biometric_score, biometric_reason = self._score_biometric(biometric_samples)
        behavioral_score, behavioral_reason = self._score_behavioral(behavioral_samples)

        # Weighted contribution of each modality to the fused score.
        contributions = {
            'audio': audio_score * self.audio_weight,
            'biometric': biometric_score * self.biometric_weight,
            'behavioral': behavioral_score * self.behavioral_weight
        }
        fused_score = (
            contributions['audio'] +
            contributions['biometric'] +
            contributions['behavioral']
        )

        # Confidence is simply the fraction of modalities that have data.
        available = {
            'audio': bool(audio_samples),
            'biometric': bool(biometric_samples),
            'behavioral': bool(behavioral_samples)
        }
        confidence = min(sum(available.values()) / 3.0, 1.0)

        # Highest weighted contribution; ties (including all-zero) resolve
        # to the first key in insertion order, i.e. 'audio' first.
        primary_indicator = max(contributions, key=contributions.get)

        return {
            'confusion_score': fused_score,
            'confidence': confidence,
            'primary_indicator': primary_indicator,
            'audio_score': audio_score,
            'biometric_score': biometric_score,
            'behavioral_score': behavioral_score,
            'audio_reason': audio_reason,
            'biometric_reason': biometric_reason,
            'behavioral_reason': behavioral_reason,
            'suggested_action': self._get_suggested_action(fused_score, primary_indicator),
            'available_modalities': available
        }

    def _get_suggested_action(self, score: float, primary: str) -> str:
        """Map a fused score to an intervention name.

        ``primary`` is accepted for interface compatibility but does not
        currently influence the result.
        """
        if score < 0.3:
            return "continue_learning"
        if score < 0.5:
            return "offer_hint"
        if score < 0.7:
            return "trigger_ai_explanation"
        return "pause_and_assess"

    def reset(self):
        """Clear all three history buffers."""
        with self.lock:
            self.audio_history.clear()
            self.biometric_history.clear()
            self.behavioral_history.clear()
class AudioAnalyzer:
    """
    Real-time audio analysis for confusion detection.

    Requires: microphone input (simulated for now)

    Samples are only stored while recording is active. ``analyze`` derives
    pause statistics and mean volume from the buffered amplitudes; it does
    not compute speech rate, pitch, hesitations or question markers, which
    stay at their defaults in the returned features.
    """

    def __init__(self):
        """Create an idle analyzer with an empty 1000-sample buffer."""
        self.sample_buffer = deque(maxlen=1000)
        self.is_recording = False
        self.sample_rate = 16000  # samples per second assumed by analyze()

    def start_recording(self):
        """Start audio capture, discarding any previously buffered samples."""
        self.is_recording = True
        self.sample_buffer.clear()

    def stop_recording(self):
        """Stop audio capture; buffered samples are kept."""
        self.is_recording = False

    def add_audio_sample(self, amplitude: float):
        """Buffer one amplitude sample; ignored unless recording is active."""
        if self.is_recording:
            self.sample_buffer.append({
                'amplitude': amplitude,
                'timestamp': time.time()
            })

    @staticmethod
    def _pause_run_lengths(is_pause) -> List[int]:
        """Return the lengths of consecutive runs of truthy flags.

        A run that extends to the end of the sequence is included, so a
        pause that is still in progress when the buffer ends is counted.
        """
        runs: List[int] = []
        current = 0
        for flag in is_pause:
            if flag:
                current += 1
            elif current:
                runs.append(current)
                current = 0
        if current:  # flush a pause that reaches the end of the buffer
            runs.append(current)
        return runs

    def analyze(self) -> "AudioFeatures":
        """Analyze audio buffer and extract features.

        Returns default (all-zero) features until at least 100 samples are
        buffered. Otherwise fills in:
        - ``pause_frequency``: pause runs per minute of buffered audio,
        - ``pause_duration``: mean pause run length in milliseconds,
        - ``volume_level``: mean amplitude.
        A sample counts as a pause when it is below 30% of the mean amplitude.
        """
        if len(self.sample_buffer) < 100:
            return AudioFeatures()

        amplitudes = np.asarray(
            [s['amplitude'] for s in self.sample_buffer], dtype=float
        )
        mean_amplitude = float(amplitudes.mean())
        threshold = mean_amplitude * 0.3
        pause_durations = self._pause_run_lengths(amplitudes < threshold)

        features = AudioFeatures()
        window_seconds = len(amplitudes) / self.sample_rate
        features.pause_frequency = len(pause_durations) / window_seconds * 60
        features.pause_duration = (
            float(np.mean(pause_durations)) * 1000 / self.sample_rate
            if pause_durations else 0
        )
        features.volume_level = mean_amplitude
        return features
class BiometricProcessor:
    """
    Processes biometric data for confusion detection.

    Supports: heart rate monitors, GSR sensors, eye trackers

    Readings are kept in a 60-entry rolling buffer. ``analyze`` averages
    each channel over the readings that actually carried a value.
    """

    # (reading key stored by add_reading, BiometricFeatures attribute)
    _FEATURE_FIELDS = (
        ('heart_rate', 'heart_rate'),
        ('hrv', 'heart_rate_variability'),
        ('gsr', 'skin_conductance'),
        ('skin_temp', 'skin_temperature'),
        ('blink_rate', 'eye_blink_rate'),
        ('eye_open', 'eye_open_duration'),
    )

    def __init__(self):
        """Create a processor with an empty 60-reading buffer."""
        self.data_buffer = deque(maxlen=60)

    def add_reading(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Add biometric reading.

        Any channel may be omitted (left as None); the reading is stored
        with the current wall-clock timestamp.
        """
        self.data_buffer.append({
            'heart_rate': heart_rate,
            'hrv': hrv,
            'gsr': gsr,
            'skin_temp': skin_temp,
            'blink_rate': blink_rate,
            'eye_open': eye_open,
            'timestamp': time.time()
        })

    def _mean_reading(self, key: str) -> Optional[float]:
        """Return the mean of the buffered values for ``key``.

        Falsy values (None and 0) are treated as "channel not reported" and
        skipped; returns None when no reading carried the channel.
        """
        values = [d[key] for d in self.data_buffer if d[key]]
        return float(np.mean(values)) if values else None

    def analyze(self) -> "BiometricFeatures":
        """Analyze biometric buffer and extract features.

        Returns default (all-zero) features until at least 5 readings are
        buffered. Otherwise each feature is the mean of its reported
        readings; a channel with no reported readings keeps its default.
        """
        if len(self.data_buffer) < 5:
            return BiometricFeatures()
        features = BiometricFeatures()
        for reading_key, attribute in self._FEATURE_FIELDS:
            mean_value = self._mean_reading(reading_key)
            if mean_value is not None:
                setattr(features, attribute, mean_value)
        return features
+# API integration
class MultiModalAPI:
    """In-process facade for multi-modal confusion detection.

    Wires an ``AudioAnalyzer`` and a ``BiometricProcessor`` to a
    ``MultiModalFusion`` instance: each ``process_*`` call turns raw input
    into a feature sample, pushes it into the fusion buffers and returns it.
    This class does no HTTP handling itself; it is the layer an HTTP/REST
    endpoint would call into.
    """

    def __init__(self, fusion: "MultiModalFusion"):
        """Attach to ``fusion`` and create the per-modality processors."""
        self.fusion = fusion
        self.audio_analyzer = AudioAnalyzer()
        # The analyzer ignores samples until recording is started; without
        # this, every process_audio() call would be silently dropped.
        self.audio_analyzer.start_recording()
        self.biometric_processor = BiometricProcessor()

    def process_audio(self, amplitude: float):
        """Process audio sample.

        Buffers the amplitude, re-analyzes the audio buffer and pushes the
        resulting features into the fusion engine.

        Returns:
            The ``AudioFeatures`` produced by the analyzer (all defaults
            while the analyzer has too few samples).
        """
        self.audio_analyzer.add_audio_sample(amplitude)
        features = self.audio_analyzer.analyze()
        self.fusion.update_audio(features)
        return features

    def process_biometric(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Process biometric data.

        Stores one reading (omitted channels stay None), re-analyzes the
        biometric buffer and pushes the resulting features into the fusion
        engine.

        Returns:
            The ``BiometricFeatures`` produced by the processor.
        """
        self.biometric_processor.add_reading(
            heart_rate=heart_rate,
            hrv=hrv,
            gsr=gsr,
            skin_temp=skin_temp,
            blink_rate=blink_rate,
            eye_open=eye_open
        )
        features = self.biometric_processor.analyze()
        self.fusion.update_biometric(features)
        return features

    def process_behavioral(
        self,
        mouse_hesitation: float = 0,
        scroll_reversals: float = 0,
        time_on_page: float = 0,
        click_frequency: float = 0,
        back_button: float = 0,
        tab_switches: float = 0,
        copy_attempts: float = 0,
        search_usage: float = 0
    ):
        """Process behavioral data.

        Builds one ``BehavioralFeatures`` sample from the given signals
        (unspecified signals default to 0) and pushes it into the fusion
        engine.

        Returns:
            The ``BehavioralFeatures`` sample that was recorded.
        """
        features = BehavioralFeatures(
            mouse_hesitation=mouse_hesitation,
            scroll_reversals=scroll_reversals,
            time_on_page=time_on_page,
            click_frequency=click_frequency,
            back_button=back_button,
            tab_switches=tab_switches,
            copy_attempts=copy_attempts,
            search_usage=search_usage
        )
        self.fusion.update_behavioral(features)
        return features

    def get_confusion_assessment(self) -> Dict[str, Any]:
        """Get multi-modal confusion assessment from the fusion engine."""
        return self.fusion.get_fused_confusion()
+# Demo
if __name__ == "__main__":
    # Small end-to-end walkthrough: feed 20 synthetic steps through the
    # API facade, then print the fused assessment.
    fusion = MultiModalFusion()
    api = MultiModalAPI(fusion)

    print("Multi-Modal Confusion Detection Demo")
    print("=" * 40)

    for step in range(20):
        # Audio amplitude drops from 0.3 to 0.1 halfway through the run.
        amplitude = 0.1 if step >= 10 else 0.3
        api.process_audio(amplitude=amplitude)

        # Biometrics drift toward stress: HR and GSR rise, HRV falls.
        api.process_biometric(
            heart_rate=75 + step * 0.5,
            hrv=40 - step * 0.3,
            gsr=8 + step * 0.2,
        )

        # Behavior degrades: more hesitation and reversals, longer dwell.
        api.process_behavioral(
            mouse_hesitation=2 + step * 0.2,
            scroll_reversals=3 + step * 0.3,
            time_on_page=60 + step * 3,
        )

    # Report the fused result.
    result = api.get_confusion_assessment()
    print(f"Confusion Score: {result['confusion_score']:.2f}")
    print(f"Confidence: {result['confidence']:.2f}")
    print(f"Primary Indicator: {result['primary_indicator']}")
    print(f"Biometric Score: {result['biometric_score']:.2f}")
    print(f"Behavioral Score: {result['behavioral_score']:.2f}")
    print(f"Suggested Action: {result['suggested_action']}")