# contextflow-rl / multimodal_detection.py
# namish10's picture
# Upload multimodal_detection.py with huggingface_hub
# 82e4a98 verified
"""
Multi-Modal Confusion Detection Module for ContextFlow
Combines audio, biometric, and behavioral signals for comprehensive confusion detection.
Addresses: Multi-modal Confusion Detection requirement
"""
import numpy as np
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass, field
from collections import deque
import threading
import time
@dataclass
class AudioFeatures:
    """Audio features extracted from voice/speech."""
    speech_rate: float = 0.0      # Words per minute
    pause_frequency: float = 0.0  # Pauses per minute
    pause_duration: float = 0.0   # Average pause duration (ms)
    pitch_variation: float = 0.0  # Pitch standard deviation
    volume_level: float = 0.0     # Average volume (0-1)
    hesitations: int = 0          # Count of "uh", "um", etc.
    question_markers: int = 0     # Rising intonation count

    def to_vector(self) -> np.ndarray:
        """Return a 7-dim feature vector, each entry scaled to roughly 0-1."""
        scaled = (
            self.speech_rate / 200,     # typical upper speaking rate
            self.pause_frequency / 10,
            self.pause_duration / 5000,
            self.pitch_variation / 50,
            self.volume_level,          # already 0-1
            self.hesitations / 20,
            self.question_markers / 10,
        )
        return np.array(scaled)
@dataclass
class BiometricFeatures:
    """Biometric features for confusion detection."""
    heart_rate: float = 0.0              # BPM
    heart_rate_variability: float = 0.0  # HRV metric
    skin_conductance: float = 0.0        # GSR (microsiemens)
    skin_temperature: float = 0.0        # Celsius
    eye_blink_rate: float = 0.0          # Blinks per minute
    eye_open_duration: float = 0.0       # Average eye open (ms)

    def to_vector(self) -> np.ndarray:
        """Return a 6-dim vector, centered/scaled around physiological baselines."""
        hr_centered = (self.heart_rate - 60) / 60         # resting-HR baseline
        temp_centered = (self.skin_temperature - 36) / 2  # skin-temp baseline (C)
        blink_centered = (self.eye_blink_rate - 15) / 15  # normal blink rate
        return np.array([
            hr_centered,
            self.heart_rate_variability / 50,
            self.skin_conductance / 20,
            temp_centered,
            blink_centered,
            self.eye_open_duration / 500,
        ])
@dataclass
class BehavioralFeatures:
    """Behavioral features (existing confusion signals)."""
    mouse_hesitation: float = 0.0
    scroll_reversals: float = 0.0
    time_on_page: float = 0.0
    click_frequency: float = 0.0
    back_button: float = 0.0
    tab_switches: float = 0.0
    copy_attempts: float = 0.0
    search_usage: float = 0.0

    def to_vector(self) -> np.ndarray:
        """Return an 8-dim feature vector with each signal scaled to roughly 0-1."""
        # (raw value, divisor) pairs; divisors are rough per-signal maxima.
        raw_and_scale = [
            (self.mouse_hesitation, 5),
            (self.scroll_reversals, 10),
            (self.time_on_page, 300),
            (self.click_frequency, 20),
            (self.back_button, 5),
            (self.tab_switches, 10),
            (self.copy_attempts, 5),
            (self.search_usage, 5),
        ]
        return np.array([value / scale for value, scale in raw_and_scale])
class MultiModalFusion:
    """
    Fuses multiple signal modalities into unified confusion assessment.

    Supported modalities:
    - Audio: Speech patterns, hesitations
    - Biometric: Heart rate, GSR, eye tracking
    - Behavioral: Mouse, keyboard, scrolling patterns
    """

    def __init__(
        self,
        audio_weight: float = 0.2,
        biometric_weight: float = 0.3,
        behavioral_weight: float = 0.5
    ):
        self.audio_weight = audio_weight
        self.biometric_weight = biometric_weight
        self.behavioral_weight = behavioral_weight
        # Per-modality thresholds (exposed for external tuning/inspection)
        self.audio_threshold = 0.6
        self.biometric_threshold = 0.7
        self.behavioral_threshold = 0.5
        # Bounded history buffers so stale signals age out automatically
        self.audio_history = deque(maxlen=30)        # Last 30 seconds
        self.biometric_history = deque(maxlen=60)    # Last 60 seconds
        self.behavioral_history = deque(maxlen=100)  # Last 100 events
        # Serializes buffer access across producer/consumer threads
        self.lock = threading.Lock()

    def update_audio(self, features: "AudioFeatures"):
        """Append one audio feature sample to the rolling buffer."""
        with self.lock:
            self.audio_history.append(features)

    def update_biometric(self, features: "BiometricFeatures"):
        """Append one biometric feature sample to the rolling buffer."""
        with self.lock:
            self.biometric_history.append(features)

    def update_behavioral(self, features: "BehavioralFeatures"):
        """Append one behavioral feature sample to the rolling buffer."""
        with self.lock:
            self.behavioral_history.append(features)

    def get_audio_confusion(self) -> Tuple[float, str]:
        """Score confusion (0-1) from recent audio samples, with a reason tag."""
        with self.lock:
            if not self.audio_history:
                return 0.0, "no_audio"
            window = list(self.audio_history)[-10:]  # last 10 samples
            speech_rate = np.mean([s.speech_rate for s in window])
            hesitations = np.mean([s.hesitations for s in window])
            pause_freq = np.mean([s.pause_frequency for s in window])
            # Confusion indicators: slow speech, many hesitations, frequent pauses
            score = (
                (0.3 if speech_rate < 120 else 0.0)
                + (0.4 if hesitations > 5 else 0.0)
                + (0.3 if pause_freq > 3 else 0.0)
            )
            return min(score, 1.0), self._get_audio_reason(hesitations, speech_rate, pause_freq)

    def _get_audio_reason(self, hesitations: float, speech_rate: float, pause_freq: float) -> str:
        """Generate a human-readable audio confusion reason."""
        checks = (
            ("frequent_hesitations", hesitations > 5),
            ("slow_speech", speech_rate < 120),
            ("frequent_pauses", pause_freq > 3),
        )
        tags = [label for label, hit in checks if hit]
        return ",".join(tags) if tags else "normal"

    def get_biometric_confusion(self) -> Tuple[float, str]:
        """Score confusion (0-1) from recent biometric samples, with a reason tag."""
        with self.lock:
            if not self.biometric_history:
                return 0.0, "no_biometric"
            window = list(self.biometric_history)[-20:]  # last 20 samples
            hr = np.mean([s.heart_rate for s in window])
            hrv = np.mean([s.heart_rate_variability for s in window])
            gsr = np.mean([s.skin_conductance for s in window])
            # Confusion indicators: elevated HR, low HRV (stress), high GSR (arousal)
            score = (
                (0.3 if hr > 85 else 0.0)
                + (0.3 if hrv < 30 else 0.0)
                + (0.4 if gsr > 10 else 0.0)
            )
            return min(score, 1.0), self._get_biometric_reason(hr, hrv, gsr)

    def _get_biometric_reason(self, hr: float, hrv: float, gsr: float) -> str:
        """Generate a human-readable biometric confusion reason."""
        checks = (
            ("elevated_heart_rate", hr > 85),
            ("low_hrv", hrv < 30),
            ("high_arousal", gsr > 10),
        )
        tags = [label for label, hit in checks if hit]
        return ",".join(tags) if tags else "normal"

    def get_behavioral_confusion(self) -> Tuple[float, str]:
        """Score confusion (0-1) from recent behavioral events, with a reason tag."""
        with self.lock:
            if not self.behavioral_history:
                return 0.0, "no_behavioral"
            window = list(self.behavioral_history)[-20:]  # last 20 events
            mouse_h = np.mean([s.mouse_hesitation for s in window])
            scrolls = np.mean([s.scroll_reversals for s in window])
            back_btn = np.mean([s.back_button for s in window])
            score = (
                (0.3 if mouse_h > 3 else 0.0)
                + (0.3 if scrolls > 5 else 0.0)
                + (0.2 if back_btn > 3 else 0.0)
            )
            return min(score, 1.0), self._get_behavioral_reason(mouse_h, scrolls, back_btn)

    def _get_behavioral_reason(self, mouse_h: float, scrolls: float, back_btn: float) -> str:
        """Generate a human-readable behavioral confusion reason."""
        checks = (
            ("mouse_hesitation", mouse_h > 3),
            ("scroll_reversals", scrolls > 5),
            ("back_button_usage", back_btn > 3),
        )
        tags = [label for label, hit in checks if hit]
        return ",".join(tags) if tags else "normal"

    def get_fused_confusion(self) -> Dict[str, Any]:
        """
        Get the fused multi-modal confusion assessment.

        Returns:
            Dict with per-modality and fused confusion scores, reasons,
            confidence, primary indicator, and a suggested action.
        """
        audio_score, audio_reason = self.get_audio_confusion()
        biometric_score, biometric_reason = self.get_biometric_confusion()
        behavioral_score, behavioral_reason = self.get_behavioral_confusion()
        # Weighted contribution per modality. Insertion order matters:
        # max() below breaks ties in favor of the earlier key.
        contributions = {
            'audio': audio_score * self.audio_weight,
            'biometric': biometric_score * self.biometric_weight,
            'behavioral': behavioral_score * self.behavioral_weight
        }
        fused_score = sum(contributions.values())
        availability = {
            'audio': len(self.audio_history) > 0,
            'biometric': len(self.biometric_history) > 0,
            'behavioral': len(self.behavioral_history) > 0
        }
        # Confidence grows with the number of live modalities (0..1)
        confidence = min(sum(availability.values()) / 3.0, 1.0)
        primary_indicator = max(contributions, key=contributions.get)
        return {
            'confusion_score': fused_score,
            'confidence': confidence,
            'primary_indicator': primary_indicator,
            'audio_score': audio_score,
            'biometric_score': biometric_score,
            'behavioral_score': behavioral_score,
            'audio_reason': audio_reason,
            'biometric_reason': biometric_reason,
            'behavioral_reason': behavioral_reason,
            'suggested_action': self._get_suggested_action(fused_score, primary_indicator),
            'available_modalities': availability
        }

    def _get_suggested_action(self, score: float, primary: str) -> str:
        """Map the fused confusion level to an intervention name."""
        for ceiling, action in (
            (0.3, "continue_learning"),
            (0.5, "offer_hint"),
            (0.7, "trigger_ai_explanation"),
        ):
            if score < ceiling:
                return action
        return "pause_and_assess"

    def reset(self):
        """Clear all modality history buffers."""
        with self.lock:
            self.audio_history.clear()
            self.biometric_history.clear()
            self.behavioral_history.clear()
class AudioAnalyzer:
    """
    Real-time audio analysis for confusion detection.

    Requires: microphone input (simulated for now — amplitudes are pushed
    in via add_audio_sample rather than captured from a device).
    """

    def __init__(self):
        self.sample_buffer = deque(maxlen=1000)  # rolling window of amplitude samples
        self.is_recording = False
        self.sample_rate = 16000  # Hz; converts sample counts to durations

    def start_recording(self):
        """Start audio capture and discard any stale samples."""
        self.is_recording = True
        self.sample_buffer.clear()

    def stop_recording(self):
        """Stop audio capture."""
        self.is_recording = False

    def add_audio_sample(self, amplitude: float):
        """Add one audio amplitude sample (ignored unless recording)."""
        if self.is_recording:
            self.sample_buffer.append({
                'amplitude': amplitude,
                'timestamp': time.time()
            })

    def analyze(self) -> AudioFeatures:
        """
        Analyze the buffered amplitudes and extract pause/volume features.

        Returns:
            AudioFeatures with pause_frequency (pauses/minute),
            pause_duration (mean pause length, ms) and volume_level set.
            Returns defaults when fewer than 100 samples are buffered.
        """
        if len(self.sample_buffer) < 100:
            return AudioFeatures()
        # BUG FIX: the original kept `amplitudes` as a plain list, so
        # `amplitudes < threshold` raised TypeError (list vs float). Use a
        # numpy array for an element-wise comparison.
        amplitudes = np.array([s['amplitude'] for s in self.sample_buffer])
        features = AudioFeatures()
        # A "pause" is a run of samples below 30% of the mean amplitude.
        threshold = np.mean(amplitudes) * 0.3
        is_pause = amplitudes < threshold
        pause_durations = []
        current_pause = 0
        for quiet in is_pause:
            if quiet:
                current_pause += 1
            elif current_pause > 0:
                pause_durations.append(current_pause)
                current_pause = 0
        if current_pause > 0:
            # BUG FIX: the original silently dropped a pause that ran to the
            # end of the buffer; count it too.
            pause_durations.append(current_pause)
        duration_sec = len(amplitudes) / self.sample_rate
        features.pause_frequency = len(pause_durations) / duration_sec * 60
        features.pause_duration = (
            np.mean(pause_durations) * 1000 / self.sample_rate if pause_durations else 0
        )
        features.volume_level = np.mean(amplitudes)
        return features
class BiometricProcessor:
    """
    Processes biometric data for confusion detection.

    Supports: heart rate monitors, GSR sensors, eye trackers.
    """

    def __init__(self):
        self.data_buffer = deque(maxlen=60)  # rolling window of raw readings

    def add_reading(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Add one biometric reading; None marks a channel as absent."""
        self.data_buffer.append({
            'heart_rate': heart_rate,
            'hrv': hrv,
            'gsr': gsr,
            'skin_temp': skin_temp,
            'blink_rate': blink_rate,
            'eye_open': eye_open,
            'timestamp': time.time()
        })

    def analyze(self) -> BiometricFeatures:
        """
        Average the buffered readings into a BiometricFeatures.

        Returns:
            BiometricFeatures with the mean of each channel that has at
            least one non-None reading; other channels keep their 0.0
            default. Returns defaults when fewer than 5 readings exist.
        """
        if len(self.data_buffer) < 5:
            return BiometricFeatures()
        features = BiometricFeatures()
        # BUG FIX: filter on `is not None` rather than truthiness so a
        # legitimate 0.0 reading is averaged instead of being silently
        # discarded as "missing".
        hr_values = [d['heart_rate'] for d in self.data_buffer if d['heart_rate'] is not None]
        hrv_values = [d['hrv'] for d in self.data_buffer if d['hrv'] is not None]
        gsr_values = [d['gsr'] for d in self.data_buffer if d['gsr'] is not None]
        if hr_values:
            features.heart_rate = np.mean(hr_values)
        if hrv_values:
            features.heart_rate_variability = np.mean(hrv_values)
        if gsr_values:
            features.skin_conductance = np.mean(gsr_values)
        return features
# API integration
class MultiModalAPI:
    """REST API for multi-modal confusion detection"""

    def __init__(self, fusion: "MultiModalFusion"):
        self.fusion = fusion
        self.audio_analyzer = AudioAnalyzer()
        self.biometric_processor = BiometricProcessor()

    def process_audio(self, amplitude: float):
        """Ingest one audio sample, re-analyze, and push features to fusion."""
        analyzer = self.audio_analyzer
        analyzer.add_audio_sample(amplitude)
        features = analyzer.analyze()
        self.fusion.update_audio(features)
        return features

    def process_biometric(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None
    ):
        """Ingest one biometric reading, re-analyze, and push features to fusion."""
        processor = self.biometric_processor
        processor.add_reading(heart_rate=heart_rate, hrv=hrv, gsr=gsr)
        features = processor.analyze()
        self.fusion.update_biometric(features)
        return features

    def process_behavioral(
        self,
        mouse_hesitation: float = 0,
        scroll_reversals: float = 0,
        time_on_page: float = 0
    ):
        """Wrap raw behavioral signals in BehavioralFeatures and push to fusion."""
        features = BehavioralFeatures(
            mouse_hesitation=mouse_hesitation,
            scroll_reversals=scroll_reversals,
            time_on_page=time_on_page,
        )
        self.fusion.update_behavioral(features)
        return features

    def get_confusion_assessment(self) -> Dict:
        """Return the fused multi-modal confusion assessment."""
        return self.fusion.get_fused_confusion()
# Demo
if __name__ == "__main__":
    fusion = MultiModalFusion()
    api = MultiModalAPI(fusion)

    print("Multi-Modal Confusion Detection Demo")
    print("=" * 40)

    # Simulate 20 ticks of data collection across all three modalities.
    for step in range(20):
        # Audio: quieter samples (more apparent pausing) in the second half
        api.process_audio(amplitude=0.3 if step < 10 else 0.1)
        # Biometric: stress indicators ramp up over time
        api.process_biometric(
            heart_rate=75 + step * 0.5,
            hrv=40 - step * 0.3,
            gsr=8 + step * 0.2,
        )
        # Behavioral: hesitation and reversals ramp up over time
        api.process_behavioral(
            mouse_hesitation=2 + step * 0.2,
            scroll_reversals=3 + step * 0.3,
            time_on_page=60 + step * 3,
        )

    # Report the fused assessment after the simulated session.
    result = api.get_confusion_assessment()
    print(f"Confusion Score: {result['confusion_score']:.2f}")
    print(f"Confidence: {result['confidence']:.2f}")
    print(f"Primary Indicator: {result['primary_indicator']}")
    print(f"Biometric Score: {result['biometric_score']:.2f}")
    print(f"Behavioral Score: {result['behavioral_score']:.2f}")
    print(f"Suggested Action: {result['suggested_action']}")