# contextflow-rl / multimodal_detection.py
# namish10's picture
# Upload multimodal_detection.py with huggingface_hub
# 82e4a98 verified
"""
Multi-Modal Confusion Detection Module for ContextFlow
Combines audio, biometric, and behavioral signals for comprehensive confusion detection.
Addresses: Multi-modal Confusion Detection requirement
"""
import numpy as np
from typing import Dict, List, Optional, Tuple, Any
from dataclasses import dataclass, field
from collections import deque
import threading
import time
@dataclass
class AudioFeatures:
    """Audio features extracted from voice/speech."""
    speech_rate: float = 0.0      # Words per minute
    pause_frequency: float = 0.0  # Pauses per minute
    pause_duration: float = 0.0   # Average pause duration (ms)
    pitch_variation: float = 0.0  # Pitch standard deviation
    volume_level: float = 0.0     # Average volume (0-1)
    hesitations: int = 0          # Count of "uh", "um", etc.
    question_markers: int = 0     # Rising intonation count

    def to_vector(self) -> np.ndarray:
        """Return a 7-dim feature vector, each entry scaled to roughly 0-1."""
        scaled = (
            self.speech_rate / 200,     # typical upper speaking rate
            self.pause_frequency / 10,
            self.pause_duration / 5000,
            self.pitch_variation / 50,
            self.volume_level,          # already 0-1
            self.hesitations / 20,
            self.question_markers / 10,
        )
        return np.array(scaled)
@dataclass
class BiometricFeatures:
    """Biometric features for confusion detection."""
    heart_rate: float = 0.0              # BPM
    heart_rate_variability: float = 0.0  # HRV metric
    skin_conductance: float = 0.0        # GSR (microsiemens)
    skin_temperature: float = 0.0        # Celsius
    eye_blink_rate: float = 0.0          # Blinks per minute
    eye_open_duration: float = 0.0       # Average eye open (ms)

    def to_vector(self) -> np.ndarray:
        """Return a 6-dim vector, centered/scaled around physiological baselines."""
        hr_centered = (self.heart_rate - 60) / 60         # resting-HR baseline
        temp_centered = (self.skin_temperature - 36) / 2  # skin-temp baseline (C)
        blink_centered = (self.eye_blink_rate - 15) / 15  # normal blink rate
        return np.array([
            hr_centered,
            self.heart_rate_variability / 50,
            self.skin_conductance / 20,
            temp_centered,
            blink_centered,
            self.eye_open_duration / 500,
        ])
@dataclass
class BehavioralFeatures:
    """Behavioral features (existing confusion signals)."""
    mouse_hesitation: float = 0.0
    scroll_reversals: float = 0.0
    time_on_page: float = 0.0
    click_frequency: float = 0.0
    back_button: float = 0.0
    tab_switches: float = 0.0
    copy_attempts: float = 0.0
    search_usage: float = 0.0

    def to_vector(self) -> np.ndarray:
        """Return an 8-dim feature vector with each signal scaled to roughly 0-1."""
        # (raw value, divisor) pairs; divisors are rough per-signal maxima.
        raw_and_scale = [
            (self.mouse_hesitation, 5),
            (self.scroll_reversals, 10),
            (self.time_on_page, 300),
            (self.click_frequency, 20),
            (self.back_button, 5),
            (self.tab_switches, 10),
            (self.copy_attempts, 5),
            (self.search_usage, 5),
        ]
        return np.array([value / scale for value, scale in raw_and_scale])
class MultiModalFusion:
    """
    Fuses multiple signal modalities into unified confusion assessment.

    Supported modalities:
    - Audio: Speech patterns, hesitations
    - Biometric: Heart rate, GSR, eye tracking
    - Behavioral: Mouse, keyboard, scrolling patterns
    """

    def __init__(
        self,
        audio_weight: float = 0.2,
        biometric_weight: float = 0.3,
        behavioral_weight: float = 0.5
    ):
        self.audio_weight = audio_weight
        self.biometric_weight = biometric_weight
        self.behavioral_weight = behavioral_weight
        # Per-modality thresholds (exposed for external tuning/inspection)
        self.audio_threshold = 0.6
        self.biometric_threshold = 0.7
        self.behavioral_threshold = 0.5
        # Bounded history buffers so stale signals age out automatically
        self.audio_history = deque(maxlen=30)        # Last 30 seconds
        self.biometric_history = deque(maxlen=60)    # Last 60 seconds
        self.behavioral_history = deque(maxlen=100)  # Last 100 events
        # Serializes buffer access across producer/consumer threads
        self.lock = threading.Lock()

    def update_audio(self, features: "AudioFeatures"):
        """Append one audio feature sample to the rolling buffer."""
        with self.lock:
            self.audio_history.append(features)

    def update_biometric(self, features: "BiometricFeatures"):
        """Append one biometric feature sample to the rolling buffer."""
        with self.lock:
            self.biometric_history.append(features)

    def update_behavioral(self, features: "BehavioralFeatures"):
        """Append one behavioral feature sample to the rolling buffer."""
        with self.lock:
            self.behavioral_history.append(features)

    def get_audio_confusion(self) -> Tuple[float, str]:
        """Score confusion (0-1) from recent audio samples, with a reason tag."""
        with self.lock:
            if not self.audio_history:
                return 0.0, "no_audio"
            window = list(self.audio_history)[-10:]  # last 10 samples
            speech_rate = np.mean([s.speech_rate for s in window])
            hesitations = np.mean([s.hesitations for s in window])
            pause_freq = np.mean([s.pause_frequency for s in window])
            # Confusion indicators: slow speech, many hesitations, frequent pauses
            score = (
                (0.3 if speech_rate < 120 else 0.0)
                + (0.4 if hesitations > 5 else 0.0)
                + (0.3 if pause_freq > 3 else 0.0)
            )
            return min(score, 1.0), self._get_audio_reason(hesitations, speech_rate, pause_freq)

    def _get_audio_reason(self, hesitations: float, speech_rate: float, pause_freq: float) -> str:
        """Generate a human-readable audio confusion reason."""
        checks = (
            ("frequent_hesitations", hesitations > 5),
            ("slow_speech", speech_rate < 120),
            ("frequent_pauses", pause_freq > 3),
        )
        tags = [label for label, hit in checks if hit]
        return ",".join(tags) if tags else "normal"

    def get_biometric_confusion(self) -> Tuple[float, str]:
        """Score confusion (0-1) from recent biometric samples, with a reason tag."""
        with self.lock:
            if not self.biometric_history:
                return 0.0, "no_biometric"
            window = list(self.biometric_history)[-20:]  # last 20 samples
            hr = np.mean([s.heart_rate for s in window])
            hrv = np.mean([s.heart_rate_variability for s in window])
            gsr = np.mean([s.skin_conductance for s in window])
            # Confusion indicators: elevated HR, low HRV (stress), high GSR (arousal)
            score = (
                (0.3 if hr > 85 else 0.0)
                + (0.3 if hrv < 30 else 0.0)
                + (0.4 if gsr > 10 else 0.0)
            )
            return min(score, 1.0), self._get_biometric_reason(hr, hrv, gsr)

    def _get_biometric_reason(self, hr: float, hrv: float, gsr: float) -> str:
        """Generate a human-readable biometric confusion reason."""
        checks = (
            ("elevated_heart_rate", hr > 85),
            ("low_hrv", hrv < 30),
            ("high_arousal", gsr > 10),
        )
        tags = [label for label, hit in checks if hit]
        return ",".join(tags) if tags else "normal"

    def get_behavioral_confusion(self) -> Tuple[float, str]:
        """Score confusion (0-1) from recent behavioral events, with a reason tag."""
        with self.lock:
            if not self.behavioral_history:
                return 0.0, "no_behavioral"
            window = list(self.behavioral_history)[-20:]  # last 20 events
            mouse_h = np.mean([s.mouse_hesitation for s in window])
            scrolls = np.mean([s.scroll_reversals for s in window])
            back_btn = np.mean([s.back_button for s in window])
            score = (
                (0.3 if mouse_h > 3 else 0.0)
                + (0.3 if scrolls > 5 else 0.0)
                + (0.2 if back_btn > 3 else 0.0)
            )
            return min(score, 1.0), self._get_behavioral_reason(mouse_h, scrolls, back_btn)

    def _get_behavioral_reason(self, mouse_h: float, scrolls: float, back_btn: float) -> str:
        """Generate a human-readable behavioral confusion reason."""
        checks = (
            ("mouse_hesitation", mouse_h > 3),
            ("scroll_reversals", scrolls > 5),
            ("back_button_usage", back_btn > 3),
        )
        tags = [label for label, hit in checks if hit]
        return ",".join(tags) if tags else "normal"

    def get_fused_confusion(self) -> Dict[str, Any]:
        """
        Get the fused multi-modal confusion assessment.

        Returns:
            Dict with per-modality and fused confusion scores, reasons,
            confidence, primary indicator, and a suggested action.
        """
        audio_score, audio_reason = self.get_audio_confusion()
        biometric_score, biometric_reason = self.get_biometric_confusion()
        behavioral_score, behavioral_reason = self.get_behavioral_confusion()
        # Weighted contribution per modality. Insertion order matters:
        # max() below breaks ties in favor of the earlier key.
        contributions = {
            'audio': audio_score * self.audio_weight,
            'biometric': biometric_score * self.biometric_weight,
            'behavioral': behavioral_score * self.behavioral_weight
        }
        fused_score = sum(contributions.values())
        availability = {
            'audio': len(self.audio_history) > 0,
            'biometric': len(self.biometric_history) > 0,
            'behavioral': len(self.behavioral_history) > 0
        }
        # Confidence grows with the number of live modalities (0..1)
        confidence = min(sum(availability.values()) / 3.0, 1.0)
        primary_indicator = max(contributions, key=contributions.get)
        return {
            'confusion_score': fused_score,
            'confidence': confidence,
            'primary_indicator': primary_indicator,
            'audio_score': audio_score,
            'biometric_score': biometric_score,
            'behavioral_score': behavioral_score,
            'audio_reason': audio_reason,
            'biometric_reason': biometric_reason,
            'behavioral_reason': behavioral_reason,
            'suggested_action': self._get_suggested_action(fused_score, primary_indicator),
            'available_modalities': availability
        }

    def _get_suggested_action(self, score: float, primary: str) -> str:
        """Map the fused confusion level to an intervention name."""
        for ceiling, action in (
            (0.3, "continue_learning"),
            (0.5, "offer_hint"),
            (0.7, "trigger_ai_explanation"),
        ):
            if score < ceiling:
                return action
        return "pause_and_assess"

    def reset(self):
        """Clear all modality history buffers."""
        with self.lock:
            self.audio_history.clear()
            self.biometric_history.clear()
            self.behavioral_history.clear()
class AudioAnalyzer:
    """
    Real-time audio analysis for confusion detection.

    Requires: microphone input (simulated for now — amplitudes are pushed
    in via add_audio_sample rather than captured from a device).
    """

    def __init__(self):
        self.sample_buffer = deque(maxlen=1000)  # rolling window of amplitude samples
        self.is_recording = False
        self.sample_rate = 16000  # Hz; converts sample counts to durations

    def start_recording(self):
        """Start audio capture and discard any stale samples."""
        self.is_recording = True
        self.sample_buffer.clear()

    def stop_recording(self):
        """Stop audio capture."""
        self.is_recording = False

    def add_audio_sample(self, amplitude: float):
        """Add one audio amplitude sample (ignored unless recording)."""
        if self.is_recording:
            self.sample_buffer.append({
                'amplitude': amplitude,
                'timestamp': time.time()
            })

    def analyze(self) -> AudioFeatures:
        """
        Analyze the buffered amplitudes and extract pause/volume features.

        Returns:
            AudioFeatures with pause_frequency (pauses/minute),
            pause_duration (mean pause length, ms) and volume_level set.
            Returns defaults when fewer than 100 samples are buffered.
        """
        if len(self.sample_buffer) < 100:
            return AudioFeatures()
        # BUG FIX: the original kept `amplitudes` as a plain list, so
        # `amplitudes < threshold` raised TypeError (list vs float). Use a
        # numpy array for an element-wise comparison.
        amplitudes = np.array([s['amplitude'] for s in self.sample_buffer])
        features = AudioFeatures()
        # A "pause" is a run of samples below 30% of the mean amplitude.
        threshold = np.mean(amplitudes) * 0.3
        is_pause = amplitudes < threshold
        pause_durations = []
        current_pause = 0
        for quiet in is_pause:
            if quiet:
                current_pause += 1
            elif current_pause > 0:
                pause_durations.append(current_pause)
                current_pause = 0
        if current_pause > 0:
            # BUG FIX: the original silently dropped a pause that ran to the
            # end of the buffer; count it too.
            pause_durations.append(current_pause)
        duration_sec = len(amplitudes) / self.sample_rate
        features.pause_frequency = len(pause_durations) / duration_sec * 60
        features.pause_duration = (
            np.mean(pause_durations) * 1000 / self.sample_rate if pause_durations else 0
        )
        features.volume_level = np.mean(amplitudes)
        return features
class BiometricProcessor:
    """
    Processes biometric data for confusion detection.

    Supports: heart rate monitors, GSR sensors, eye trackers.
    """

    def __init__(self):
        self.data_buffer = deque(maxlen=60)  # rolling window of raw readings

    def add_reading(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None,
        skin_temp: Optional[float] = None,
        blink_rate: Optional[float] = None,
        eye_open: Optional[float] = None
    ):
        """Add one biometric reading; None marks a channel as absent."""
        self.data_buffer.append({
            'heart_rate': heart_rate,
            'hrv': hrv,
            'gsr': gsr,
            'skin_temp': skin_temp,
            'blink_rate': blink_rate,
            'eye_open': eye_open,
            'timestamp': time.time()
        })

    def analyze(self) -> BiometricFeatures:
        """
        Average the buffered readings into a BiometricFeatures.

        Returns:
            BiometricFeatures with the mean of each channel that has at
            least one non-None reading; other channels keep their 0.0
            default. Returns defaults when fewer than 5 readings exist.
        """
        if len(self.data_buffer) < 5:
            return BiometricFeatures()
        features = BiometricFeatures()
        # BUG FIX: filter on `is not None` rather than truthiness so a
        # legitimate 0.0 reading is averaged instead of being silently
        # discarded as "missing".
        hr_values = [d['heart_rate'] for d in self.data_buffer if d['heart_rate'] is not None]
        hrv_values = [d['hrv'] for d in self.data_buffer if d['hrv'] is not None]
        gsr_values = [d['gsr'] for d in self.data_buffer if d['gsr'] is not None]
        if hr_values:
            features.heart_rate = np.mean(hr_values)
        if hrv_values:
            features.heart_rate_variability = np.mean(hrv_values)
        if gsr_values:
            features.skin_conductance = np.mean(gsr_values)
        return features
# API integration
class MultiModalAPI:
    """REST API for multi-modal confusion detection"""

    def __init__(self, fusion: "MultiModalFusion"):
        self.fusion = fusion
        self.audio_analyzer = AudioAnalyzer()
        self.biometric_processor = BiometricProcessor()

    def process_audio(self, amplitude: float):
        """Ingest one audio sample, re-analyze, and push features to fusion."""
        analyzer = self.audio_analyzer
        analyzer.add_audio_sample(amplitude)
        features = analyzer.analyze()
        self.fusion.update_audio(features)
        return features

    def process_biometric(
        self,
        heart_rate: Optional[float] = None,
        hrv: Optional[float] = None,
        gsr: Optional[float] = None
    ):
        """Ingest one biometric reading, re-analyze, and push features to fusion."""
        processor = self.biometric_processor
        processor.add_reading(heart_rate=heart_rate, hrv=hrv, gsr=gsr)
        features = processor.analyze()
        self.fusion.update_biometric(features)
        return features

    def process_behavioral(
        self,
        mouse_hesitation: float = 0,
        scroll_reversals: float = 0,
        time_on_page: float = 0
    ):
        """Wrap raw behavioral signals in BehavioralFeatures and push to fusion."""
        features = BehavioralFeatures(
            mouse_hesitation=mouse_hesitation,
            scroll_reversals=scroll_reversals,
            time_on_page=time_on_page,
        )
        self.fusion.update_behavioral(features)
        return features

    def get_confusion_assessment(self) -> Dict:
        """Return the fused multi-modal confusion assessment."""
        return self.fusion.get_fused_confusion()
# Demo
if __name__ == "__main__":
    fusion = MultiModalFusion()
    api = MultiModalAPI(fusion)

    print("Multi-Modal Confusion Detection Demo")
    print("=" * 40)

    # Simulate 20 ticks of data collection across all three modalities.
    for step in range(20):
        # Audio: quieter samples (more apparent pausing) in the second half
        api.process_audio(amplitude=0.3 if step < 10 else 0.1)
        # Biometric: stress indicators ramp up over time
        api.process_biometric(
            heart_rate=75 + step * 0.5,
            hrv=40 - step * 0.3,
            gsr=8 + step * 0.2,
        )
        # Behavioral: hesitation and reversals ramp up over time
        api.process_behavioral(
            mouse_hesitation=2 + step * 0.2,
            scroll_reversals=3 + step * 0.3,
            time_on_page=60 + step * 3,
        )

    # Report the fused assessment after the simulated session.
    result = api.get_confusion_assessment()
    print(f"Confusion Score: {result['confusion_score']:.2f}")
    print(f"Confidence: {result['confidence']:.2f}")
    print(f"Primary Indicator: {result['primary_indicator']}")
    print(f"Biometric Score: {result['biometric_score']:.2f}")
    print(f"Behavioral Score: {result['behavioral_score']:.2f}")
    print(f"Suggested Action: {result['suggested_action']}")