Spaces:

ub-aac-chatbot
/

aac-chatbot

Sleeping

App Files Files Community

aac-chatbot / backend /evals /multimodal_alignment.py

shwetangisingh

relevance + diversity evals, batched scoring, math tooltips

69345ca 15 days ago

raw

history blame contribute delete

3.89 kB

	import re

	_POSITIVE = {
	"glad",
	"love",
	"lucky",
	"happy",
	"great",
	"grateful",
	"fun",
	"wonderful",
	"nice",
	"amazing",
	"delighted",
	"pleased",
	"yes",
	"solid",
	}
	_NEGATIVE = {
	"tired",
	"hard",
	"sorry",
	"unfortunately",
	"bad",
	"awful",
	"regrettably",
	"difficult",
	"frustrating",
	"no",
	"stop",
	}

	_AFFECT_TARGET = {
	"HAPPY": 1.0,
	"FRUSTRATED": -0.5,
	"NEUTRAL": 0.0,
	"SURPRISED": 0.0,
	}

	_GESTURE_OPENER_PATTERNS = {
	"THUMBS_UP": re.compile(r"^\s*(yes\|yeah\|totally\|for sure\|absolutely\|sure)\b", re.I),
	"THUMBS_DOWN": re.compile(r"^\s*(no\|nah\|not really\|i'd rather not)\b", re.I),
	"OPEN_PALM": re.compile(r"^\s*(hi\|hey\|hello)\b", re.I),
	"VICTORY": re.compile(r"^\s*(yes\|awesome\|great\|fantastic\|amazing\|woo)\b", re.I),
	"I_LOVE_YOU": re.compile(r"^\s*(love\|i love\|adore\|care)\b", re.I),
	}


	def _tokens(text: str) -> set[str]:
	return set(re.findall(r"\b[a-z]+\b", text.lower()))


	def _sentiment_score(text: str) -> float:
	    """Score *text* in [-1, 1] from its positive and negative lexicon hits.

	    Each lexicon word counts at most once (tokens are de-duplicated).
	    Returns 0.0 when the text contains no lexicon words at all; otherwise
	    (positive - negative) / (positive + negative).
	    """
	    words = _tokens(text)
	    n_pos = len(_POSITIVE.intersection(words))
	    n_neg = len(_NEGATIVE.intersection(words))
	    n_hits = n_pos + n_neg
	    if not n_hits:
	        # No sentiment evidence either way: treat as neutral.
	        return 0.0
	    return (n_pos - n_neg) / n_hits


	def _affect_alignment(response: str, affect: str | None) -> float:
	    """Return how closely the response's sentiment matches the affect target.

	    Args:
	        response: Generated response text to score.
	        affect: Affect label (a key of ``_AFFECT_TARGET``) or ``None``.

	    Returns:
	        0.0 when no affect is given; otherwise a similarity in [0, 1] where
	        1.0 means the keyword sentiment equals the label's target value.
	        Labels missing from ``_AFFECT_TARGET`` use a target of 0.0.
	    """
	    if not affect:
	        return 0.0
	    target = _AFFECT_TARGET.get(affect, 0.0)
	    score = _sentiment_score(response)
	    # Both values lie in [-1, 1], so their distance is in [0, 2]; halving it
	    # and subtracting from 1 maps it onto a similarity in [0, 1].
	    return max(0.0, 1.0 - abs(score - target) / 2.0)


	def _gesture_alignment(response: str, gesture_tag: str | None) -> float:
	    """Score whether the response opens in a way that fits the gesture.

	    Args:
	        response: Generated response text to score.
	        gesture_tag: Gesture label (a key of ``_GESTURE_OPENER_PATTERNS``)
	            or ``None``.

	    Returns:
	        0.0 when no gesture is given or the opener pattern does not match,
	        1.0 when it matches, and 0.5 when the gesture has no pattern.
	    """
	    if not gesture_tag:
	        return 0.0
	    pattern = _GESTURE_OPENER_PATTERNS.get(gesture_tag)
	    if pattern is None:
	        # Gesture has no testable opener; give partial credit.
	        return 0.5
	    return 1.0 if pattern.search(response) else 0.0


	def _gaze_alignment(
	chunks: list[dict], gaze_bucket: str \| None
	) -> tuple[float, int, int]:
	if not gaze_bucket or not chunks:
	return 0.0, 0, len(chunks) if chunks else 0
	matches = sum(1 for c in chunks if c.get("bucket") == gaze_bucket)
	return matches / len(chunks), matches, len(chunks)


	def _affect_breakdown(response: str) -> tuple[int, int]:
	    """Return ``(positive_hits, negative_hits)`` for *response*.

	    Each count is the number of distinct lexicon words present in the
	    response's token set.
	    """
	    words = _tokens(response)
	    n_pos = len(_POSITIVE.intersection(words))
	    n_neg = len(_NEGATIVE.intersection(words))
	    return n_pos, n_neg


	def compute_multimodal_alignment(
	    response: str,
	    affect: str | None,
	    gesture_tag: str | None,
	    gaze_bucket: str | None,
	    chunks: list[dict],
	) -> dict:
	    """Score how well a response agrees with the affect, gesture and gaze cues.

	    Only the cues that are actually supplied (truthy) are scored and
	    averaged; a missing cue does not pull the overall score down.

	    Args:
	        response: Generated response text to evaluate.
	        affect: Affect label, or ``None`` to skip affect scoring.
	        gesture_tag: Gesture label, or ``None`` to skip gesture scoring.
	        gaze_bucket: Gaze bucket label, or ``None`` to skip gaze scoring.
	        chunks: Chunk dicts compared against ``gaze_bucket`` via their
	            "bucket" key.

	    Returns:
	        A dict with ``overall_score`` (mean of the scored cues, 0.0 if none),
	        ``affect_alignment``, ``gesture_alignment`` and ``gaze_alignment``
	        (0.0 for cues that were not scored), all rounded to 4 decimals, plus
	        ``explain``: per-cue details for the cues that were scored.
	    """
	    scores: dict[str, float] = {}
	    explain: dict[str, dict] = {}

	    if affect:
	        scores["affect_alignment"] = _affect_alignment(response, affect)
	        pos, neg = _affect_breakdown(response)
	        explain["affect"] = {
	            "target": affect,
	            "pos_words": pos,
	            "neg_words": neg,
	            "sentiment": round(_sentiment_score(response), 4),
	        }

	    if gesture_tag:
	        scores["gesture_alignment"] = _gesture_alignment(response, gesture_tag)
	        pattern = _GESTURE_OPENER_PATTERNS.get(gesture_tag)
	        explain["gesture"] = {
	            "tag": gesture_tag,
	            "has_pattern": pattern is not None,
	            # None (rather than False) signals "nothing to test against".
	            "matched": bool(pattern.search(response)) if pattern else None,
	        }

	    if gaze_bucket:
	        score, matches, total = _gaze_alignment(chunks, gaze_bucket)
	        scores["gaze_alignment"] = score
	        explain["gaze"] = {
	            "bucket": gaze_bucket,
	            "matched_chunks": matches,
	            "total_chunks": total,
	        }

	    # Average over scored cues only; guard against an empty score set.
	    overall = sum(scores.values()) / len(scores) if scores else 0.0
	    return {
	        "overall_score": round(overall, 4),
	        "affect_alignment": round(scores.get("affect_alignment", 0.0), 4),
	        "gesture_alignment": round(scores.get("gesture_alignment", 0.0), 4),
	        "gaze_alignment": round(scores.get("gaze_alignment", 0.0), 4),
	        "explain": explain,
	    }