Spaces:

moonlantern1
/

clipforge

Sleeping

App Files Files Community

clipforge / humeo-core /tests /test_vision.py

moonlantern1

Deploy ClipForge Docker Space

eda316b verified 11 days ago

raw

history blame contribute delete

7.6 kB

	"""Tests for the scene-change + vision-LLM + OCR bbox primitive.

	Covers:
	* happy path: well-formed JSON -> populated ``SceneRegions``.
	* bad JSON: degrade to empty regions + raw_reason, never raise.
	* bad bbox: one malformed bbox does not take down the whole scene record.
	* classification dispatch: chart width -> SPLIT; wide person -> ZOOM; else SIT.
	* layout instruction derivation: ``person_x_norm`` / ``chart_x_norm`` come
	from the bboxes when present, defaults when not.
	"""

	import json

	import pytest

	from humeo_core.primitives.vision import (
	_CHART_WIDTH_SPLIT_THRESHOLD,
	classify_from_regions,
	classify_scenes_with_vision_llm,
	detect_regions_with_llm,
	layout_instruction_from_regions,
	)
	from humeo_core.schemas import (
	BoundingBox,
	LayoutKind,
	Scene,
	SceneClassification,
	SceneRegions,
	)


	# ---------------------------------------------------------------------------
	# Schema
	# ---------------------------------------------------------------------------


	def test_bounding_box_requires_x2_gt_x1():
	    """A well-ordered box constructs; an inverted x or y pair raises ``ValueError``."""
	    # Sanity check: properly ordered corners must be accepted.
	    BoundingBox(x1=0.1, y1=0.1, x2=0.2, y2=0.2)

	    inverted_corner_sets = (
	        {"x1": 0.2, "y1": 0.1, "x2": 0.1, "y2": 0.2},  # x2 < x1
	        {"x1": 0.1, "y1": 0.2, "x2": 0.2, "y2": 0.1},  # y2 < y1
	    )
	    for corners in inverted_corner_sets:
	        with pytest.raises(ValueError):
	            BoundingBox(**corners)


	def test_bounding_box_center_and_width():
	    """Check the derived ``center_x`` / ``center_y`` / ``width`` values of one box."""
	    box = BoundingBox(x1=0.2, y1=0.4, x2=0.6, y2=0.9)

	    # Expected values are the midpoints of the x / y pairs and the x extent.
	    expected_by_attr = {
	        "center_x": 0.4,
	        "center_y": 0.65,
	        "width": 0.4,
	    }
	    for attr_name, expected in expected_by_attr.items():
	        assert getattr(box, attr_name) == pytest.approx(expected)


	# ---------------------------------------------------------------------------
	# detect_regions_with_llm
	# ---------------------------------------------------------------------------


	def _scene(i: int, kf: str | None = "/tmp/x.jpg") -> Scene:
	    """Build a minimal ``Scene`` fixture for the detection tests.

	    Args:
	        i: Index used for the scene id (``"s{i}"``) and as the start time.
	        kf: Keyframe path; pass ``None`` to build a scene without a keyframe.

	    Returns:
	        A ``Scene`` whose ``end_time`` is ``start_time + 1.0``.
	    """
	    # NOTE: the annotation previously read ``str \| None``; the stray
	    # backslash (a markdown escape) is not valid Python and broke import.
	    start = float(i)
	    return Scene(
	        scene_id=f"s{i}",
	        start_time=start,
	        end_time=start + 1.0,
	        keyframe_path=kf,
	    )


	def test_detect_regions_happy_path():
	    """Well-formed JSON from the vision callable yields populated regions."""
	    llm_payload = {
	        "person_bbox": {"x1": 0.7, "y1": 0.1, "x2": 0.98, "y2": 0.9, "confidence": 0.9},
	        "chart_bbox": {"x1": 0.02, "y1": 0.05, "x2": 0.65, "y2": 0.95, "confidence": 0.8},
	        "ocr_text": "Inflation YoY",
	        "reason": "explainer layout",
	    }

	    def vision_fn(_img: str, _prompt: str) -> str:
	        # Stand-in for the real model call: always answers with the payload.
	        return json.dumps(llm_payload)

	    results = detect_regions_with_llm([_scene(0)], vision_fn)

	    assert len(results) == 1
	    region = results[0]
	    assert region.scene_id == "s0"
	    # Person box sits on the right side, chart box covers most of the left.
	    assert region.person_bbox
	    assert region.person_bbox.center_x > 0.8
	    assert region.chart_bbox
	    assert region.chart_bbox.width > 0.6
	    assert "Inflation" in region.ocr_text


	def test_detect_regions_bad_json_is_safe():
	    """Unparseable model output gives empty bboxes and a 'parse error' reason."""

	    def vision_fn(*_a) -> str:
	        # Deliberately not valid JSON.
	        return "not json"

	    results = detect_regions_with_llm([_scene(0)], vision_fn)
	    first = results[0]

	    assert first.person_bbox is None
	    assert first.chart_bbox is None
	    reason_lowered = first.raw_reason.lower()
	    assert "parse error" in reason_lowered


	def test_detect_regions_missing_keyframe_is_safe():
	    """A scene without a keyframe is reported as such and never hits the model."""
	    keyframeless_scene = _scene(0, kf=None)

	    def vision_fn(*_a) -> str:  # pragma: no cover - should not be called
	        raise AssertionError("vision_fn must not be called without a keyframe")

	    results = detect_regions_with_llm([keyframeless_scene], vision_fn)
	    first = results[0]

	    assert first.person_bbox is None
	    assert "no keyframe" in first.raw_reason.lower()


	def test_detect_regions_bad_bbox_degrades_gracefully():
	    """An inverted person bbox is dropped while the valid chart bbox is kept."""
	    llm_payload = {
	        # x2 < x1: this box is malformed on purpose.
	        "person_bbox": {"x1": 0.5, "y1": 0.1, "x2": 0.3, "y2": 0.9},
	        "chart_bbox": {"x1": 0.02, "y1": 0.05, "x2": 0.65, "y2": 0.95},
	        "ocr_text": "",
	        "reason": "person bbox inverted",
	    }

	    def vision_fn(*_a) -> str:
	        return json.dumps(llm_payload)

	    results = detect_regions_with_llm([_scene(0)], vision_fn)
	    first = results[0]

	    assert first.person_bbox is None
	    assert first.chart_bbox is not None


	# ---------------------------------------------------------------------------
	# classify_from_regions
	# ---------------------------------------------------------------------------


	def test_classify_wide_chart_is_split():
	    """A chart box of width 0.66 next to a person box classifies as SPLIT."""
	    wide_chart = BoundingBox(x1=0.0, y1=0.0, x2=0.66, y2=1.0)
	    person_on_right = BoundingBox(x1=0.72, y1=0.1, x2=0.99, y2=0.95)
	    regions = SceneRegions(
	        scene_id="s0",
	        chart_bbox=wide_chart,
	        person_bbox=person_on_right,
	    )

	    result = classify_from_regions(regions)

	    assert result.layout == LayoutKind.SPLIT_CHART_PERSON
	    assert result.confidence > 0.5


	def test_classify_narrow_chart_not_split():
	    """A chart box only 0.1 wide must not produce the SPLIT layout."""
	    narrow_chart = BoundingBox(x1=0.4, y1=0.2, x2=0.5, y2=0.4)
	    person = BoundingBox(x1=0.3, y1=0.1, x2=0.85, y2=0.95)
	    regions = SceneRegions(
	        scene_id="s0",
	        chart_bbox=narrow_chart,
	        person_bbox=person,
	    )

	    result = classify_from_regions(regions)

	    # Chart width (0.1) falls under the split threshold, so anything but SPLIT.
	    assert result.layout != LayoutKind.SPLIT_CHART_PERSON


	def test_classify_wide_person_is_zoom_call():
	    """A lone person box spanning x 0.1-0.9 classifies as ZOOM_CALL_CENTER."""
	    wide_person = BoundingBox(x1=0.1, y1=0.05, x2=0.9, y2=0.98)
	    regions = SceneRegions(scene_id="s0", person_bbox=wide_person)

	    result = classify_from_regions(regions)

	    assert result.layout == LayoutKind.ZOOM_CALL_CENTER


	def test_classify_small_person_is_sit_center():
	    """A lone person box spanning x 0.4-0.6 classifies as SIT_CENTER."""
	    small_person = BoundingBox(x1=0.4, y1=0.2, x2=0.6, y2=0.8)
	    regions = SceneRegions(scene_id="s0", person_bbox=small_person)

	    result = classify_from_regions(regions)

	    assert result.layout == LayoutKind.SIT_CENTER


	def test_classify_nothing_detected_defaults_sit_center_low_conf():
	    """With no bboxes, the classifier picks SIT_CENTER with confidence <= 0.5."""
	    empty_regions = SceneRegions(scene_id="s0", raw_reason="model returned null")

	    result = classify_from_regions(empty_regions)

	    assert result.layout == LayoutKind.SIT_CENTER
	    assert result.confidence <= 0.5


	def test_chart_threshold_is_exported():
	    """Guard against the tuning constant being removed or leaving (0, 1)."""
	    # Importing the name at module top already fails if it disappears;
	    # here we additionally pin it strictly inside the open unit interval.
	    assert _CHART_WIDTH_SPLIT_THRESHOLD > 0.0
	    assert _CHART_WIDTH_SPLIT_THRESHOLD < 1.0


	# ---------------------------------------------------------------------------
	# layout_instruction_from_regions
	# ---------------------------------------------------------------------------


	def test_layout_instruction_from_regions_split():
	    """SPLIT instruction takes its x positions from the detected bboxes."""
	    chart_box = BoundingBox(x1=0.0, y1=0.0, x2=0.66, y2=1.0)
	    person_box = BoundingBox(x1=0.72, y1=0.1, x2=0.99, y2=0.95)
	    regions = SceneRegions(
	        scene_id="s0",
	        chart_bbox=chart_box,
	        person_bbox=person_box,
	    )

	    classification = classify_from_regions(regions)
	    instruction = layout_instruction_from_regions(regions, classification)

	    assert instruction.layout == LayoutKind.SPLIT_CHART_PERSON
	    # person_x_norm is the centre of the person box: (0.72 + 0.99) / 2 = 0.855
	    assert instruction.person_x_norm == pytest.approx(0.855, rel=1e-3)
	    # chart_x_norm is the chart box's left edge: 0.0
	    assert instruction.chart_x_norm == pytest.approx(0.0)


	def test_layout_instruction_defaults_when_no_regions():
	    """Without bboxes the instruction uses person_x 0.5 and chart_x 0.0."""
	    empty_regions = SceneRegions(scene_id="s0")
	    default_classification = SceneClassification(
	        scene_id="s0",
	        layout=LayoutKind.SIT_CENTER,
	        confidence=0.3,
	        reason="default",
	    )

	    instruction = layout_instruction_from_regions(empty_regions, default_classification)

	    assert instruction.person_x_norm == 0.5
	    assert instruction.chart_x_norm == 0.0


	def test_classify_scenes_with_vision_llm_returns_pairs():
	    """The combined entry point returns one (regions, classification) pair per scene."""
	    llm_payload = {
	        "person_bbox": {"x1": 0.1, "y1": 0.1, "x2": 0.95, "y2": 0.95},
	        "chart_bbox": None,
	        "ocr_text": "",
	        "reason": "solo subject",
	    }

	    def vision_fn(*_a) -> str:
	        return json.dumps(llm_payload)

	    pairs = classify_scenes_with_vision_llm([_scene(0)], vision_fn)

	    assert len(pairs) == 1
	    regions, classification = pairs[0]
	    assert regions.person_bbox is not None
	    # A single wide person box with no chart is expected to be a zoom call.
	    assert classification.layout == LayoutKind.ZOOM_CALL_CENTER