Spaces:
Sleeping
Sleeping
| """Tests for the scene-change + vision-LLM + OCR bbox primitive. | |
| Covers: | |
| * happy path: well-formed JSON -> populated ``SceneRegions``. | |
| * bad JSON: degrade to empty regions + raw_reason, never raise. | |
| * bad bbox: one malformed bbox does not take down the whole scene record. | |
| * classification dispatch: chart width -> SPLIT; wide person -> ZOOM; else SIT. | |
| * layout instruction derivation: ``person_x_norm`` / ``chart_x_norm`` come | |
| from the bboxes when present, defaults when not. | |
| """ | |
| import json | |
| import pytest | |
| from humeo_core.primitives.vision import ( | |
| _CHART_WIDTH_SPLIT_THRESHOLD, | |
| classify_from_regions, | |
| classify_scenes_with_vision_llm, | |
| detect_regions_with_llm, | |
| layout_instruction_from_regions, | |
| ) | |
| from humeo_core.schemas import ( | |
| BoundingBox, | |
| LayoutKind, | |
| Scene, | |
| SceneClassification, | |
| SceneRegions, | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Schema | |
| # --------------------------------------------------------------------------- | |
def test_bounding_box_requires_x2_gt_x1():
    """A well-ordered box constructs; an inverted x or y pair raises ``ValueError``."""
    well_ordered = {"x1": 0.1, "y1": 0.1, "x2": 0.2, "y2": 0.2}
    BoundingBox(**well_ordered)

    inverted_x = {"x1": 0.2, "y1": 0.1, "x2": 0.1, "y2": 0.2}
    inverted_y = {"x1": 0.1, "y1": 0.2, "x2": 0.2, "y2": 0.1}
    for coords in (inverted_x, inverted_y):
        with pytest.raises(ValueError):
            BoundingBox(**coords)
def test_bounding_box_center_and_width():
    """``center_x``, ``center_y`` and ``width`` match values computed by hand."""
    box = BoundingBox(x1=0.2, y1=0.4, x2=0.6, y2=0.9)
    expected = {"center_x": 0.4, "center_y": 0.65, "width": 0.4}
    for attr, value in expected.items():
        assert getattr(box, attr) == pytest.approx(value)
| # --------------------------------------------------------------------------- | |
| # detect_regions_with_llm | |
| # --------------------------------------------------------------------------- | |
def _scene(i: int, kf: str | None = "/tmp/x.jpg") -> Scene:
    """Build a one-second ``Scene`` fixture numbered ``i``.

    The scene id is ``"s<i>"``; the scene starts at ``i`` seconds and ends one
    second later. ``kf`` is passed through unchanged as ``keyframe_path``.
    """
    start = float(i)
    return Scene(
        scene_id=f"s{i}",
        start_time=start,
        end_time=start + 1.0,
        keyframe_path=kf,
    )
def test_detect_regions_happy_path():
    """Well-formed JSON from the vision callback yields populated regions."""
    payload = {
        "person_bbox": {"x1": 0.7, "y1": 0.1, "x2": 0.98, "y2": 0.9, "confidence": 0.9},
        "chart_bbox": {"x1": 0.02, "y1": 0.05, "x2": 0.65, "y2": 0.95, "confidence": 0.8},
        "ocr_text": "Inflation YoY",
        "reason": "explainer layout",
    }

    def fake_vision(_img: str, _prompt: str) -> str:
        return json.dumps(payload)

    results = detect_regions_with_llm([_scene(0)], fake_vision)

    assert len(results) == 1
    regions = results[0]
    assert regions.scene_id == "s0"

    # Presence is checked before the geometry so a missing box fails clearly.
    person = regions.person_bbox
    assert person
    assert person.center_x > 0.8

    chart = regions.chart_bbox
    assert chart
    assert chart.width > 0.6

    assert "Inflation" in regions.ocr_text
def test_detect_regions_bad_json_is_safe():
    """Unparseable callback output gives empty regions and a parse-error reason."""

    def garbage_vision(*_a) -> str:
        return "not json"

    regions = detect_regions_with_llm([_scene(0)], garbage_vision)[0]

    assert regions.person_bbox is None
    assert regions.chart_bbox is None
    assert "parse error" in regions.raw_reason.lower()
def test_detect_regions_missing_keyframe_is_safe():
    """A scene without a keyframe gives empty regions and never calls the model.

    The callback both records its invocation and raises, so an unexpected call
    is detected whether or not the exception propagates to the test.
    """
    scenes = [_scene(0, kf=None)]
    calls: list[tuple] = []

    def vision_fn(*args) -> str:  # pragma: no cover - should not be called
        calls.append(args)
        raise AssertionError("vision_fn must not be called without a keyframe")

    out = detect_regions_with_llm(scenes, vision_fn)

    # NOTE(review): asserted out here because the primitive may catch exceptions
    # raised inside the callback (it is meant to degrade rather than raise);
    # the recorded calls make the check independent of that handling.
    assert not calls, "vision_fn must not be called without a keyframe"
    assert out[0].person_bbox is None
    assert out[0].chart_bbox is None
    assert "no keyframe" in out[0].raw_reason.lower()
def test_detect_regions_bad_bbox_degrades_gracefully():
    """An inverted person bbox is dropped while the valid chart bbox is kept."""
    payload = {
        # x2 < x1: this box is deliberately malformed.
        "person_bbox": {"x1": 0.5, "y1": 0.1, "x2": 0.3, "y2": 0.9},
        "chart_bbox": {"x1": 0.02, "y1": 0.05, "x2": 0.65, "y2": 0.95},
        "ocr_text": "",
        "reason": "person bbox inverted",
    }

    def fake_vision(*_a) -> str:
        return json.dumps(payload)

    regions = detect_regions_with_llm([_scene(0)], fake_vision)[0]

    assert regions.person_bbox is None
    assert regions.chart_bbox is not None
| # --------------------------------------------------------------------------- | |
| # classify_from_regions | |
| # --------------------------------------------------------------------------- | |
def test_classify_wide_chart_is_split():
    """A chart 0.66 wide next to a person classifies as split, with confidence > 0.5."""
    chart = BoundingBox(x1=0.0, y1=0.0, x2=0.66, y2=1.0)
    person = BoundingBox(x1=0.72, y1=0.1, x2=0.99, y2=0.95)
    regions = SceneRegions(scene_id="s0", chart_bbox=chart, person_bbox=person)

    result = classify_from_regions(regions)

    assert result.layout == LayoutKind.SPLIT_CHART_PERSON
    assert result.confidence > 0.5
def test_classify_narrow_chart_not_split():
    """A chart only 0.1 wide must not produce the split layout."""
    narrow_chart = BoundingBox(x1=0.4, y1=0.2, x2=0.5, y2=0.4)
    person = BoundingBox(x1=0.3, y1=0.1, x2=0.85, y2=0.95)
    regions = SceneRegions(scene_id="s0", chart_bbox=narrow_chart, person_bbox=person)

    result = classify_from_regions(regions)

    # The chart width (0.1) is expected to sit below the split threshold.
    assert result.layout != LayoutKind.SPLIT_CHART_PERSON
def test_classify_wide_person_is_zoom_call():
    """A lone person box 0.8 wide classifies as the zoom-call layout."""
    wide_person = BoundingBox(x1=0.1, y1=0.05, x2=0.9, y2=0.98)
    regions = SceneRegions(scene_id="s0", person_bbox=wide_person)

    result = classify_from_regions(regions)

    assert result.layout == LayoutKind.ZOOM_CALL_CENTER
def test_classify_small_person_is_sit_center():
    """A lone person box 0.2 wide classifies as the sit-center layout."""
    small_person = BoundingBox(x1=0.4, y1=0.2, x2=0.6, y2=0.8)
    regions = SceneRegions(scene_id="s0", person_bbox=small_person)

    result = classify_from_regions(regions)

    assert result.layout == LayoutKind.SIT_CENTER
def test_classify_nothing_detected_defaults_sit_center_low_conf():
    """With no boxes at all, the result is sit-center with confidence at most 0.5."""
    empty_regions = SceneRegions(scene_id="s0", raw_reason="model returned null")

    result = classify_from_regions(empty_regions)

    assert result.layout == LayoutKind.SIT_CENTER
    assert result.confidence <= 0.5
def test_chart_threshold_is_exported():
    """The split threshold stays importable and strictly between 0 and 1."""
    # Guards against the tuning constant being silently removed: the import at
    # the top of this module fails if it disappears.
    threshold = _CHART_WIDTH_SPLIT_THRESHOLD
    assert 0.0 < threshold < 1.0
| # --------------------------------------------------------------------------- | |
| # layout_instruction_from_regions | |
| # --------------------------------------------------------------------------- | |
def test_layout_instruction_from_regions_split():
    """For a split scene, the instruction's x positions come from the bboxes."""
    chart = BoundingBox(x1=0.0, y1=0.0, x2=0.66, y2=1.0)
    person = BoundingBox(x1=0.72, y1=0.1, x2=0.99, y2=0.95)
    regions = SceneRegions(scene_id="s0", chart_bbox=chart, person_bbox=person)

    classification = classify_from_regions(regions)
    instruction = layout_instruction_from_regions(regions, classification)

    assert instruction.layout == LayoutKind.SPLIT_CHART_PERSON
    # person_x_norm is the horizontal centre of the person box: (0.72 + 0.99) / 2.
    assert instruction.person_x_norm == pytest.approx(0.855, rel=1e-3)
    # chart_x_norm is the chart box's left edge.
    assert instruction.chart_x_norm == pytest.approx(0.0)
def test_layout_instruction_defaults_when_no_regions():
    """With no bboxes, the instruction uses defaults: person at 0.5, chart at 0.0.

    The classification is built by hand so this test exercises only
    ``layout_instruction_from_regions``, not the classifier.
    """
    r = SceneRegions(scene_id="s0")
    c = SceneClassification(
        scene_id="s0", layout=LayoutKind.SIT_CENTER, confidence=0.3, reason="default"
    )

    instr = layout_instruction_from_regions(r, c)

    # Floats are compared with pytest.approx, as in the split-layout test, so
    # the check does not depend on the defaults being exact literals.
    assert instr.person_x_norm == pytest.approx(0.5)
    assert instr.chart_x_norm == pytest.approx(0.0)
def test_classify_scenes_with_vision_llm_returns_pairs():
    """The end-to-end helper returns one ``(regions, classification)`` pair per scene."""
    payload = {
        "person_bbox": {"x1": 0.1, "y1": 0.1, "x2": 0.95, "y2": 0.95},
        "chart_bbox": None,
        "ocr_text": "",
        "reason": "solo subject",
    }

    def fake_vision(*_a) -> str:
        return json.dumps(payload)

    pairs = classify_scenes_with_vision_llm([_scene(0)], fake_vision)

    assert len(pairs) == 1
    regions, classification = pairs[0]
    assert regions.person_bbox is not None
    assert classification.layout == LayoutKind.ZOOM_CALL_CENTER