"""Tests for the scene-change + vision-LLM + OCR bbox primitive.
Covers:
* happy path: well-formed JSON -> populated ``SceneRegions``.
* bad JSON: degrade to empty regions + raw_reason, never raise.
* bad bbox: one malformed bbox does not take down the whole scene record.
* classification dispatch: chart width -> SPLIT; wide person -> ZOOM; else SIT.
* layout instruction derivation: ``person_x_norm`` / ``chart_x_norm`` come
from the bboxes when present, defaults when not.
"""
import json
import pytest
from humeo_core.primitives.vision import (
_CHART_WIDTH_SPLIT_THRESHOLD,
classify_from_regions,
classify_scenes_with_vision_llm,
detect_regions_with_llm,
layout_instruction_from_regions,
)
from humeo_core.schemas import (
BoundingBox,
LayoutKind,
Scene,
SceneClassification,
SceneRegions,
)
# ---------------------------------------------------------------------------
# Schema
# ---------------------------------------------------------------------------
def test_bounding_box_requires_x2_gt_x1():
    """An ordered box constructs; an inverted x or y pair raises ValueError."""
    # Sanity check: correctly ordered corners must be accepted.
    BoundingBox(x1=0.1, y1=0.1, x2=0.2, y2=0.2)

    inverted_corner_sets = (
        {"x1": 0.2, "y1": 0.1, "x2": 0.1, "y2": 0.2},  # x2 < x1
        {"x1": 0.1, "y1": 0.2, "x2": 0.2, "y2": 0.1},  # y2 < y1
    )
    for corners in inverted_corner_sets:
        with pytest.raises(ValueError):
            BoundingBox(**corners)
def test_bounding_box_center_and_width():
    """The derived ``center_x`` / ``center_y`` / ``width`` match the corners."""
    box = BoundingBox(x1=0.2, y1=0.4, x2=0.6, y2=0.9)
    derived = (box.center_x, box.center_y, box.width)
    # Element-wise approximate comparison: (0.2+0.6)/2, (0.4+0.9)/2, 0.6-0.2.
    assert derived == pytest.approx((0.4, 0.65, 0.4))
# ---------------------------------------------------------------------------
# detect_regions_with_llm
# ---------------------------------------------------------------------------
def _scene(i: int, kf: str | None = "/tmp/x.jpg") -> Scene:
    """Build a one-second ``Scene`` with id ``s<i>`` starting at ``i`` seconds.

    ``kf`` is passed through as the scene's ``keyframe_path``; pass ``None``
    to build a scene without a keyframe.
    """
    begin = float(i)
    finish = begin + 1.0
    return Scene(
        scene_id=f"s{i}",
        start_time=begin,
        end_time=finish,
        keyframe_path=kf,
    )
def test_detect_regions_happy_path():
    """A well-formed JSON reply produces one populated regions record."""
    reply = {
        "person_bbox": {"x1": 0.7, "y1": 0.1, "x2": 0.98, "y2": 0.9, "confidence": 0.9},
        "chart_bbox": {"x1": 0.02, "y1": 0.05, "x2": 0.65, "y2": 0.95, "confidence": 0.8},
        "ocr_text": "Inflation YoY",
        "reason": "explainer layout",
    }

    def fake_vision(_img: str, _prompt: str) -> str:
        # Stand-in for the vision model: always answers with the canned reply.
        return json.dumps(reply)

    results = detect_regions_with_llm([_scene(0)], fake_vision)

    assert len(results) == 1
    regions = results[0]
    assert regions.scene_id == "s0"
    # Person sits on the right-hand side of the frame.
    assert regions.person_bbox
    assert regions.person_bbox.center_x > 0.8
    # Chart spans most of the left-hand side.
    assert regions.chart_bbox
    assert regions.chart_bbox.width > 0.6
    assert "Inflation" in regions.ocr_text
def test_detect_regions_bad_json_is_safe():
    """A non-JSON reply yields empty regions and a parse-error reason."""

    def garbage_vision(*_a) -> str:
        # Deliberately unparseable model output.
        return "not json"

    results = detect_regions_with_llm([_scene(0)], garbage_vision)
    record = results[0]

    assert record.person_bbox is None
    assert record.chart_bbox is None
    assert "parse error" in record.raw_reason.lower()
def test_detect_regions_missing_keyframe_is_safe():
    """A scene without a keyframe yields empty regions and no model call.

    The stub records every invocation in addition to raising. Raising alone
    is not a reliable guard: if ``detect_regions_with_llm`` catches errors
    coming out of the vision callable (this module expects it to degrade
    rather than raise), the ``AssertionError`` could be swallowed and an
    unwanted call would go unnoticed.
    """
    scenes = [_scene(0, kf=None)]
    calls: list[tuple] = []

    def vision_fn(*args) -> str:  # pragma: no cover - should not be called
        calls.append(args)
        raise AssertionError("vision_fn must not be called without a keyframe")

    out = detect_regions_with_llm(scenes, vision_fn)

    # Explicit check that survives any exception handling in the callee.
    assert calls == [], "vision_fn must not be called without a keyframe"
    assert out[0].person_bbox is None
    # Same emptiness check as the bad-JSON test: nothing was detected at all.
    assert out[0].chart_bbox is None
    assert "no keyframe" in out[0].raw_reason.lower()
def test_detect_regions_bad_bbox_degrades_gracefully():
    """One inverted bbox is dropped while the valid sibling bbox is kept."""
    reply = {
        # x2 < x1: this box is malformed on purpose.
        "person_bbox": {"x1": 0.5, "y1": 0.1, "x2": 0.3, "y2": 0.9},
        "chart_bbox": {"x1": 0.02, "y1": 0.05, "x2": 0.65, "y2": 0.95},
        "ocr_text": "",
        "reason": "person bbox inverted",
    }

    def half_broken_vision(*_a) -> str:
        return json.dumps(reply)

    results = detect_regions_with_llm([_scene(0)], half_broken_vision)
    record = results[0]

    assert record.person_bbox is None
    assert record.chart_bbox is not None
# ---------------------------------------------------------------------------
# classify_from_regions
# ---------------------------------------------------------------------------
def test_classify_wide_chart_is_split():
    """A chart 0.66 wide next to a person classifies as the split layout."""
    wide_chart = BoundingBox(x1=0.0, y1=0.0, x2=0.66, y2=1.0)
    person = BoundingBox(x1=0.72, y1=0.1, x2=0.99, y2=0.95)
    regions = SceneRegions(scene_id="s0", chart_bbox=wide_chart, person_bbox=person)

    verdict = classify_from_regions(regions)

    assert verdict.layout == LayoutKind.SPLIT_CHART_PERSON
    assert verdict.confidence > 0.5
def test_classify_narrow_chart_not_split():
    """A chart only 0.1 wide must not trigger the split layout."""
    narrow_chart = BoundingBox(x1=0.4, y1=0.2, x2=0.5, y2=0.4)
    person = BoundingBox(x1=0.3, y1=0.1, x2=0.85, y2=0.95)
    regions = SceneRegions(scene_id="s0", chart_bbox=narrow_chart, person_bbox=person)

    verdict = classify_from_regions(regions)

    # chart width (0.5 - 0.4 = 0.1) is below the split threshold -> not split
    assert verdict.layout != LayoutKind.SPLIT_CHART_PERSON
def test_classify_wide_person_is_zoom_call():
    """A person box 0.8 wide with no chart classifies as zoom-call center."""
    wide_person = BoundingBox(x1=0.1, y1=0.05, x2=0.9, y2=0.98)
    regions = SceneRegions(scene_id="s0", person_bbox=wide_person)

    verdict = classify_from_regions(regions)

    assert verdict.layout == LayoutKind.ZOOM_CALL_CENTER
def test_classify_small_person_is_sit_center():
    """A person box 0.2 wide with no chart classifies as sit-center."""
    small_person = BoundingBox(x1=0.4, y1=0.2, x2=0.6, y2=0.8)
    regions = SceneRegions(scene_id="s0", person_bbox=small_person)

    verdict = classify_from_regions(regions)

    assert verdict.layout == LayoutKind.SIT_CENTER
def test_classify_nothing_detected_defaults_sit_center_low_conf():
    """With no bboxes at all, the result is sit-center at confidence <= 0.5."""
    empty_regions = SceneRegions(scene_id="s0", raw_reason="model returned null")

    verdict = classify_from_regions(empty_regions)

    assert verdict.layout == LayoutKind.SIT_CENTER
    assert verdict.confidence <= 0.5
def test_chart_threshold_is_exported():
    """The split threshold constant exists and lies strictly inside (0, 1)."""
    # guard against the tuning constant silently being removed
    threshold = _CHART_WIDTH_SPLIT_THRESHOLD
    assert threshold > 0.0
    assert threshold < 1.0
# ---------------------------------------------------------------------------
# layout_instruction_from_regions
# ---------------------------------------------------------------------------
def test_layout_instruction_from_regions_split():
    """Split layout takes ``person_x_norm`` / ``chart_x_norm`` from the bboxes."""
    chart = BoundingBox(x1=0.0, y1=0.0, x2=0.66, y2=1.0)
    person = BoundingBox(x1=0.72, y1=0.1, x2=0.99, y2=0.95)
    regions = SceneRegions(scene_id="s0", chart_bbox=chart, person_bbox=person)

    verdict = classify_from_regions(regions)
    instruction = layout_instruction_from_regions(regions, verdict)

    assert instruction.layout == LayoutKind.SPLIT_CHART_PERSON
    # person_x_norm = horizontal center of the person box: (0.72 + 0.99) / 2
    assert instruction.person_x_norm == pytest.approx(0.855, rel=1e-3)
    # chart_x_norm = left edge of the chart box
    assert instruction.chart_x_norm == pytest.approx(0.0)
def test_layout_instruction_defaults_when_no_regions():
    """Without any bbox the instruction uses the defaults 0.5 and 0.0."""
    bare_regions = SceneRegions(scene_id="s0")
    fallback = SceneClassification(
        scene_id="s0",
        layout=LayoutKind.SIT_CENTER,
        confidence=0.3,
        reason="default",
    )

    instruction = layout_instruction_from_regions(bare_regions, fallback)

    assert instruction.person_x_norm == 0.5
    assert instruction.chart_x_norm == 0.0
def test_classify_scenes_with_vision_llm_returns_pairs():
    """The end-to-end helper returns one (regions, classification) per scene."""
    reply = {
        "person_bbox": {"x1": 0.1, "y1": 0.1, "x2": 0.95, "y2": 0.95},
        "chart_bbox": None,
        "ocr_text": "",
        "reason": "solo subject",
    }

    def solo_subject_vision(*_a) -> str:
        return json.dumps(reply)

    results = classify_scenes_with_vision_llm([_scene(0)], solo_subject_vision)

    assert len(results) == 1
    detected, verdict = results[0]
    assert detected.person_bbox is not None
    assert verdict.layout == LayoutKind.ZOOM_CALL_CENTER
|