# Babajaan's picture
# Full Viral Images v1.0 implementation - all modules and configs
# 6ceaa94 verified
"""OCR-based readability features. Fallback to heuristic when OCR unavailable."""
import numpy as np
from PIL import Image
from typing import Dict
def _compute_text_heuristic(img: Image.Image) -> Dict[str, float]:
    """
    Fallback heuristic: use edge density as a proxy for text presence.

    Used when OCR is unavailable. Every OCR-specific feature is reported as
    zero / ``False``; only ``text_proxy`` is derived from the image.

    Args:
        img: PIL Image. It is converted to 8-bit grayscale ("L") first.

    Returns dict with keys:
        text_coverage, avg_ocr_confidence, word_count, text_density,
        avg_text_height_ratio, has_text, text_proxy
        ``text_proxy`` is always a built-in ``float``.
    """
    gray = np.array(img.convert("L")).astype(np.float32)

    if gray.size == 0:
        # A zero-pixel image has no edges, and ``gray.max()`` would raise on
        # an empty array, so report "no signal" directly.
        text_proxy = 0.0
    else:
        # Absolute intensity differences between neighbouring pixels. The last
        # column / row is appended to itself so both arrays keep the shape of
        # ``gray`` (the padded border contributes a difference of zero).
        horizontal = np.abs(np.diff(gray, axis=1, append=gray[:, -1:]))
        vertical = np.abs(np.diff(gray, axis=0, append=gray[-1:, :]))
        edges = horizontal + vertical

        # Mean edge strength normalised by the brightest pixel. Both operands
        # are converted to Python floats first so the result is a plain
        # ``float`` rather than a NumPy scalar (np.float32 is not JSON
        # serialisable). The epsilon avoids division by zero on black images.
        text_proxy = float(edges.mean()) / (float(gray.max()) + 1e-8)

    return {
        "text_coverage": 0.0,
        "avg_ocr_confidence": 0.0,
        "word_count": 0,
        "text_density": 0.0,
        "avg_text_height_ratio": 0.0,
        "has_text": False,
        "text_proxy": text_proxy,
    }
def compute_ocr_features(img: Image.Image, ocr_results: list = None) -> Dict[str, float]:
    """
    Summarise OCR detections into readability features.

    Args:
        img: PIL Image. Only its width and height are read when OCR results
            are supplied.
        ocr_results: List of (bbox, text, confidence) tuples from EasyOCR,
            where bbox is [[x1,y1], [x2,y2], [x3,y3], [x4,y4]].
            If None, the heuristic fallback is used instead.

    Returns dict with keys:
        text_coverage, avg_ocr_confidence, word_count, text_density,
        avg_text_height_ratio, has_text, text_proxy
    """
    if ocr_results is None:
        return _compute_text_heuristic(img)

    image_area = img.width * img.height

    if not ocr_results:
        # OCR produced no detections: every feature is zero.
        return {
            "text_coverage": 0.0,
            "avg_ocr_confidence": 0.0,
            "word_count": 0,
            "text_density": 0.0,
            "avg_text_height_ratio": 0.0,
            "has_text": False,
            "text_proxy": 0.0,
        }

    covered_area = 0.0
    confidence_sum = 0.0
    height_sum = 0.0
    for detection in ocr_results:
        # detection[0] is the bbox, detection[1] the text (unused here),
        # detection[2] the confidence.
        corners = detection[0]
        confidence = detection[2]

        # Each box is approximated by its axis-aligned bounding rectangle.
        x_coords = [corner[0] for corner in corners]
        y_coords = [corner[1] for corner in corners]
        box_width = max(x_coords) - min(x_coords)
        box_height = max(y_coords) - min(y_coords)

        # Areas are simply summed, so overlapping boxes count more than once.
        covered_area += box_width * box_height
        confidence_sum += confidence
        height_sum += box_height

    detection_count = len(ocr_results)
    mean_confidence = float(confidence_sum / detection_count)
    mean_height = height_sum / detection_count

    return {
        "text_coverage": float(covered_area / (image_area + 1e-8)),
        "avg_ocr_confidence": mean_confidence,
        # One count per OCR detection, not per whitespace-separated word.
        "word_count": detection_count,
        # Detections per 10,000 square pixels of image area.
        "text_density": float(detection_count / (image_area / 10000 + 1e-8)),
        "avg_text_height_ratio": float(mean_height / (img.height + 1e-8)),
        "has_text": True,
        # With real OCR output the proxy is the mean confidence.
        "text_proxy": mean_confidence,
    }