Spaces:

Babajaan
/

viral-images

Running

App Files Files Community

viral-images / features /ocr.py

Babajaan

Full Viral Images v1.0 implementation - all modules and configs

6ceaa94 verified 14 days ago

raw

history blame contribute delete

2.84 kB

	"""OCR-based readability features. Fallback to heuristic when OCR unavailable."""

	from typing import Dict, Optional

	import numpy as np
	from PIL import Image


	def _compute_text_heuristic(img: Image.Image) -> Dict[str, float]:
	"""
	Fallback heuristic: detect high-contrast regions as proxy for text presence.
	Returns reasonable defaults when OCR is unavailable.
	"""
	gray = np.array(img.convert("L")).astype(np.float32)
	# Edge density is proxy for text
	edges = np.abs(np.diff(gray, axis=1, append=gray[:, -1:])) + np.abs(np.diff(gray, axis=0, append=gray[-1:, :]))
	text_proxy = float(edges.mean()) / (gray.max() + 1e-8)

	return {
	"text_coverage": 0.0,
	"avg_ocr_confidence": 0.0,
	"word_count": 0,
	"text_density": 0.0,
	"avg_text_height_ratio": 0.0,
	"has_text": False,
	"text_proxy": text_proxy,
	}


	def compute_ocr_features(img: Image.Image, ocr_results: Optional[list] = None) -> Dict[str, float]:
	    """Compute OCR-based readability features for an image.

	    Args:
	        img: PIL image; its ``width`` and ``height`` are used for normalisation.
	        ocr_results: List of ``(bbox, text, confidence)`` entries as produced
	            by EasyOCR, where ``bbox`` is ``[[x1, y1], [x2, y2], [x3, y3],
	            [x4, y4]]``. If ``None``, the edge-density heuristic
	            ``_compute_text_heuristic`` is used instead. An empty list means
	            OCR ran and found nothing.

	    Returns:
	        Dict with keys ``text_coverage``, ``avg_ocr_confidence``,
	        ``word_count``, ``text_density``, ``avg_text_height_ratio``,
	        ``has_text`` and ``text_proxy``.

	        * ``word_count`` is the number of detections, not of whitespace-
	          separated words.
	        * ``text_coverage`` sums axis-aligned bounding-box areas, so
	          overlapping detections can push it above 1.0.
	        * ``text_density`` is detections per 10,000 pixels of image area.
	        * ``text_proxy`` equals the average confidence on this path.
	    """
	    if ocr_results is None:
	        return _compute_text_heuristic(img)

	    total_area = img.width * img.height

	    if not ocr_results:
	        return {
	            "text_coverage": 0.0,
	            "avg_ocr_confidence": 0.0,
	            "word_count": 0,
	            "text_density": 0.0,
	            "avg_text_height_ratio": 0.0,
	            "has_text": False,
	            "text_proxy": 0.0,
	        }

	    text_area = 0.0
	    total_conf = 0.0
	    total_height = 0.0

	    for result in ocr_results:
	        # Index access (rather than tuple unpacking) tolerates entries that
	        # carry extra trailing fields.
	        bbox = result[0]
	        conf = result[2]

	        # Approximate the quadrilateral by its axis-aligned bounding box.
	        xs = [point[0] for point in bbox]
	        ys = [point[1] for point in bbox]
	        box_width = max(xs) - min(xs)
	        box_height = max(ys) - min(ys)

	        text_area += box_width * box_height
	        total_conf += conf
	        total_height += box_height

	    num_detections = len(ocr_results)
	    avg_conf = float(total_conf / num_detections)
	    avg_height = total_height / num_detections

	    # The 1e-8 terms guard against division by zero for degenerate images.
	    return {
	        "text_coverage": float(text_area / (total_area + 1e-8)),
	        "avg_ocr_confidence": avg_conf,
	        "word_count": num_detections,
	        "text_density": float(num_detections / (total_area / 10000 + 1e-8)),
	        "avg_text_height_ratio": float(avg_height / (img.height + 1e-8)),
	        "has_text": True,
	        "text_proxy": avg_conf,
	    }