Spaces:
Running
Running
| """OCR-based readability features. Fallback to heuristic when OCR unavailable.""" | |
| import numpy as np | |
| from PIL import Image | |
| from typing import Dict | |
def _compute_text_heuristic(img: Image.Image) -> Dict[str, float]:
    """
    Fallback heuristic: use edge density as a proxy for text presence.

    Used when OCR is unavailable. Every OCR-specific feature is reported as
    zero / ``False``; only ``text_proxy`` carries information.

    Args:
        img: PIL Image. It is converted to 8-bit grayscale ("L") internally.

    Returns dict with keys:
        text_coverage, avg_ocr_confidence, word_count, text_density,
        avg_text_height_ratio, has_text, text_proxy
        ``text_proxy`` is the mean absolute neighbour difference (horizontal
        plus vertical) divided by the brightest grayscale value, returned as
        a plain Python float.
    """
    gray = np.array(img.convert("L")).astype(np.float32)

    if gray.size == 0:
        # A zero-size image has no pixels to reduce over (``gray.max()`` would
        # raise ValueError), so report "no edges" instead of crashing.
        text_proxy = 0.0
    else:
        # Appending the last column / row keeps both difference maps the same
        # shape as ``gray``; the padded border contributes a zero difference.
        horizontal = np.abs(np.diff(gray, axis=1, append=gray[:, -1:]))
        vertical = np.abs(np.diff(gray, axis=0, append=gray[-1:, :]))
        edges = horizontal + vertical

        # Divide plain Python floats. Dividing by the float32 ``gray.max()``
        # directly yields a NumPy scalar (float32 under NumPy 2 promotion),
        # which is not a ``float`` and cannot be serialised by ``json.dumps``.
        # The epsilon guards against an all-black image (max == 0).
        text_proxy = float(edges.mean()) / (float(gray.max()) + 1e-8)

    return {
        "text_coverage": 0.0,
        "avg_ocr_confidence": 0.0,
        "word_count": 0,
        "text_density": 0.0,
        "avg_text_height_ratio": 0.0,
        "has_text": False,
        "text_proxy": text_proxy,
    }
def compute_ocr_features(img: Image.Image, ocr_results: list = None) -> Dict[str, float]:
    """
    Derive readability features from OCR detections.

    Args:
        img: PIL Image the detections belong to; only its ``width`` and
            ``height`` are read here.
        ocr_results: List of (bbox, text, confidence) entries from EasyOCR,
            where bbox is a sequence of [x, y] corner points.
            If None, the heuristic fallback is used instead. An empty list
            yields an all-zero feature dict.

    Returns dict with keys:
        text_coverage: summed axis-aligned bbox area over image area
            (overlapping boxes are counted once per box).
        avg_ocr_confidence: mean confidence over all detections.
        word_count: number of detections (entries in ``ocr_results``).
        text_density: detections per 10,000 pixels of image area.
        avg_text_height_ratio: mean bbox height over image height.
        has_text: True when at least one detection is present.
        text_proxy: same value as ``avg_ocr_confidence`` on this path.
    """
    if ocr_results is None:
        return _compute_text_heuristic(img)

    image_pixels = img.width * img.height

    if not ocr_results:
        # OCR produced no detections: every feature is zero.
        return dict(
            text_coverage=0.0,
            avg_ocr_confidence=0.0,
            word_count=0,
            text_density=0.0,
            avg_text_height_ratio=0.0,
            has_text=False,
            text_proxy=0.0,
        )

    area_sum = 0.0
    confidence_sum = 0.0
    height_sum = 0.0
    for detection in ocr_results:
        # detection[0]: corner points [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
        # detection[1]: recognised text (unused)
        # detection[2]: confidence
        corners, confidence = detection[0], detection[2]

        # Axis-aligned extent of the (possibly rotated) quadrilateral.
        xs = [point[0] for point in corners]
        ys = [point[1] for point in corners]
        box_width = max(xs) - min(xs)
        box_height = max(ys) - min(ys)

        area_sum += box_width * box_height
        confidence_sum += confidence
        height_sum += box_height

    detection_count = len(ocr_results)
    mean_height = height_sum / detection_count
    mean_confidence = float(confidence_sum / detection_count)

    # The 1e-8 terms keep every ratio finite for a zero-size image.
    return {
        "text_coverage": float(area_sum / (image_pixels + 1e-8)),
        "avg_ocr_confidence": mean_confidence,
        "word_count": detection_count,
        "text_density": float(detection_count / (image_pixels / 10000 + 1e-8)),
        "avg_text_height_ratio": float(mean_height / (img.height + 1e-8)),
        "has_text": True,
        "text_proxy": mean_confidence,
    }