Spaces:
Sleeping
Sleeping
Harry Pham committed on
Commit ·
d640e48
1
Parent(s): cb8c63c
update OCR
Browse files- src/inference.py +63 -11
src/inference.py
CHANGED
|
@@ -29,27 +29,37 @@ _ocr_easyocr = None
|
|
| 29 |
_ocr_vietocr = None
|
| 30 |
|
| 31 |
|
| 32 |
-
# Real-ESRGAN upscaler (optional)
|
| 33 |
REALESRGAN_AVAILABLE = False
|
|
|
|
|
|
|
| 34 |
try:
|
| 35 |
from realesrgan import RealESRGANer
|
| 36 |
from basicsr.archs.rrdbnet_arch import RRDBNet
|
| 37 |
REALESRGAN_AVAILABLE = True
|
| 38 |
-
print("[INFO] Real-ESRGAN available")
|
| 39 |
except ImportError:
|
| 40 |
print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")
|
| 41 |
|
| 42 |
def get_esrgan_upsampler():
|
|
|
|
| 43 |
if not REALESRGAN_AVAILABLE:
|
| 44 |
return None
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def upscale_if_needed(img_bgr, min_dim=300):
|
| 55 |
"""Upscale image using Real-ESRGAN if both dimensions are below threshold."""
|
|
@@ -522,6 +532,28 @@ def get_easyocr_reader():
|
|
| 522 |
# PREPROCESSING
|
| 523 |
# ============================================================
|
| 524 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 525 |
def preprocess_for_ocr(img_bgr, min_width=1500, mode="note"):
|
| 526 |
h, w = img_bgr.shape[:2]
|
| 527 |
|
|
@@ -671,6 +703,9 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
|
|
| 671 |
best_items = []
|
| 672 |
best_conf = 0.0
|
| 673 |
|
|
|
|
|
|
|
|
|
|
| 674 |
# Pass 1: Color preprocessing
|
| 675 |
img_v1 = preprocess_for_ocr(img_bgr, min_width=1500, mode=ocr_type)
|
| 676 |
items1, conf1 = ocr_single_pass(reader, img_v1)
|
|
@@ -698,8 +733,15 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
|
|
| 698 |
if conf4 > best_conf:
|
| 699 |
best_conf = conf4
|
| 700 |
best_items = items4
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 701 |
|
| 702 |
-
print(f" Multi-pass confidences: {conf1:.3f}, {conf2:.3f}, {conf3:.3f}, {conf4:.3f} → best={best_conf:.3f}")
|
| 703 |
return best_items, best_conf
|
| 704 |
|
| 705 |
|
|
@@ -919,6 +961,16 @@ def ocr_cell_improved(img_cell, backend="paddle"):
|
|
| 919 |
if conf3 > best_conf and text3.strip():
|
| 920 |
best_conf = conf3
|
| 921 |
best_text = text3
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
|
| 923 |
# Also try English PaddleOCR for specs like "M6x50", "CT3"
|
| 924 |
if backend == "paddle":
|
|
|
|
| 29 |
_ocr_vietocr = None
|
| 30 |
|
| 31 |
|
|
|
|
| 32 |
REALESRGAN_AVAILABLE = False
|
| 33 |
+
_esrgan_upsampler = None # Module-level cache for the Real-ESRGAN upsampler; assigned by get_esrgan_upsampler()
|
| 34 |
+
|
| 35 |
try:
|
| 36 |
from realesrgan import RealESRGANer
|
| 37 |
from basicsr.archs.rrdbnet_arch import RRDBNet
|
| 38 |
REALESRGAN_AVAILABLE = True
|
| 39 |
+
print("[INFO] Real-ESRGAN is available")
|
| 40 |
except ImportError:
|
| 41 |
print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")
|
| 42 |
|
| 43 |
def get_esrgan_upsampler():
    """Return the shared Real-ESRGAN upsampler, creating it on first use.

    Returns:
        The cached ``RealESRGANer`` instance, or ``None`` when Real-ESRGAN is
        not available or the model could not be loaded. A failed load is not
        remembered, so the next call attempts the load again.
    """
    global _esrgan_upsampler

    # Nothing to build when the optional dependency failed to import.
    if not REALESRGAN_AVAILABLE:
        return None

    # Reuse the instance created by an earlier call.
    if _esrgan_upsampler is not None:
        return _esrgan_upsampler

    try:
        print("[INFO] Loading Real-ESRGAN model...")
        network = RRDBNet(
            num_in_ch=3,
            num_out_ch=3,
            num_feat=64,
            num_block=6,
            num_grow_ch=32,
            scale=4,
        )
        upsampler = RealESRGANer(
            scale=4,
            model_path="weights/RealESRGAN_x4plus_anime_6B.pth",
            model=network,
            device=DEVICE,
        )
    except Exception as exc:
        # Best effort: report the problem and let the caller continue
        # without upscaling.
        print(f"[WARN] Failed to load Real-ESRGAN: {exc}")
        return None

    _esrgan_upsampler = upsampler
    return _esrgan_upsampler
|
| 63 |
|
| 64 |
def upscale_if_needed(img_bgr, min_dim=300):
|
| 65 |
"""Upscale image using Real-ESRGAN if both dimensions are below threshold."""
|
|
|
|
| 532 |
# PREPROCESSING
|
| 533 |
# ============================================================
|
| 534 |
|
| 535 |
+
def enhance_faded_text(img_bgr):
    """Sharpen and binarize an image so faded or washed-out strokes stand out.

    The image is reduced to grayscale, sharpened with an unsharp mask and then
    binarized with a local threshold: Sauvola when scikit-image can be
    imported, otherwise OpenCV's Gaussian adaptive threshold.

    Args:
        img_bgr: Image array. A BGR colour image is the expected input;
            single-channel arrays (2-D, or H x W x 1) are accepted as well
            and are used as the grayscale image directly.

    Returns:
        The binarized image (pixel values 0 or 255) expanded back to three
        channels with ``cv2.COLOR_GRAY2BGR``.
    """
    # COLOR_BGR2GRAY raises on single-channel input, so only convert when the
    # array really carries colour channels.
    if img_bgr.ndim == 2:
        gray = img_bgr
    elif img_bgr.shape[2] == 1:
        gray = np.ascontiguousarray(img_bgr[:, :, 0])
    else:
        gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

    # 1. Unsharp masking (strengthens edges / strokes):
    #    1.5 * gray - 0.5 * blurred.
    gaussian = cv2.GaussianBlur(gray, (0, 0), 2.0)
    unsharp = cv2.addWeighted(gray, 1.5, gaussian, -0.5, 0)

    # 2. Local thresholding.
    try:
        from skimage.filters import threshold_sauvola
        window_size = 25
        thresh = threshold_sauvola(unsharp, window_size=window_size)
        binary = (unsharp > thresh) * 255
        binary = binary.astype(np.uint8)
    except ImportError:
        # Fall back to OpenCV when scikit-image is not installed.
        binary = cv2.adaptiveThreshold(unsharp, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY, 21, 10)

    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
|
| 556 |
+
|
| 557 |
def preprocess_for_ocr(img_bgr, min_width=1500, mode="note"):
|
| 558 |
h, w = img_bgr.shape[:2]
|
| 559 |
|
|
|
|
| 703 |
best_items = []
|
| 704 |
best_conf = 0.0
|
| 705 |
|
| 706 |
+
# [NẾU ẢNH NHỎ LÀ DO CẮT TỪ GÓC, ÚP SCALE LUÔN TRƯỚC KHI LÀM GÌ ĐÓ]
|
| 707 |
+
img_bgr = upscale_if_needed(img_bgr, min_dim=400)
|
| 708 |
+
|
| 709 |
# Pass 1: Color preprocessing
|
| 710 |
img_v1 = preprocess_for_ocr(img_bgr, min_width=1500, mode=ocr_type)
|
| 711 |
items1, conf1 = ocr_single_pass(reader, img_v1)
|
|
|
|
| 733 |
if conf4 > best_conf:
|
| 734 |
best_conf = conf4
|
| 735 |
best_items = items4
|
| 736 |
+
|
| 737 |
+
# --- THÊM PASS 5: Giải quyết chữ bị mờ, lợt ---
|
| 738 |
+
img_v5 = enhance_faded_text(img_bgr)
|
| 739 |
+
items5, conf5 = ocr_single_pass(reader, img_v5)
|
| 740 |
+
if conf5 > best_conf:
|
| 741 |
+
best_conf = conf5
|
| 742 |
+
best_items = items5
|
| 743 |
|
| 744 |
+
print(f" Multi-pass confidences: {conf1:.3f}, {conf2:.3f}, {conf3:.3f}, {conf4:.3f}, {conf5:.3f} → best={best_conf:.3f}")
|
| 745 |
return best_items, best_conf
|
| 746 |
|
| 747 |
|
|
|
|
| 961 |
if conf3 > best_conf and text3.strip():
|
| 962 |
best_conf = conf3
|
| 963 |
best_text = text3
|
| 964 |
+
|
| 965 |
+
# --- THÊM VARIANT 4: Dành cho nét chữ viết tay bị mờ/đứt nét ---
|
| 966 |
+
img_proc4 = enhance_faded_text(img_cell)
|
| 967 |
+
items4, conf4 = ocr_single_pass(reader, img_proc4)
|
| 968 |
+
text4 = " ".join([it["text"] for it in items4])
|
| 969 |
+
if conf4 > best_conf and text4.strip():
|
| 970 |
+
best_conf = conf4
|
| 971 |
+
best_text = text4
|
| 972 |
+
|
| 973 |
+
# Also try English PaddleOCR for specs like "M6x50", "CT3"
|
| 974 |
|
| 975 |
# Also try English PaddleOCR for specs like "M6x50", "CT3"
|
| 976 |
if backend == "paddle":
|