Spaces:

phamha
/

engineering-drawing

Sleeping

App Files Files Community

Harry Pham committed 18 days ago

Commit

d640e48

1 Parent(s): cb8c63c

update OCR

Browse files

Files changed (1) hide show

src/inference.py +63 -11

src/inference.py CHANGED Viewed

@@ -29,27 +29,37 @@ _ocr_easyocr = None
 _ocr_vietocr = None
-# Real-ESRGAN upscaler (optional)
 REALESRGAN_AVAILABLE = False
 try:
     from realesrgan import RealESRGANer
     from basicsr.archs.rrdbnet_arch import RRDBNet
     REALESRGAN_AVAILABLE = True
-    print("[INFO] Real-ESRGAN available")
 except ImportError:
     print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")
 def get_esrgan_upsampler():
     if not REALESRGAN_AVAILABLE:
         return None
-    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
-    upsampler = RealESRGANer(
-        scale=4,
-        model_path='weights/RealESRGAN_x4plus_anime_6B.pth',
-        model=model,
-        device=DEVICE
-    )
-    return upsampler
 def upscale_if_needed(img_bgr, min_dim=300):
     """Upscale image using Real-ESRGAN if both dimensions are below threshold."""
@@ -522,6 +532,28 @@ def get_easyocr_reader():
 # PREPROCESSING
 # ============================================================
 def preprocess_for_ocr(img_bgr, min_width=1500, mode="note"):
     h, w = img_bgr.shape[:2]
@@ -671,6 +703,9 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
     best_items = []
     best_conf = 0.0
     # Pass 1: Color preprocessing
     img_v1 = preprocess_for_ocr(img_bgr, min_width=1500, mode=ocr_type)
     items1, conf1 = ocr_single_pass(reader, img_v1)
@@ -698,8 +733,15 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
     if conf4 > best_conf:
         best_conf = conf4
         best_items = items4
-    print(f"      Multi-pass confidences: {conf1:.3f}, {conf2:.3f}, {conf3:.3f}, {conf4:.3f} → best={best_conf:.3f}")
     return best_items, best_conf
@@ -919,6 +961,16 @@ def ocr_cell_improved(img_cell, backend="paddle"):
     if conf3 > best_conf and text3.strip():
         best_conf = conf3
         best_text = text3
     # Also try English PaddleOCR for specs like "M6x50", "CT3"
     if backend == "paddle":

 _ocr_vietocr = None
 REALESRGAN_AVAILABLE = False
+_esrgan_upsampler = None  # Cached Real-ESRGAN upsampler; stays None until get_esrgan_upsampler() loads it
 # Real-ESRGAN is optional: the flag is only set to True when both imports succeed.
 try:
     from realesrgan import RealESRGANer
     from basicsr.archs.rrdbnet_arch import RRDBNet
     REALESRGAN_AVAILABLE = True
+    print("[INFO] Real-ESRGAN is available")
 except ImportError:
     print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")
 def get_esrgan_upsampler():
     # Return the shared RealESRGANer instance, building it on the first call.
     # Returns None when the Real-ESRGAN imports failed (REALESRGAN_AVAILABLE is
     # False) or when constructing the model raises.
+    global _esrgan_upsampler
     if not REALESRGAN_AVAILABLE:
         return None
+    if _esrgan_upsampler is None:
+        try:
+            print("[INFO] Loading Real-ESRGAN model...")
+            model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
+            _esrgan_upsampler = RealESRGANer(
+                scale=4,
+                model_path='weights/RealESRGAN_x4plus_anime_6B.pth',
+                model=model,
+                device=DEVICE
+            )
+        except Exception as e:
+            print(f"[WARN] Failed to load Real-ESRGAN: {e}")
+            return None  # the cache is still None here, so the next call attempts the load again
+    return _esrgan_upsampler
 def upscale_if_needed(img_bgr, min_dim=300):
     """Upscale image using Real-ESRGAN if both dimensions are below threshold."""
 # PREPROCESSING
 # ============================================================
+def enhance_faded_text(img_bgr):
+    """Solution 4: unsharp masking combined with local thresholding for faded text strokes; returns a binarized image converted back to BGR."""
+    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
+    # 1. Unsharp masking (strengthens edges / text strokes): 1.5 * gray - 0.5 * blurred
+    gaussian = cv2.GaussianBlur(gray, (0, 0), 2.0)
+    unsharp = cv2.addWeighted(gray, 1.5, gaussian, -0.5, 0)
+    # 2. Local thresholding (Sauvola when scikit-image can be imported)
+    try:
+        from skimage.filters import threshold_sauvola
+        window_size = 25
+        thresh = threshold_sauvola(unsharp, window_size=window_size)
+        binary = (unsharp > thresh) * 255
+        binary = binary.astype(np.uint8)
+    except ImportError:
+        # Fall back to OpenCV adaptive thresholding if scikit-image is not installed
+        binary = cv2.adaptiveThreshold(unsharp, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                       cv2.THRESH_BINARY, 21, 10)
+    return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
 def preprocess_for_ocr(img_bgr, min_width=1500, mode="note"):
     h, w = img_bgr.shape[:2]
     best_items = []
     best_conf = 0.0
+    # [If the image is small because it was cropped from a corner, upscale it before doing anything else]
+    img_bgr = upscale_if_needed(img_bgr, min_dim=400)
     # Pass 1: Color preprocessing
     img_v1 = preprocess_for_ocr(img_bgr, min_width=1500, mode=ocr_type)
     items1, conf1 = ocr_single_pass(reader, img_v1)
     if conf4 > best_conf:
         best_conf = conf4
         best_items = items4
+    # --- Added pass 5: handle faded, washed-out text ---
+    img_v5 = enhance_faded_text(img_bgr)
+    items5, conf5 = ocr_single_pass(reader, img_v5)
+    if conf5 > best_conf:
+        best_conf = conf5
+        best_items = items5
+    print(f"      Multi-pass confidences: {conf1:.3f}, {conf2:.3f}, {conf3:.3f}, {conf4:.3f}, {conf5:.3f} → best={best_conf:.3f}")
     return best_items, best_conf
     if conf3 > best_conf and text3.strip():
         best_conf = conf3
         best_text = text3
+    # --- Added variant 4: for handwritten strokes that are faded or broken ---
+    img_proc4 = enhance_faded_text(img_cell)
+    items4, conf4 = ocr_single_pass(reader, img_proc4)
+    text4 = " ".join([it["text"] for it in items4])
+    if conf4 > best_conf and text4.strip():
+        best_conf = conf4
+        best_text = text4
+    # Also try English PaddleOCR for specs like "M6x50", "CT3"
     # Also try English PaddleOCR for specs like "M6x50", "CT3"
     if backend == "paddle":