Harry Pham committed on
Commit
d640e48
·
1 Parent(s): cb8c63c

update OCR

Browse files
Files changed (1) hide show
  1. src/inference.py +63 -11
src/inference.py CHANGED
@@ -29,27 +29,37 @@ _ocr_easyocr = None
29
  _ocr_vietocr = None
30
 
31
 
32
- # Real-ESRGAN upscaler (optional)
33
  REALESRGAN_AVAILABLE = False
 
 
34
  try:
35
  from realesrgan import RealESRGANer
36
  from basicsr.archs.rrdbnet_arch import RRDBNet
37
  REALESRGAN_AVAILABLE = True
38
- print("[INFO] Real-ESRGAN available")
39
  except ImportError:
40
  print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")
41
 
42
  def get_esrgan_upsampler():
 
43
  if not REALESRGAN_AVAILABLE:
44
  return None
45
- model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
46
- upsampler = RealESRGANer(
47
- scale=4,
48
- model_path='weights/RealESRGAN_x4plus_anime_6B.pth',
49
- model=model,
50
- device=DEVICE
51
- )
52
- return upsampler
 
 
 
 
 
 
 
 
53
 
54
  def upscale_if_needed(img_bgr, min_dim=300):
55
  """Upscale image using Real-ESRGAN if both dimensions are below threshold."""
@@ -522,6 +532,28 @@ def get_easyocr_reader():
522
  # PREPROCESSING
523
  # ============================================================
524
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  def preprocess_for_ocr(img_bgr, min_width=1500, mode="note"):
526
  h, w = img_bgr.shape[:2]
527
 
@@ -671,6 +703,9 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
671
  best_items = []
672
  best_conf = 0.0
673
 
 
 
 
674
  # Pass 1: Color preprocessing
675
  img_v1 = preprocess_for_ocr(img_bgr, min_width=1500, mode=ocr_type)
676
  items1, conf1 = ocr_single_pass(reader, img_v1)
@@ -698,8 +733,15 @@ def multi_pass_ocr(img_bgr, reader, ocr_type="note"):
698
  if conf4 > best_conf:
699
  best_conf = conf4
700
  best_items = items4
 
 
 
 
 
 
 
701
 
702
- print(f" Multi-pass confidences: {conf1:.3f}, {conf2:.3f}, {conf3:.3f}, {conf4:.3f} → best={best_conf:.3f}")
703
  return best_items, best_conf
704
 
705
 
@@ -919,6 +961,16 @@ def ocr_cell_improved(img_cell, backend="paddle"):
919
  if conf3 > best_conf and text3.strip():
920
  best_conf = conf3
921
  best_text = text3
 
 
 
 
 
 
 
 
 
 
922
 
923
  # Also try English PaddleOCR for specs like "M6x50", "CT3"
924
  if backend == "paddle":
 
29
  _ocr_vietocr = None
30
 
31
 
 
32
  REALESRGAN_AVAILABLE = False
33
+ _esrgan_upsampler = None # Cached Real-ESRGAN upsampler, created lazily by get_esrgan_upsampler()
34
+
35
  try:
36
  from realesrgan import RealESRGANer
37
  from basicsr.archs.rrdbnet_arch import RRDBNet
38
  REALESRGAN_AVAILABLE = True
39
+ print("[INFO] Real-ESRGAN is available")
40
  except ImportError:
41
  print("[WARN] Real-ESRGAN not installed. Install: pip install realesrgan basicsr")
42
 
43
  def get_esrgan_upsampler():
44
+ global _esrgan_upsampler
45
  if not REALESRGAN_AVAILABLE:
46
  return None
47
+
48
+ if _esrgan_upsampler is None:
49
+ try:
50
+ print("[INFO] Loading Real-ESRGAN model...")
51
+ model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=6, num_grow_ch=32, scale=4)
52
+ _esrgan_upsampler = RealESRGANer(
53
+ scale=4,
54
+ model_path='weights/RealESRGAN_x4plus_anime_6B.pth',
55
+ model=model,
56
+ device=DEVICE
57
+ )
58
+ except Exception as e:
59
+ print(f"[WARN] Failed to load Real-ESRGAN: {e}")
60
+ return None
61
+
62
+ return _esrgan_upsampler
63
 
64
  def upscale_if_needed(img_bgr, min_dim=300):
65
  """Upscale image using Real-ESRGAN if both dimensions are below threshold."""
 
532
  # PREPROCESSING
533
  # ============================================================
534
 
535
+ def enhance_faded_text(img_bgr):
536
+ """Solution 4: unsharp masking combined with local thresholding for faded text strokes."""
537
+ gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
538
+
539
+ # 1. Unsharp masking (enhances edges / text strokes)
540
+ gaussian = cv2.GaussianBlur(gray, (0, 0), 2.0)
541
+ unsharp = cv2.addWeighted(gray, 1.5, gaussian, -0.5, 0)
542
+
543
+ # 2. Local thresholding
544
+ try:
545
+ from skimage.filters import threshold_sauvola
546
+ window_size = 25
547
+ thresh = threshold_sauvola(unsharp, window_size=window_size)
548
+ binary = (unsharp > thresh) * 255
549
+ binary = binary.astype(np.uint8)
550
+ except ImportError:
551
+ # Fall back to OpenCV if scikit-image is not installed
552
+ binary = cv2.adaptiveThreshold(unsharp, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
553
+ cv2.THRESH_BINARY, 21, 10)
554
+
555
+ return cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)
556
+
557
  def preprocess_for_ocr(img_bgr, min_width=1500, mode="note"):
558
  h, w = img_bgr.shape[:2]
559
 
 
703
  best_items = []
704
  best_conf = 0.0
705
 
706
+ # If the image is small because it was cropped from a corner, upscale it right away before doing anything else
707
+ img_bgr = upscale_if_needed(img_bgr, min_dim=400)
708
+
709
  # Pass 1: Color preprocessing
710
  img_v1 = preprocess_for_ocr(img_bgr, min_width=1500, mode=ocr_type)
711
  items1, conf1 = ocr_single_pass(reader, img_v1)
 
733
  if conf4 > best_conf:
734
  best_conf = conf4
735
  best_items = items4
736
+
737
+ # --- Added pass 5: handle blurry, washed-out text ---
738
+ img_v5 = enhance_faded_text(img_bgr)
739
+ items5, conf5 = ocr_single_pass(reader, img_v5)
740
+ if conf5 > best_conf:
741
+ best_conf = conf5
742
+ best_items = items5
743
 
744
+ print(f" Multi-pass confidences: {conf1:.3f}, {conf2:.3f}, {conf3:.3f}, {conf4:.3f}, {conf5:.3f} → best={best_conf:.3f}")
745
  return best_items, best_conf
746
 
747
 
 
961
  if conf3 > best_conf and text3.strip():
962
  best_conf = conf3
963
  best_text = text3
964
+
965
+ # --- Added variant 4: for faded / broken handwritten strokes ---
966
+ img_proc4 = enhance_faded_text(img_cell)
967
+ items4, conf4 = ocr_single_pass(reader, img_proc4)
968
+ text4 = " ".join([it["text"] for it in items4])
969
+ if conf4 > best_conf and text4.strip():
970
+ best_conf = conf4
971
+ best_text = text4
972
+
973
+ # Also try English PaddleOCR for specs like "M6x50", "CT3"
974
 
975
  # Also try English PaddleOCR for specs like "M6x50", "CT3"
976
  if backend == "paddle":