Spaces:
Running
Running
Update working_yolo_pipeline.py
Browse files- working_yolo_pipeline.py +2 -19
working_yolo_pipeline.py
CHANGED
|
@@ -735,23 +735,6 @@ def calculate_x_gutters(word_data: list, params: Dict, page_height: float) -> Li
|
|
| 735 |
#======================================================================================================================================
|
| 736 |
|
| 737 |
|
| 738 |
-
def deskew_image(img_np: np.ndarray, *, max_angle: float = 5.0,
                 min_angle: float = 0.5) -> np.ndarray:
    """Detect and correct a small page rotation before OCR.

    The image is converted to grayscale, inverted and Otsu-thresholded; the
    minimum-area rectangle enclosing every non-zero pixel of that mask gives
    the skew estimate, and the image is rotated about its centre by it.

    Args:
        img_np: Image as a 2-D array, or a 3-D array that
            ``cv2.COLOR_BGR2GRAY`` can convert.
        max_angle: Largest absolute angle (degrees) that is corrected.
            Larger estimates are treated as unreliable and the input is
            returned untouched.
        min_angle: Smallest absolute angle (degrees) worth correcting.

    Returns:
        The rotated image with the same width and height as the input, or
        ``img_np`` itself when no correction is applied.
    """
    # Nothing to analyse; also keeps an empty array away from OpenCV.
    if img_np is None or img_np.size == 0:
        return img_np

    gray = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) if img_np.ndim == 3 else img_np
    inverted = cv2.bitwise_not(gray)
    mask = cv2.threshold(inverted, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # np.where yields platform-sized ints (usually int64); minAreaRect only
    # accepts 32-bit int/float point sets, so cast explicitly.
    coords = np.column_stack(np.where(mask > 0)).astype(np.float32)
    if len(coords) < 100:
        # Too few foreground pixels for a trustworthy estimate.
        return img_np

    angle = cv2.minAreaRect(coords)[-1]
    # A rectangle's orientation is only defined modulo 90 degrees, and the
    # range minAreaRect reports differs between OpenCV releases ([-90, 0) vs
    # (0, 90]). Fold both conventions into [-45, 45] so an upright page maps
    # to ~0 instead of ~90.
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90

    # Skip negligible skew, and refuse anything beyond the documented range.
    if abs(angle) < min_angle or abs(angle) > max_angle:
        return img_np

    # NOTE(review): rotation direction is kept exactly as in the previous
    # implementation; confirm the sign against a known skewed sample.
    height, width = img_np.shape[:2]
    center = (width // 2, height // 2)
    rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(
        img_np,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
|
| 755 |
|
| 756 |
def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
| 757 |
top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
|
|
@@ -782,7 +765,7 @@ def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
|
| 782 |
elif pix.n == 4:
|
| 783 |
img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
|
| 784 |
|
| 785 |
-
|
| 786 |
|
| 787 |
# CRITICAL FIX: Use return_word_box=True and access word_results
|
| 788 |
ocr_result = ocr_engine(img_np, return_word_box=True)
|
|
@@ -1354,7 +1337,7 @@ def preprocess_and_ocr_page(original_img: np.ndarray, model, pdf_path: str,
|
|
| 1354 |
elif pix_ocr.n == 4:
|
| 1355 |
img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
|
| 1356 |
|
| 1357 |
-
|
| 1358 |
|
| 1359 |
# return_word_box=True gives word-level boxes instead of line-level
|
| 1360 |
ocr_out = ocr_engine(img_ocr_np, return_word_box=True)
|
|
|
|
| 735 |
#======================================================================================================================================
|
| 736 |
|
| 737 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
|
| 739 |
def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
|
| 740 |
top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
|
|
|
|
| 765 |
elif pix.n == 4:
|
| 766 |
img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
|
| 767 |
|
| 768 |
+
|
| 769 |
|
| 770 |
# CRITICAL FIX: Use return_word_box=True and access word_results
|
| 771 |
ocr_result = ocr_engine(img_np, return_word_box=True)
|
|
|
|
| 1337 |
elif pix_ocr.n == 4:
|
| 1338 |
img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
|
| 1339 |
|
| 1340 |
+
|
| 1341 |
|
| 1342 |
# return_word_box=True gives word-level boxes instead of line-level
|
| 1343 |
ocr_out = ocr_engine(img_ocr_np, return_word_box=True)
|