Spaces:

heerjtdev
/

layout_latex

Running

App Files Community

heerjtdev committed on Mar 19

Commit

1970d0f

verified ·

1 Parent(s): 7eae096

Update working_yolo_pipeline.py

Browse files

Files changed (1) hide show

working_yolo_pipeline.py +2 -19

working_yolo_pipeline.py CHANGED Viewed

@@ -735,23 +735,6 @@ def calculate_x_gutters(word_data: list, params: Dict, page_height: float) -> Li
 #======================================================================================================================================
-def deskew_image(img_np: np.ndarray) -> np.ndarray:
-    """Detects and corrects small rotation angles (±5°) before OCR."""
-    gray = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) if len(img_np.shape) == 3 else img_np
-    gray = cv2.bitwise_not(gray)
-    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
-    coords = np.column_stack(np.where(thresh > 0))
-    if len(coords) < 100:
-        return img_np
-    angle = cv2.minAreaRect(coords)[-1]
-    if angle < -45:
-        angle = 90 + angle
-    if abs(angle) < 0.5:  # Don't bother for tiny angles
-        return img_np
-    (h, w) = img_np.shape[:2]
-    center = (w // 2, h // 2)
-    M = cv2.getRotationMatrix2D(center, angle, 1.0)
-    return cv2.warpAffine(img_np, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
 def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
                                 top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
@@ -782,7 +765,7 @@ def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
             elif pix.n == 4:
                 img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
-            img_np = deskew_image(img_np)
             # CRITICAL FIX: Use return_word_box=True and access word_results
             ocr_result = ocr_engine(img_np, return_word_box=True)
@@ -1354,7 +1337,7 @@ def preprocess_and_ocr_page(original_img: np.ndarray, model, pdf_path: str,
                 elif pix_ocr.n == 4:
                     img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
-                img_ocr_np = deskew_image(img_ocr_np)
                 # return_word_box=True gives word-level boxes instead of line-level
                 ocr_out = ocr_engine(img_ocr_np, return_word_box=True)

 #======================================================================================================================================
 def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
                                 top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
             elif pix.n == 4:
                 img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
             # CRITICAL FIX: Use return_word_box=True and access word_results
             ocr_result = ocr_engine(img_np, return_word_box=True)
                 elif pix_ocr.n == 4:
                     img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
                 # return_word_box=True gives word-level boxes instead of line-level
                 ocr_out = ocr_engine(img_ocr_np, return_word_box=True)