heerjtdev committed on
Commit
1970d0f
·
verified ·
1 Parent(s): 7eae096

Update working_yolo_pipeline.py

Browse files
Files changed (1) hide show
  1. working_yolo_pipeline.py +2 -19
working_yolo_pipeline.py CHANGED
@@ -735,23 +735,6 @@ def calculate_x_gutters(word_data: list, params: Dict, page_height: float) -> Li
735
  #======================================================================================================================================
736
 
737
 
738
- def deskew_image(img_np: np.ndarray) -> np.ndarray:
739
- """Detects and corrects small rotation angles (±5°) before OCR."""
740
- gray = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) if len(img_np.shape) == 3 else img_np
741
- gray = cv2.bitwise_not(gray)
742
- thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
743
- coords = np.column_stack(np.where(thresh > 0))
744
- if len(coords) < 100:
745
- return img_np
746
- angle = cv2.minAreaRect(coords)[-1]
747
- if angle < -45:
748
- angle = 90 + angle
749
- if abs(angle) < 0.5: # Don't bother for tiny angles
750
- return img_np
751
- (h, w) = img_np.shape[:2]
752
- center = (w // 2, h // 2)
753
- M = cv2.getRotationMatrix2D(center, angle, 1.0)
754
- return cv2.warpAffine(img_np, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
755
 
756
  def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
757
  top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
@@ -782,7 +765,7 @@ def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
782
  elif pix.n == 4:
783
  img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
784
 
785
- img_np = deskew_image(img_np)
786
 
787
  # CRITICAL FIX: Use return_word_box=True and access word_results
788
  ocr_result = ocr_engine(img_np, return_word_box=True)
@@ -1354,7 +1337,7 @@ def preprocess_and_ocr_page(original_img: np.ndarray, model, pdf_path: str,
1354
  elif pix_ocr.n == 4:
1355
  img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
1356
 
1357
- img_ocr_np = deskew_image(img_ocr_np)
1358
 
1359
  # return_word_box=True gives word-level boxes instead of line-level
1360
  ocr_out = ocr_engine(img_ocr_np, return_word_box=True)
 
735
  #======================================================================================================================================
736
 
737
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
738
 
739
  def get_word_data_for_detection(page: fitz.Page, pdf_path: str, page_num: int,
740
  top_margin_percent=0.10, bottom_margin_percent=0.10) -> list:
 
765
  elif pix.n == 4:
766
  img_np = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)
767
 
768
+
769
 
770
  # CRITICAL FIX: Use return_word_box=True and access word_results
771
  ocr_result = ocr_engine(img_np, return_word_box=True)
 
1337
  elif pix_ocr.n == 4:
1338
  img_ocr_np = cv2.cvtColor(img_ocr_np, cv2.COLOR_RGBA2BGR)
1339
 
1340
+
1341
 
1342
  # return_word_box=True gives word-level boxes instead of line-level
1343
  ocr_out = ocr_engine(img_ocr_np, return_word_box=True)