Spaces:

habulaj
/

subapi

Running

App Files Files Community

habulaj committed 18 days ago

Commit

afb2ca8

verified ·

1 Parent(s): 1099cee

Update detect_crop_image.py

Browse files

Files changed (1) hide show

detect_crop_image.py +142 -125

detect_crop_image.py CHANGED Viewed

@@ -14,144 +14,162 @@ def detect_and_crop_image(image_path, output_image_path=None):
         print("Error: Could not open image.")
         return None
-    height, width, _ = img.shape
-    print(f"[detect_crop] Input image: {width}x{height}")
-    # --- Step 1: Build a mask of non-background pixels ---
-    # Opencv loads as BGR. np.all() checks all 3 channels, so it applies to both RGB/BGR equally well.
-    white_threshold = 240
-    black_threshold = 10
-    is_white = np.all(img >= white_threshold, axis=2)
-    is_black = np.all(img <= black_threshold, axis=2)
-    is_bg = is_white | is_black
-    is_content = ~is_bg  # True where there IS content (non-background)
-    if not np.any(is_content):
-        print("Error: Image appears to be entirely background. No crop applied.")
-        if output_image_path:
-            cv2.imwrite(output_image_path, img)
-            return output_image_path
-        return image_path
-    # --- Step 2: Find the main block of vertical content (ignoring text/watermarks) ---
-    noise_tolerance = 5
-    row_content_pixels = np.sum(is_content, axis=1)
-    row_has_content = row_content_pixels > noise_tolerance
-    blocks = []
-    in_block = False
-    start_row = 0
-    for i, has_content in enumerate(row_has_content):
-        if has_content and not in_block:
-            in_block = True
-            start_row = i
-        elif not has_content and in_block:
-            in_block = False
-            blocks.append([start_row, i - 1])
-    if in_block:
-        blocks.append([start_row, len(row_has_content) - 1])
-    if not blocks:
-        print("Error: No content blocks found.")
         return None
-    # Merge blocks separated by small gaps to handle intra-image background lines
-    gap_tolerance = 20
-    merged_blocks = []
-    curr_block = blocks[0]
-    for next_block in blocks[1:]:
-        if next_block[0] - curr_block[1] <= gap_tolerance:
-            curr_block = [curr_block[0], next_block[1]]
-        else:
-            merged_blocks.append(curr_block)
-            curr_block = next_block
-    merged_blocks.append(curr_block)
-    # Select the block with the largest number of non-white/black pixels
-    best_top, best_bottom = -1, -1
-    max_pixels = -1
-    for start, end in merged_blocks:
-        total_p = np.sum(row_content_pixels[start:end+1])
-        if total_p > max_pixels:
-            max_pixels = total_p
-            best_top, best_bottom = start, end
-    top, bottom = best_top, best_bottom
-    # Find extreme left and right columns restricted to the selected main block
-    valid_rows = is_content[top:bottom+1, :]
-    col_content_pixels = np.sum(valid_rows, axis=0)
-    cols_with_content = col_content_pixels > noise_tolerance
-    left = int(np.argmax(cols_with_content))
-    right = int(width - np.argmax(cols_with_content[::-1]) - 1)
-    print(f"[detect_crop] Detected content bounds: top={top}, bottom={bottom}, left={left}, right={right}")
-    # --- Step 3: Smart Zoom for rounded corners ---
-    zoom_limit = min(width, height) // 4  # max zoom 25%
-    zoom_amount = 0
-    while zoom_amount < zoom_limit and (right - left > 20) and (bottom - top > 20):
-        c_tl = is_bg[top, left]
-        c_tr = is_bg[top, right]
-        c_bl = is_bg[bottom, left]
-        c_br = is_bg[bottom, right]
-        if c_tl or c_tr or c_bl or c_br:
-            top += 1
-            bottom -= 1
-            left += 1
-            right -= 1
-            zoom_amount += 1
-        else:
             break
-    if zoom_amount > 0:
-        print(f"Smart Zoom applied: {zoom_amount}px inset to clear rounded corners.")
-    # --- Step 4: Validate and prepare crop area ---
-    margin = 2
-    if zoom_amount == 0:
-        top = max(0, top - margin)
-        bottom = min(height - 1, bottom + margin)
-        left = max(0, left - margin)
-        right = min(width - 1, right + margin)
-    final_w = right - left + 1
-    final_h = bottom - top + 1
     if final_w <= 0 or final_h <= 0:
         print("Error: Invalid crop dimensions after zoom.")
         return None
-    # Ensure crop dimensions are even (needed for video encoding/Gemini pipelines)
     if final_w % 2 != 0: final_w -= 1
     if final_h % 2 != 0: final_h -= 1
-    # Adjust right/bottom to match the even dimensions
-    right = left + final_w - 1
-    bottom = top + final_h - 1
-    print(f"Proposed Crop: w={final_w}, h={final_h}, x={left}, y={top}")
-    total_removed = top + (height - bottom - 1) + left + (width - right - 1)
-    if total_removed < 10:
-        print("[detect_crop] Very little border detected. No crop applied.")
-        if output_image_path:
-            cv2.imwrite(output_image_path, img)
-            print(f"Successfully created cropped image at {output_image_path}")
-            return output_image_path
-        return image_path
     # Crop the original image
-    cropped_img = img[top:bottom+1, left:right+1]
     if output_image_path is None:
         filename, ext = os.path.splitext(image_path)
@@ -161,7 +179,6 @@ def detect_and_crop_image(image_path, output_image_path=None):
     print(f"Successfully created cropped image at {output_image_path}")
     return output_image_path
 if __name__ == "__main__":
     import sys

         print("Error: Could not open image.")
         return None
+    # Convert to grayscale
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    # Identify "mid-tones" to separate the real photo from pure white or black backgrounds/text.
+    # JPEG artifacts mean pure white/black might vary. We use 20 to 235 as the "mid-tone" photo range.
+    mask = cv2.inRange(gray, 20, 235)
+    # 1. MORPH_OPEN (Erode then Dilate)
+    # This removes thin structures, such as text anti-aliasing, thin lines, or small icons.
+    # A 15x15 kernel removes anything thinner than 15 pixels.
+    kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
+    mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)
+    # 2. MORPH_CLOSE (Dilate then Erode)
+    # This merges nearby blobs and fills holes (e.g., if the photo has pure white/black areas inside).
+    # A large kernel ensures the entire main image forms one single solid block.
+    kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (51, 51))
+    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
+    # Find contours
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    print(f"📊 Encontrados {len(contours)} contornos potenciais na imagem.")
+    if not contours:
+        print("Error: No significant non-background regions detected.")
         return None
+    # Find the contour with the largest bounding box area
+    max_area = 0
+    best_bbox = None
+    for c in contours:
+        x, y, w, h = cv2.boundingRect(c)
+        area = w * h
+        if area > max_area:
+            max_area = area
+            best_bbox = (x, y, w, h)
+    if best_bbox is None or max_area < 500:
+         print(f"❌ Aviso: Nenhum conteúdo significativo detectado (max_area={max_area} < 500).")
+         return None
+    x, y, w, h = best_bbox
+    print(f"✅ Melhor região de conteúdo: {w}x{h} @ ({x},{y}) | Área: {max_area}px")
+    x, y, w, h = best_bbox
+    # --- Smart Zoom for Rounded Corners ---
+    # If the corners of our bounding box still touch the background (white/black),
+    # it's likely a rounded corner. We "zoom in" (inset) until the corners are safe.
+    img_h, img_w = img.shape[:2]
+    def check_corners(cx, cy, cw, ch, m):
+        # Check the 4 corner pixels in the mask
+        # Only the single corner pixel is sampled (not a 3x3 average), for simplicity.
+        coords = [
+            (cy, cx),
+            (cy, cx + cw - 1),
+            (cy + ch - 1, cx),
+            (cy + ch - 1, cx + cw - 1)
+        ]
+        for py, px in coords:
+            if m[py, px] == 0:
+                return False
+        return True
+    zoom_inset = 0
+    max_zoom = min(w, h) // 4  # Cap the inset at 25% of the detected bounding box's shorter side
+    while not check_corners(x, y, w, h, mask) and zoom_inset < max_zoom:
+        x += 1
+        y += 1
+        w -= 2
+        h -= 2
+        zoom_inset += 1
+        if w <= 20 or h <= 20:
             break
+    if zoom_inset > 0:
+        print(f"Smart Zoom applied: {zoom_inset}px inset to clear rounded corners.")
+    # --- Validate Crops ---
+    # Only crop if the excluded region is genuinely a white/black background
+    prop_x_min = x
+    prop_y_min = y
+    prop_x_max = x + w
+    prop_y_max = y + h
+    def validate_crop(region, border_region, edge_thresh=0.80, region_thresh=0.60):
+        if region.size == 0 or border_region.size == 0:
+            return False
+        dark_edge = np.count_nonzero(border_region < 20) / border_region.size
+        light_edge = np.count_nonzero(border_region > 235) / border_region.size
+        dark_region = np.count_nonzero(region < 20) / region.size
+        light_region = np.count_nonzero(region > 235) / region.size
+        is_dark_bg = (dark_edge >= edge_thresh) and (dark_region >= region_thresh)
+        is_light_bg = (light_edge >= edge_thresh) and (light_region >= region_thresh)
+        return is_dark_bg or is_light_bg
+    # Validate Top Crop
+    if prop_y_min > 0:
+        top_region = gray[0:prop_y_min, :]
+        top_border = gray[0:min(3, prop_y_min), :]
+        if not validate_crop(top_region, top_border):
+            prop_y_min = 0
+    # Validate Bottom Crop
+    if prop_y_max < img_h:
+        bottom_region = gray[prop_y_max:img_h, :]
+        bottom_border = gray[max(img_h-3, prop_y_max):img_h, :]
+        if not validate_crop(bottom_region, bottom_border):
+            prop_y_max = img_h
+    # Validate Left Crop
+    if prop_x_min > 0:
+        left_region = gray[:, 0:prop_x_min]
+        left_border = gray[:, 0:min(3, prop_x_min)]
+        if not validate_crop(left_region, left_border):
+            prop_x_min = 0
+    # Validate Right Crop
+    if prop_x_max < img_w:
+        right_region = gray[:, prop_x_max:img_w]
+        right_border = gray[:, max(img_w-3, prop_x_max):img_w]
+        if not validate_crop(right_region, right_border):
+            prop_x_max = img_w
+    # Inset Logic (2px) - additional fixed safety margin ONLY for valid crops
+    inset = 2
+    x_min = prop_x_min + inset if prop_x_min > 0 else 0
+    y_min = prop_y_min + inset if prop_y_min > 0 else 0
+    x_max = prop_x_max - inset if prop_x_max < img_w else img_w
+    y_max = prop_y_max - inset if prop_y_max < img_h else img_h
+    final_w = x_max - x_min
+    final_h = y_max - y_min
     if final_w <= 0 or final_h <= 0:
         print("Error: Invalid crop dimensions after zoom.")
         return None
+    # Ensure crop dimensions are even
     if final_w % 2 != 0: final_w -= 1
     if final_h % 2 != 0: final_h -= 1
+    x_max = x_min + final_w
+    y_max = y_min + final_h
+    print(f"Proposed Crop: w={final_w}, h={final_h}, x={x_min}, y={y_min}")
     # Crop the original image
+    cropped_img = img[y_min:y_max, x_min:x_max]
     if output_image_path is None:
         filename, ext = os.path.splitext(image_path)
     print(f"Successfully created cropped image at {output_image_path}")
     return output_image_path
 if __name__ == "__main__":
     import sys