habulaj committed on
Commit
afb2ca8
·
verified ·
1 Parent(s): 1099cee

Update detect_crop_image.py

Browse files
Files changed (1) hide show
  1. detect_crop_image.py +142 -125
detect_crop_image.py CHANGED
@@ -14,144 +14,162 @@ def detect_and_crop_image(image_path, output_image_path=None):
14
  print("Error: Could not open image.")
15
  return None
16
 
17
- height, width, _ = img.shape
18
- print(f"[detect_crop] Input image: {width}x{height}")
19
-
20
- # --- Step 1: Build a mask of non-background pixels ---
21
- # Opencv loads as BGR. np.all() checks all 3 channels, so it applies to both RGB/BGR equally well.
22
- white_threshold = 240
23
- black_threshold = 10
24
-
25
- is_white = np.all(img >= white_threshold, axis=2)
26
- is_black = np.all(img <= black_threshold, axis=2)
27
-
28
- is_bg = is_white | is_black
29
- is_content = ~is_bg # True where there IS content (non-background)
30
-
31
- if not np.any(is_content):
32
- print("Error: Image appears to be entirely background. No crop applied.")
33
- if output_image_path:
34
- cv2.imwrite(output_image_path, img)
35
- return output_image_path
36
- return image_path
37
-
38
- # --- Step 2: Find the main block of vertical content (ignoring text/watermarks) ---
39
- noise_tolerance = 5
40
- row_content_pixels = np.sum(is_content, axis=1)
41
- row_has_content = row_content_pixels > noise_tolerance
42
-
43
- blocks = []
44
- in_block = False
45
- start_row = 0
46
-
47
- for i, has_content in enumerate(row_has_content):
48
- if has_content and not in_block:
49
- in_block = True
50
- start_row = i
51
- elif not has_content and in_block:
52
- in_block = False
53
- blocks.append([start_row, i - 1])
54
-
55
- if in_block:
56
- blocks.append([start_row, len(row_has_content) - 1])
57
-
58
- if not blocks:
59
- print("Error: No content blocks found.")
60
  return None
61
 
62
- # Merge blocks separated by small gaps to handle intra-image background lines
63
- gap_tolerance = 20
64
- merged_blocks = []
65
- curr_block = blocks[0]
66
-
67
- for next_block in blocks[1:]:
68
- if next_block[0] - curr_block[1] <= gap_tolerance:
69
- curr_block = [curr_block[0], next_block[1]]
70
- else:
71
- merged_blocks.append(curr_block)
72
- curr_block = next_block
73
- merged_blocks.append(curr_block)
74
-
75
- # Select the block with the largest number of non-white/black pixels
76
- best_top, best_bottom = -1, -1
77
- max_pixels = -1
78
-
79
- for start, end in merged_blocks:
80
- total_p = np.sum(row_content_pixels[start:end+1])
81
- if total_p > max_pixels:
82
- max_pixels = total_p
83
- best_top, best_bottom = start, end
84
-
85
- top, bottom = best_top, best_bottom
86
-
87
- # Find extreme left and right columns restricted to the selected main block
88
- valid_rows = is_content[top:bottom+1, :]
89
- col_content_pixels = np.sum(valid_rows, axis=0)
90
- cols_with_content = col_content_pixels > noise_tolerance
91
-
92
- left = int(np.argmax(cols_with_content))
93
- right = int(width - np.argmax(cols_with_content[::-1]) - 1)
94
-
95
- print(f"[detect_crop] Detected content bounds: top={top}, bottom={bottom}, left={left}, right={right}")
96
-
97
- # --- Step 3: Smart Zoom for rounded corners ---
98
- zoom_limit = min(width, height) // 4 # max zoom 25%
99
- zoom_amount = 0
100
-
101
- while zoom_amount < zoom_limit and (right - left > 20) and (bottom - top > 20):
102
- c_tl = is_bg[top, left]
103
- c_tr = is_bg[top, right]
104
- c_bl = is_bg[bottom, left]
105
- c_br = is_bg[bottom, right]
106
-
107
- if c_tl or c_tr or c_bl or c_br:
108
- top += 1
109
- bottom -= 1
110
- left += 1
111
- right -= 1
112
- zoom_amount += 1
113
- else:
114
  break
115
 
116
- if zoom_amount > 0:
117
- print(f"Smart Zoom applied: {zoom_amount}px inset to clear rounded corners.")
118
-
119
- # --- Step 4: Validate and prepare crop area ---
120
- margin = 2
121
- if zoom_amount == 0:
122
- top = max(0, top - margin)
123
- bottom = min(height - 1, bottom + margin)
124
- left = max(0, left - margin)
125
- right = min(width - 1, right + margin)
126
-
127
- final_w = right - left + 1
128
- final_h = bottom - top + 1
129
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  if final_w <= 0 or final_h <= 0:
131
  print("Error: Invalid crop dimensions after zoom.")
132
  return None
133
 
134
- # Ensure crop dimensions are even (needed for video encoding/Gemini pipelines)
135
  if final_w % 2 != 0: final_w -= 1
136
  if final_h % 2 != 0: final_h -= 1
137
 
138
- # Adjust right/bottom to match the even dimensions
139
- right = left + final_w - 1
140
- bottom = top + final_h - 1
141
 
142
- print(f"Proposed Crop: w={final_w}, h={final_h}, x={left}, y={top}")
143
-
144
- total_removed = top + (height - bottom - 1) + left + (width - right - 1)
145
- if total_removed < 10:
146
- print("[detect_crop] Very little border detected. No crop applied.")
147
- if output_image_path:
148
- cv2.imwrite(output_image_path, img)
149
- print(f"Successfully created cropped image at {output_image_path}")
150
- return output_image_path
151
- return image_path
152
 
153
  # Crop the original image
154
- cropped_img = img[top:bottom+1, left:right+1]
155
 
156
  if output_image_path is None:
157
  filename, ext = os.path.splitext(image_path)
@@ -161,7 +179,6 @@ def detect_and_crop_image(image_path, output_image_path=None):
161
  print(f"Successfully created cropped image at {output_image_path}")
162
  return output_image_path
163
 
164
-
165
  if __name__ == "__main__":
166
  import sys
167
 
 
14
  print("Error: Could not open image.")
15
  return None
16
 
17
+ # Convert to grayscale
18
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
19
+
20
+ # Identify "mid-tones" to separate the real photo from pure white or black backgrounds/text.
21
+ # JPEG artifacts mean pure white/black might vary. We use 20 to 235 as the "mid-tone" photo range.
22
+ mask = cv2.inRange(gray, 20, 235)
23
+
24
+ # 1. MORPH_OPEN (Erode then Dilate)
25
+ # This removes thin structures, such as text anti-aliasing, thin lines, or small icons.
26
+ # A 15x15 kernel removes anything thinner than 15 pixels.
27
+ kernel_open = cv2.getStructuringElement(cv2.MORPH_RECT, (15, 15))
28
+ mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel_open)
29
+
30
+ # 2. MORPH_CLOSE (Dilate then Erode)
31
+ # This merges nearby blobs and fills holes (e.g., if the photo has pure white/black areas inside).
32
+ # A large kernel ensures the entire main image forms one single solid block.
33
+ kernel_close = cv2.getStructuringElement(cv2.MORPH_RECT, (51, 51))
34
+ mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel_close)
35
+
36
+ # Find contours
37
+ contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
38
+ print(f"📊 Encontrados {len(contours)} contornos potenciais na imagem.")
39
+
40
+ if not contours:
41
+ print("Error: No significant non-background regions detected.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  return None
43
 
44
+ # Find the contour with the largest bounding box area
45
+ max_area = 0
46
+ best_bbox = None
47
+
48
+ for c in contours:
49
+ x, y, w, h = cv2.boundingRect(c)
50
+ area = w * h
51
+ if area > max_area:
52
+ max_area = area
53
+ best_bbox = (x, y, w, h)
54
+
55
+ if best_bbox is None or max_area < 500:
56
+ print(f"❌ Aviso: Nenhum conteúdo significativo detectado (max_area={max_area} < 500).")
57
+ return None
58
+
59
+ x, y, w, h = best_bbox
60
+ print(f"✅ Melhor região de conteúdo: {w}x{h} @ ({x},{y}) | Área: {max_area}px")
61
+
62
+ x, y, w, h = best_bbox
63
+
64
+ # --- Smart Zoom for Rounded Corners ---
65
+ # If the corners of our bounding box still touch the background (white/black),
66
+ # it's likely a rounded corner. We "zoom in" (inset) until the corners are safe.
67
+ img_h, img_w = img.shape[:2]
68
+
69
+ def check_corners(cx, cy, cw, ch, m):
70
+ # Check the 4 corner pixels in the mask
71
+ # Each corner is tested as a single mask pixel, which is simpler than averaging a 3x3 neighbourhood.
72
+ coords = [
73
+ (cy, cx),
74
+ (cy, cx + cw - 1),
75
+ (cy + ch - 1, cx),
76
+ (cy + ch - 1, cx + cw - 1)
77
+ ]
78
+ for py, px in coords:
79
+ if m[py, px] == 0:
80
+ return False
81
+ return True
82
+
83
+ zoom_inset = 0
84
+ max_zoom = min(w, h) // 4 # Cap the inset at 25% of the detected bounding box's shorter side
85
+
86
+ while not check_corners(x, y, w, h, mask) and zoom_inset < max_zoom:
87
+ x += 1
88
+ y += 1
89
+ w -= 2
90
+ h -= 2
91
+ zoom_inset += 1
92
+ if w <= 20 or h <= 20:
 
 
 
93
  break
94
 
95
+ if zoom_inset > 0:
96
+ print(f"Smart Zoom applied: {zoom_inset}px inset to clear rounded corners.")
97
+
98
+ # --- Validate Crops ---
99
+ # Only crop if the excluded region is genuinely a white/black background
100
+ prop_x_min = x
101
+ prop_y_min = y
102
+ prop_x_max = x + w
103
+ prop_y_max = y + h
104
+
105
+ def validate_crop(region, border_region, edge_thresh=0.80, region_thresh=0.60):
106
+ if region.size == 0 or border_region.size == 0:
107
+ return False
108
+
109
+ dark_edge = np.count_nonzero(border_region < 20) / border_region.size
110
+ light_edge = np.count_nonzero(border_region > 235) / border_region.size
111
+
112
+ dark_region = np.count_nonzero(region < 20) / region.size
113
+ light_region = np.count_nonzero(region > 235) / region.size
114
+
115
+ is_dark_bg = (dark_edge >= edge_thresh) and (dark_region >= region_thresh)
116
+ is_light_bg = (light_edge >= edge_thresh) and (light_region >= region_thresh)
117
+
118
+ return is_dark_bg or is_light_bg
119
+
120
+ # Validate Top Crop
121
+ if prop_y_min > 0:
122
+ top_region = gray[0:prop_y_min, :]
123
+ top_border = gray[0:min(3, prop_y_min), :]
124
+ if not validate_crop(top_region, top_border):
125
+ prop_y_min = 0
126
+
127
+ # Validate Bottom Crop
128
+ if prop_y_max < img_h:
129
+ bottom_region = gray[prop_y_max:img_h, :]
130
+ bottom_border = gray[max(img_h-3, prop_y_max):img_h, :]
131
+ if not validate_crop(bottom_region, bottom_border):
132
+ prop_y_max = img_h
133
+
134
+ # Validate Left Crop
135
+ if prop_x_min > 0:
136
+ left_region = gray[:, 0:prop_x_min]
137
+ left_border = gray[:, 0:min(3, prop_x_min)]
138
+ if not validate_crop(left_region, left_border):
139
+ prop_x_min = 0
140
+
141
+ # Validate Right Crop
142
+ if prop_x_max < img_w:
143
+ right_region = gray[:, prop_x_max:img_w]
144
+ right_border = gray[:, max(img_w-3, prop_x_max):img_w]
145
+ if not validate_crop(right_region, right_border):
146
+ prop_x_max = img_w
147
+
148
+ # Inset Logic (2px) - additional fixed safety margin ONLY for valid crops
149
+ inset = 2
150
+ x_min = prop_x_min + inset if prop_x_min > 0 else 0
151
+ y_min = prop_y_min + inset if prop_y_min > 0 else 0
152
+ x_max = prop_x_max - inset if prop_x_max < img_w else img_w
153
+ y_max = prop_y_max - inset if prop_y_max < img_h else img_h
154
+
155
+ final_w = x_max - x_min
156
+ final_h = y_max - y_min
157
+
158
  if final_w <= 0 or final_h <= 0:
159
  print("Error: Invalid crop dimensions after zoom.")
160
  return None
161
 
162
+ # Ensure crop dimensions are even
163
  if final_w % 2 != 0: final_w -= 1
164
  if final_h % 2 != 0: final_h -= 1
165
 
166
+ x_max = x_min + final_w
167
+ y_max = y_min + final_h
 
168
 
169
+ print(f"Proposed Crop: w={final_w}, h={final_h}, x={x_min}, y={y_min}")
 
 
 
 
 
 
 
 
 
170
 
171
  # Crop the original image
172
+ cropped_img = img[y_min:y_max, x_min:x_max]
173
 
174
  if output_image_path is None:
175
  filename, ext = os.path.splitext(image_path)
 
179
  print(f"Successfully created cropped image at {output_image_path}")
180
  return output_image_path
181
 
 
182
  if __name__ == "__main__":
183
  import sys
184