import traceback

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
|
|
# Startup banner, printed as soon as the module is imported.
print("🚀 Starting SAM2 App v2.1 - OPTIMIZED...")

# Run on the GPU when PyTorch reports CUDA support, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"📱 Using device: {device}")

# Module-level cache for the SAM model and processor; both stay None until
# load_model() fills them in on first use.
model = processor = None
|
|
def load_model():
    """Load the SAM model and processor once and cache them in module globals.

    ``facebook/sam-vit-large`` is tried first; if loading it fails for any
    reason, ``facebook/sam-vit-base`` is tried instead. Later calls return the
    cached objects without reloading.

    Returns:
        tuple: ``(model, processor)``, with the model already moved to
        ``device``.

    Raises:
        ImportError: If ``transformers`` cannot be imported.
        Exception: Whatever the fallback checkpoint raised, if it also fails
            to load.
    """
    global model, processor
    if model is not None:
        return model, processor

    # Imported lazily, but deliberately outside the try block below: a missing
    # package must surface as ImportError instead of triggering a "fallback"
    # that would only fail with a NameError.
    from transformers import SamModel, SamProcessor

    print("📦 Loading SAM model...")
    candidates = ("facebook/sam-vit-large", "facebook/sam-vit-base")
    for position, model_name in enumerate(candidates):
        try:
            loaded_processor = SamProcessor.from_pretrained(model_name)
            loaded_model = SamModel.from_pretrained(model_name)
            loaded_model.to(device)
        except Exception as e:
            if position == len(candidates) - 1:
                # Nothing left to fall back to; let the caller see the error.
                raise
            print(f"❌ Error: {e}, falling back to base model")
            continue

        # Publish to the globals only after both parts loaded, so the cache is
        # never left holding a processor without its model.
        processor, model = loaded_processor, loaded_model
        print(f"✅ Model loaded: {model_name}")
        break

    return model, processor
|
|
def prepare_image(image, max_size=1024):
    """Normalise an input image to RGB and cap its longest side at ``max_size``.

    Args:
        image: A PIL image or a NumPy array accepted by ``Image.fromarray``.
        max_size: Maximum allowed length, in pixels, of the longer side.
            Larger images are downscaled with the aspect ratio preserved;
            smaller images are left at their original size.

    Returns:
        tuple: ``(image_pil, image_np)`` - the RGB PIL image and the same
        pixels as a NumPy array.
    """
    image_pil = Image.fromarray(image) if isinstance(image, np.ndarray) else image

    if image_pil.mode != "RGB":
        image_pil = image_pil.convert("RGB")

    # Read the size from PIL directly; converting the full-resolution image to
    # an array just to inspect its shape would be a wasted copy.
    w, h = image_pil.size
    longest_side = max(h, w)

    if longest_side > max_size:
        scale = max_size / longest_side
        # Clamp to 1 px: for very elongated images the short side would
        # otherwise truncate to 0, which resize() rejects.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        image_pil = image_pil.resize((new_w, new_h), Image.Resampling.LANCZOS)

    return image_pil, np.array(image_pil)
|
|
def refine_mask(mask, kernel_size=5):
    """Smooth the edges of a mask.

    The mask is binarised, then passed through a morphological close followed
    by a morphological open, both using the same elliptical kernel.

    Args:
        mask: Array whose entries greater than zero mark foreground.
        kernel_size: Width and height, in pixels, of the elliptical kernel.

    Returns:
        Boolean array that is True where the smoothed mask is foreground.
    """
    binary = np.where(mask > 0, np.uint8(255), np.uint8(0))
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernel_size,) * 2)

    smoothed = binary
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        smoothed = cv2.morphologyEx(smoothed, operation, ellipse)

    return smoothed > 0
|
|
def segment_automatic(image, quality="high", merge_parts=True):
    """Segment the object at the image centre using a single SAM point prompt.

    The model is prompted with one positive point at the centre of the
    (possibly downscaled) image and asked for multiple candidate masks.

    Args:
        image: PIL image or NumPy array; ``None`` yields an error result.
        quality: ``"high"`` additionally smooths the mask with
            ``refine_mask`` and feathers the overlay with a Gaussian blur.
            Any other value skips both steps.
        merge_parts: If True, the union of all candidate masks scoring above
            0.5 is used; if none qualifies, the best-scoring mask is used
            instead. If False, only the best-scoring mask is used.

    Returns:
        tuple: ``(overlay_image, metadata)``. On success ``overlay_image`` is
        a PIL image with the mask tinted and outlined and ``metadata`` is a
        JSON-serialisable dict. On failure the input image is returned
        unchanged together with ``{"error": <message>}``.
    """
    if image is None:
        return None, {"error": "Kein Bild hochgeladen"}

    try:
        print(f"🔄 Starting segmentation (quality: {quality}, merge: {merge_parts})...")
        model, processor = load_model()

        image_pil, image_np = prepare_image(image)
        h, w = image_np.shape[:2]
        center_x, center_y = w // 2, h // 2

        inputs = processor(
            image_pil,
            input_points=[[[center_x, center_y]]],
            input_labels=[[1]],
            return_tensors="pt",
        ).to(device)

        print("🧠 Running inference...")
        with torch.no_grad():
            outputs = model(**inputs, multimask_output=True)

        masks = processor.image_processor.post_process_masks(
            outputs.pred_masks.cpu(),
            inputs["original_sizes"].cpu(),
            inputs["reshaped_input_sizes"].cpu(),
        )[0]

        # Flatten every leading dimension so that candidate i of the mask
        # stack lines up with entry i of the flattened score vector, whether
        # the processor returned a 3-D or a 4-D tensor.
        mask_stack = masks.numpy()
        mask_stack = mask_stack.reshape((-1,) + mask_stack.shape[-2:]) > 0
        scores = outputs.iou_scores.cpu().numpy().flatten()

        print(f"✅ Got {len(scores)} masks with scores: {scores}")

        best_idx = int(np.argmax(scores))
        if merge_parts:
            selected = [idx for idx, score in enumerate(scores) if score > 0.5]
            for idx in selected:
                print(f"  ✅ Added mask {idx} (score: {scores[idx]:.3f})")
            if not selected:
                # Without this fallback an all-low-confidence result would
                # produce an empty mask that is still reported as a success.
                selected = [best_idx]
                print(f"  ⚠️ No mask above 0.5, using best mask {best_idx} (score: {scores[best_idx]:.3f})")
            final_mask = mask_stack[selected].any(axis=0)
            masks_used = len(selected)
            print(f"🔗 Combined {masks_used} masks into one!")
        else:
            final_mask = mask_stack[best_idx]
            masks_used = 1
            print(f"✅ Using best mask (score: {scores[best_idx]:.3f})")

        if quality == "high":
            print("🎨 Refining mask...")
            final_mask = refine_mask(final_mask, kernel_size=7)

        # Per-pixel blend weight: 0 outside the mask, up to 0.65 inside. In
        # high quality mode the mask is blurred first to feather the edge.
        weight = final_mask.astype(np.float64)
        if quality == "high":
            weight = cv2.GaussianBlur(weight, (5, 5), 0)
        weight = (weight * 0.65)[..., np.newaxis]

        color = np.array([255, 80, 180], dtype=np.float64)
        overlay = (image_np * (1.0 - weight) + color * weight).astype(np.uint8)

        contours, _ = cv2.findContours(
            final_mask.astype(np.uint8),
            cv2.RETR_EXTERNAL,
            cv2.CHAIN_APPROX_SIMPLE,
        )
        cv2.drawContours(overlay, contours, -1, (255, 255, 0), 3)

        mask_area = int(np.sum(final_mask))
        metadata = {
            "success": True,
            "mode": "automatic_plus" if merge_parts else "automatic",
            "quality": quality,
            "masks_combined": masks_used,
            "all_scores": scores.tolist(),
            "image_size": [w, h],
            "mask_area": mask_area,
            "mask_percentage": float(mask_area / (h * w) * 100),
            "num_contours": len(contours),
            "device": device,
        }

        print("✅ Segmentation complete!")
        return Image.fromarray(overlay), metadata

    except Exception as e:
        # UI callback boundary: log the full traceback and hand the message
        # back to the client instead of letting the request crash.
        print(f"❌ ERROR:\n{traceback.format_exc()}")
        return image, {"error": str(e)}
|
|
def segment_multi_dense(image, density="medium"):
    """Segment several regions by prompting SAM with a regular grid of points.

    Each grid point is used as its own positive prompt. For every point the
    best-scoring candidate mask is kept if its score exceeds 0.7, smoothed
    with ``refine_mask`` and drawn onto the image in its own colour.

    Args:
        image: PIL image or NumPy array; ``None`` yields an error result.
        density: ``"high"`` uses a 5x5 grid, ``"medium"`` a 4x4 grid and any
            other value a 3x3 grid.

    Returns:
        tuple: ``(overlay_image, metadata)``. On success ``overlay_image`` is
        a PIL image with all accepted masks drawn and ``metadata`` is a
        JSON-serialisable dict. On failure the input image is returned
        unchanged together with ``{"error": <message>}``.
    """
    if image is None:
        return None, {"error": "Kein Bild"}

    try:
        print(f"🎯 Starting multi-region segmentation (density: {density})...")
        model, processor = load_model()
        image_pil, image_np = prepare_image(image)
        h, w = image_np.shape[:2]

        grid_size = {"high": 5, "medium": 4}.get(density, 3)

        # Interior grid points only: dividing by grid_size + 1 keeps every
        # prompt away from the image border.
        points = [
            [int(w * i / (grid_size + 1)), int(h * j / (grid_size + 1))]
            for i in range(1, grid_size + 1)
            for j in range(1, grid_size + 1)
        ]

        print(f"📍 Using {len(points)} grid points ({grid_size}x{grid_size})...")

        all_masks = []
        all_scores = []
        image_embeddings = None

        for point in points:
            inputs = processor(
                image_pil,
                input_points=[[point]],
                input_labels=[[1]],
                return_tensors="pt",
            ).to(device)

            with torch.no_grad():
                if image_embeddings is None:
                    # The image is identical for every prompt, so its
                    # embedding is computed once and reused instead of
                    # re-running the image encoder for each grid point.
                    image_embeddings = model.get_image_embeddings(inputs["pixel_values"])
                outputs = model(
                    input_points=inputs["input_points"],
                    input_labels=inputs["input_labels"],
                    image_embeddings=image_embeddings,
                    multimask_output=True,
                )

            scores = outputs.iou_scores.cpu().numpy().flatten()
            best_idx = int(np.argmax(scores))
            best_score = float(scores[best_idx])

            if best_score <= 0.7:
                # Rejected prompt: skip the full-resolution post-processing.
                continue

            masks = processor.image_processor.post_process_masks(
                outputs.pred_masks.cpu(),
                inputs["original_sizes"].cpu(),
                inputs["reshaped_input_sizes"].cpu(),
            )[0]

            # Flatten leading dimensions so best_idx addresses the same
            # candidate as in the flattened score vector.
            mask_stack = masks.numpy()
            mask_stack = mask_stack.reshape((-1,) + mask_stack.shape[-2:])

            all_masks.append(refine_mask(mask_stack[best_idx]))
            all_scores.append(best_score)

        print(f"✅ Got {len(all_masks)} quality masks")

        overlay = image_np.copy()
        mask_count = len(all_masks)

        for index, (mask, score) in enumerate(zip(all_masks, all_scores)):
            # Spread hues evenly over OpenCV's 0-179 hue range so every mask
            # gets its own colour.
            hue = int(180 * index / mask_count)
            color = cv2.cvtColor(np.uint8([[[hue, 255, 200]]]), cv2.COLOR_HSV2RGB)[0][0]

            # Higher-scoring masks are drawn slightly more opaque; clamp so an
            # unusually large score can never push the blend weight past 1.
            alpha = min(1.0, 0.4 + (score - 0.7) * 0.2)
            overlay[mask] = (overlay[mask] * (1 - alpha) + color * alpha).astype(np.uint8)

            contours, _ = cv2.findContours(
                mask.astype(np.uint8),
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE,
            )
            cv2.drawContours(overlay, contours, -1, color.tolist(), 2)

        metadata = {
            "success": True,
            "mode": "multi_object_dense",
            "density": density,
            "grid_size": f"{grid_size}x{grid_size}",
            "total_points": len(points),
            "quality_masks": mask_count,
            "avg_score": float(np.mean(all_scores)) if all_scores else 0,
            "scores": [float(s) for s in all_scores],
        }

        print("✅ Multi-region complete!")
        return Image.fromarray(overlay), metadata

    except Exception as e:
        # UI callback boundary: log the full traceback and hand the message
        # back to the client instead of letting the request crash.
        print(f"❌ ERROR:\n{traceback.format_exc()}")
        return image, {"error": str(e)}
|
|
| |
# Gradio UI: two segmentation tabs plus a static API documentation tab.
with gr.Blocks(theme=gr.themes.Soft(), title="SAM2 Boostly") as demo:
    gr.Markdown("# 🎨 SAM2 Segmentierung - Boostly Edition")
    gr.Markdown("### ⚡ Optimierte Zero-Shot Object Segmentation")

    # Tab 1: single-object segmentation, wired to segment_automatic.
    with gr.Tab("🤖 Automatisch PLUS"):
        gr.Markdown("**Smart Multi-Mask Combining** - Kombiniert automatisch alle Objektteile!")

        with gr.Row():
            # Left column: inputs and controls.
            with gr.Column():
                input_auto = gr.Image(type="pil", label="📸 Bild hochladen")

                quality_radio = gr.Radio(
                    choices=["high", "fast"],
                    value="high",
                    label="⚙️ Qualität",
                    info="High = präzisere Kanten, Fast = schneller",
                )

                merge_checkbox = gr.Checkbox(
                    value=True,
                    label="🔗 Teile zusammenfügen",
                    info="Kombiniert alle erkannten Bereiche (Fisch + Flosse = 1 Objekt)",
                )

                btn_auto = gr.Button("🚀 Segmentieren", variant="primary", size="lg")

                gr.Markdown("""
                **✨ Funktionsweise:**
                - SAM generiert 3 verschiedene Masken
                - Wenn "Teile zusammenfügen" AN: Alle kombiniert → vollständiges Objekt
                - Wenn AUS: Nur präziseste Maske
                - ⚡ Optimiert: ~10-30 Sekunden statt 25 Minuten!
                """)

            # Right column: overlay image and result metadata.
            with gr.Column():
                output_auto = gr.Image(label="✨ Segmentiertes Bild")
                json_auto = gr.JSON(label="📊 Metadata")

        btn_auto.click(
            fn=segment_automatic,
            inputs=[input_auto, quality_radio, merge_checkbox],
            outputs=[output_auto, json_auto],
        )

        # No example images are configured; the label carries a usage tip.
        gr.Examples(
            examples=[],
            inputs=input_auto,
            label="💡 Tipp: Objekt sollte zentral im Bild sein",
        )

    # Tab 2: grid-prompted segmentation, wired to segment_multi_dense.
    with gr.Tab("🎯 Multi-Region"):
        gr.Markdown("**Grid-basierte Segmentierung** - Für mehrere separate Objekte")

        with gr.Row():
            # Left column: inputs and controls.
            with gr.Column():
                input_multi = gr.Image(type="pil", label="📸 Bild hochladen")

                density_radio = gr.Radio(
                    choices=["high", "medium", "low"],
                    value="medium",
                    label="📊 Punkt-Dichte",
                    info="Mehr Punkte = mehr Details, aber langsamer",
                )

                btn_multi = gr.Button("🎯 Alle Bereiche segmentieren", variant="primary", size="lg")

                gr.Markdown("""
                **Grid-Größen:**
                - 🔥 High: 5x5 = 25 Erkennungspunkte
                - ⚡ Medium: 4x4 = 16 Punkte (empfohlen)
                - 💨 Low: 3x3 = 9 Punkte

                Jedes Objekt bekommt eigene Farbe!
                """)

            # Right column: overlay image and result metadata.
            with gr.Column():
                output_multi = gr.Image(label="✨ Segmentiertes Bild")
                json_multi = gr.JSON(label="📊 Metadata")

        btn_multi.click(
            fn=segment_multi_dense,
            inputs=[input_multi, density_radio],
            outputs=[output_multi, json_multi],
        )

    # Tab 3: static documentation only; nothing here is wired to a callback.
    with gr.Tab("📡 API Dokumentation"):
        gr.Markdown("### 🔗 API Endpoint")
        gr.Code(
            "https://EnginDev-Boostly.hf.space/api/predict",
            label="Base URL",
        )

        gr.Markdown("### 📝 JavaScript Integration (für Lovable)")
        gr.Code('''
// Segmentation Service
const HUGGINGFACE_API = 'https://EnginDev-Boostly.hf.space';

async function segmentImage(imageFile, mode = 'automatic') {
  // File zu Base64 konvertieren
  const base64 = await new Promise((resolve) => {
    const reader = new FileReader();
    reader.onloadend = () => resolve(reader.result);
    reader.readAsDataURL(imageFile);
  });

  // API Call
  const response = await fetch(`${HUGGINGFACE_API}/api/predict`, {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({
      data: [base64, "high", true], // [image, quality, merge]
      fn_index: mode === 'automatic' ? 0 : 1
    })
  });

  const result = await response.json();

  return {
    segmentedImage: result.data[0], // Base64 segmentiertes Bild
    metadata: result.data[1] // JSON mit Details
  };
}

// Verwendung:
const result = await segmentImage(myImageFile, 'automatic');
console.log('Mask covers:', result.metadata.mask_percentage + '%');
''', language="javascript")

        gr.Markdown("### ⚙️ Parameter")
        gr.Markdown("""
        **fn_index:**
        - `0` = Automatisch PLUS (empfohlen für einzelne Objekte)
        - `1` = Multi-Region (für mehrere Objekte)

        **quality:**
        - `"high"` = Präzise Kanten, Gaussian Blur, Refinement (~20-30s)
        - `"fast"` = Schneller, weniger Nachbearbeitung (~10-15s)

        **merge (nur fn_index=0):**
        - `true` = Kombiniert alle Masken → vollständiges Objekt
        - `false` = Nur beste Maske → nur Hauptteil

        **density (nur fn_index=1):**
        - `"high"` = 5x5 Grid = 25 Punkte
        - `"medium"` = 4x4 Grid = 16 Punkte
        - `"low"` = 3x3 Grid = 9 Punkte
        """)

        gr.Markdown("### 📊 Response Format")
        gr.Code('''
{
  "data": [
    "data:image/png;base64,iVBORw0KGgo...", // Segmentiertes Bild
    {
      "success": true,
      "mode": "automatic_plus",
      "masks_combined": 3,
      "mask_percentage": 12.5,
      "num_contours": 1,
      "all_scores": [0.998, 0.583, 0.864]
    }
  ]
}
''', language="json")
|
|
if __name__ == "__main__":
    # Start the web server only when this file is run as a script.
    print("🌐 Launching Boostly SAM2 v2.1...")
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "show_error": True,
    }
    demo.launch(**launch_options)