Mask Generation
sam2
Tony Neel committed on
Commit
796780d
·
1 Parent(s): 8ba5658
Files changed (2) hide show
  1. handler.py +81 -38
  2. requirements.txt +10 -5
handler.py CHANGED
@@ -1,47 +1,90 @@
1
- from typing import Dict, List, Any
2
- from transformers import SamModel, SamProcessor
3
  import torch
 
 
 
 
4
 
5
class EndpointHandler:
    def __init__(self, path=""):
        """Load the SAM model and processor from *path*, on GPU when available."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = SamModel.from_pretrained(path).to(self.device)
        self.processor = SamProcessor.from_pretrained(path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Handle image segmentation requests

        Args:
            data: Dictionary containing:
                inputs: Image to segment.
                    NOTE(review): raw bytes were documented here, but
                    SamProcessor expects a PIL image / array — confirm the
                    caller decodes the payload first.
        Returns:
            List of dictionaries containing segmentation masks, one per
            predicted mask, with keys "mask" (nested list) and "score"
            (float IoU estimate).
        """
        # Get the image from the request
        raw_image = data.pop("inputs", data)

        # Process the image
        inputs = self.processor(raw_image, return_tensors="pt").to(self.device)

        # BUG FIX: transformers' SamModel has no `generate()` method — mask
        # prediction is the regular forward pass. Run it without gradients.
        with torch.no_grad():
            outputs = self.model(**inputs, multimask_output=True)

        # Move predictions to host memory for serialization
        masks = outputs.pred_masks.squeeze().cpu().numpy()
        scores = outputs.iou_scores.squeeze().cpu().numpy()

        # Format response: numpy arrays -> JSON-serializable lists / floats
        return [
            {"mask": mask.tolist(), "score": float(score)}
            for mask, score in zip(masks, scores)
        ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any, Union
2
+ from sam2.sam2_image_predictor import SAM2ImagePredictor
3
  import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ import io
7
+ import base64
8
 
9
class EndpointHandler:
    def __init__(self, path=""):
        """Initialize the handler with a SAM2 image predictor.

        Args:
            path: Model repo or local directory to load. BUG FIX: the
                original ignored this parameter and always loaded the
                hard-coded checkpoint; now *path* is honored, falling back
                to facebook/sam2-hiera-small when empty.
        """
        model_id = path or "facebook/sam2-hiera-small"
        self.predictor = SAM2ImagePredictor.from_pretrained(model_id)
        # Autocast to bfloat16 is only valid/meaningful with CUDA present.
        self._use_cuda = torch.cuda.is_available()

    def _load_image(self, image_data: Union[str, bytes]) -> Image.Image:
        """Decode binary or base64-encoded image data into an RGB PIL image.

        Args:
            image_data: Raw image bytes, or a base64-encoded string of them.
        Returns:
            A PIL image in RGB mode.
        Raises:
            ValueError: If the payload cannot be decoded as an image.
        """
        try:
            # Strings are assumed to be base64-encoded image bytes
            if isinstance(image_data, str):
                image_data = base64.b64decode(image_data)

            image = Image.open(io.BytesIO(image_data))
            # BUG FIX: normalize mode so RGBA / palette / grayscale uploads
            # always yield an HxWx3 array for the predictor.
            return image.convert("RGB")
        except Exception as e:
            raise ValueError(f"Failed to load image: {str(e)}")

    def __call__(self, data: Union[Dict[str, Any], bytes]) -> Dict[str, Any]:
        """
        Handle incoming request data
        Args:
            data: Either raw bytes or dictionary containing:
                - image data (raw binary or base64) under "inputs"
                - optional point_coords: List of [x,y] coordinates for clicks
                - optional point_labels: List of 1 (foreground) or 0 (background)
        Returns:
            Dictionary containing "masks", "scores" and "status" on success,
            or "error" and "status" on failure.
        """
        try:
            # Accept both a JSON payload and a bare binary body
            if isinstance(data, dict):
                image_data = data.get("inputs", data)
                point_coords = data.get("point_coords", None)
                point_labels = data.get("point_labels", None)
            else:
                image_data = data
                point_coords = None
                point_labels = None

            # Load and convert image
            image_array = np.array(self._load_image(image_data))

            # BUG FIX: the original entered torch.autocast("cuda", ...)
            # unconditionally, which is invalid on CPU-only hosts; gate it
            # on CUDA availability via `enabled=`.
            with torch.inference_mode(), torch.autocast(
                "cuda", dtype=torch.bfloat16, enabled=self._use_cuda
            ):
                self.predictor.set_image(image_array)

                if point_coords is not None and point_labels is not None:
                    # Point-prompted prediction
                    masks, scores, _ = self.predictor.predict(
                        point_coords=np.array(point_coords),
                        point_labels=np.array(point_labels),
                    )
                else:
                    # NOTE(review): prompt-free predict() relies on the
                    # predictor's defaults — confirm SAM2ImagePredictor
                    # supports calling predict() with no prompts.
                    masks, scores, _ = self.predictor.predict()

            if masks is None:
                return {
                    "error": "No masks generated",
                    "status": "error"
                }

            # Convert outputs to JSON-serializable format
            return {
                "masks": [mask.tolist() for mask in masks],
                "scores": scores.tolist() if scores is not None else None,
                "status": "success"
            }

        except Exception as e:
            # Endpoint boundary: report failures as a structured payload
            # rather than letting the exception escape the handler.
            return {
                "error": str(e),
                "status": "error"
            }
requirements.txt CHANGED
@@ -1,5 +1,10 @@
1
- sam2
2
- transformers
3
- torch
4
- pillow
5
- numpy
 
 
 
 
 
 
1
+ sam2>=0.1.0
2
+ torch>=2.0.0
3
+ numpy>=1.24.0
4
+ Pillow>=10.0.0
5
+ transformers>=4.30.0
6
+ accelerate>=0.20.0
7
+ timm>=0.9.0
8
+ opencv-python>=4.8.0
9
+ scipy>=1.10.0
10
+ scikit-image>=0.21.0