Spaces:

bsod-tv
/

Localization-Quality-Control

Sleeping

Secking Claude committed on Feb 28

Commit

32d4b75

1 Parent(s): 97106f1

Implement full Modal segmentation integration for Segmentation Editing tab

- Add session state management (frames_state, masks_state, magic_code_state)
- Implement download_segment_files() to fetch frames and alpha masks from Modal volume
- Add composite_image_with_mask() for alpha overlay rendering
- Implement handle_segment_selection() with automatic file download and slider init
- Add handle_image_click() with SAM-3 segmentation function integration via Modal
- Implement load_segment_frame() with show/hide mask toggle support
- Add "Show Alpha Mask" checkbox to UI (Row 3)
- Wire all event handlers: segment selection, frame slider, mask toggle, image click
- Implement download_segment() to export edited frames + masks as ZIP
- Add Pillow and numpy to requirements.txt
- Update submit_magic_code() output signature to match new row visibility

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show

app.py +313 -27
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -8,11 +8,16 @@
 import os
 import re
 import cv2
 import time
 import base64
 import modal
 import logging
 import gradio as gr
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -189,6 +194,7 @@ def submit_magic_code(magic_code):
                     gr.update(visible=False),
                     gr.update(visible=False),
                     gr.update(visible=False),
                     gr.update(visible=False)
                 )
@@ -199,6 +205,7 @@ def submit_magic_code(magic_code):
                 gr.update(visible=True),
                 gr.update(visible=True),
                 gr.update(visible=True),
                 gr.update(visible=True)
             )
@@ -210,6 +217,7 @@ def submit_magic_code(magic_code):
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False)
             )
@@ -221,33 +229,280 @@ def submit_magic_code(magic_code):
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False)
         )
-def load_segment_frame(segment_id, frame_number):
     """
-    Placeholder function to load a specific frame from a segment for editing.
-    TODO: Connect to backend API to retrieve frame image from video segment.
     """
-    if not segment_id:
-        return None
-    logger.info(f"Loading segment: {segment_id}, frame: {frame_number}")
-    # Placeholder: Return None (no image loaded yet)
-    return None
-def download_segment(segment_id, edited_image, frame_number):
     """
-    Placeholder function to download edited segment.
-    TODO: Connect to backend API to save edited segmentation and download result.
     """
-    if not segment_id:
         return "No segment selected"
     logger.info(f"Download requested for segment: {segment_id}")
-    return f"Download functionality coming soon for {segment_id}"
 # Create a professional Gradio interface using the Golden ratio (1.618) for proportions
@@ -480,6 +735,11 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
             with gr.Tab("Segmentation Editing"):
                 gr.Markdown("### Segmentation Editing Workspace")
                 # Row 1: Magic Code textbox + Submit button
                 with gr.Row(elem_classes="input-section"):
                     with gr.Column(scale=3):
@@ -500,8 +760,16 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
                         info="Select a segment to edit"
                     )
-                # Row 3: ImageEditor component (SAM-3 style interaction)
-                with gr.Row(elem_classes="input-section", visible=False) as seg_row3:
                     seg_image_editor = gr.ImageEditor(
                         label="Image Click Segmentation",
                         type="pil",
@@ -509,8 +777,8 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
                         brush=gr.Brush(colors=["#FF0000"], color_mode="fixed")
                     )
-                # Row 4: Frame number slider
-                with gr.Row(elem_classes="input-section", visible=False) as seg_row4:
                     seg_frame_slider = gr.Slider(
                         minimum=0,
                         maximum=100,
@@ -521,9 +789,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
                         info="Select video frame to segment"
                     )
-                # Row 5: Download Segment button
-                with gr.Row(visible=False) as seg_row5:
                     seg_download_btn = gr.Button("Download Segment", variant="secondary")
     # Wire Content Moderation processing
     cm_process_btn.click(
@@ -536,25 +805,42 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
     seg_submit_btn.click(
         fn=submit_magic_code,
         inputs=[seg_magic_code],
-        outputs=[seg_id_dropdown, seg_magic_code, seg_row2, seg_row3, seg_row4, seg_row5]
     )
     seg_id_dropdown.change(
-        fn=load_segment_frame,
-        inputs=[seg_id_dropdown, seg_frame_slider],
-        outputs=[seg_image_editor]
     )
     seg_frame_slider.change(
         fn=load_segment_frame,
-        inputs=[seg_id_dropdown, seg_frame_slider],
-        outputs=[seg_image_editor]
     )
     seg_download_btn.click(
         fn=download_segment,
-        inputs=[seg_id_dropdown, seg_image_editor, seg_frame_slider],
-        outputs=[seg_magic_code]  # Display status message in magic code textbox
     )
 if __name__ == "__main__":

 import os
 import re
 import cv2
+import io
 import time
 import base64
 import modal
 import logging
+import tempfile
+import numpy as np
 import gradio as gr
+from PIL import Image
+from typing import Dict, Optional, Tuple
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
                     gr.update(visible=False),
                     gr.update(visible=False),
                     gr.update(visible=False),
+                    gr.update(visible=False),
                     gr.update(visible=False)
                 )
                 gr.update(visible=True),
                 gr.update(visible=True),
                 gr.update(visible=True),
+                gr.update(visible=True),
                 gr.update(visible=True)
             )
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
+                gr.update(visible=False),
                 gr.update(visible=False)
             )
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
+            gr.update(visible=False),
             gr.update(visible=False)
         )
+def download_segment_files(magic_code: str, segment_id: str) -> Tuple[Dict[int, Image.Image], Dict[int, Image.Image], int]:
+    """
+    Download all frame images and alpha masks for a given segment from Modal volume.
+    Returns:
+        frames_dict: Dict mapping frame_index -> PIL.Image (original frames)
+        masks_dict: Dict mapping frame_index -> PIL.Image (alpha masks)
+        max_frame: Maximum frame index found
+    """
+    try:
+        modal_volume_name = os.environ['MODERATION_MODAL_VOLUME']
+        volume = modal.Volume.from_name(modal_volume_name)
+        segment_path = f"/{magic_code}/{segment_id}"
+        frames_dict = {}
+        masks_dict = {}
+        logger.info(f"Downloading files from {segment_path}")
+        # List all files in the segment directory
+        try:
+            files = list(volume.listdir(segment_path))
+        except Exception as e:
+            logger.error(f"Failed to list segment directory: {e}")
+            return {}, {}, 0
+        # Parse filenames and download
+        frame_pattern = re.compile(r'^frame_(\d+)\.(jpg|png)$')
+        alpha_pattern = re.compile(r'^alpha_frame_(\d+)\.png$')
+        for entry in files:
+            if entry.type != modal.volume.FileEntryType.FILE:
+                continue
+            filename = os.path.basename(entry.path)
+            # Check if it's a frame file
+            frame_match = frame_pattern.match(filename)
+            if frame_match:
+                frame_idx = int(frame_match.group(1))
+                try:
+                    # Download frame
+                    file_data = volume.read_file(f"{segment_path}/{filename}")
+                    img = Image.open(io.BytesIO(file_data))
+                    frames_dict[frame_idx] = img.copy()
+                    logger.debug(f"Downloaded frame {frame_idx}")
+                except Exception as e:
+                    logger.error(f"Failed to download {filename}: {e}")
+                continue
+            # Check if it's an alpha mask file
+            alpha_match = alpha_pattern.match(filename)
+            if alpha_match:
+                frame_idx = int(alpha_match.group(1))
+                try:
+                    # Download alpha mask
+                    file_data = volume.read_file(f"{segment_path}/{filename}")
+                    img = Image.open(io.BytesIO(file_data))
+                    masks_dict[frame_idx] = img.copy()
+                    logger.debug(f"Downloaded alpha mask {frame_idx}")
+                except Exception as e:
+                    logger.error(f"Failed to download {filename}: {e}")
+                continue
+        max_frame = max(frames_dict.keys()) if frames_dict else 0
+        logger.info(f"Downloaded {len(frames_dict)} frames and {len(masks_dict)} masks. Max frame: {max_frame}")
+        return frames_dict, masks_dict, max_frame
+    except Exception as e:
+        logger.error(f"Error downloading segment files: {e}")
+        return {}, {}, 0
+def composite_image_with_mask(frame: Image.Image, mask: Optional[Image.Image], show_mask: bool) -> Image.Image:
+    """
+    Composite the original frame with the alpha mask overlay.
+    Args:
+        frame: Original RGB/RGBA image
+        mask: Alpha mask (grayscale or RGBA)
+        show_mask: Whether to show the mask overlay
+    Returns:
+        Composited PIL Image
+    """
+    if not show_mask or mask is None:
+        return frame.copy()
+    # Convert frame to RGBA if needed
+    if frame.mode != 'RGBA':
+        frame_rgba = frame.convert('RGBA')
+    else:
+        frame_rgba = frame.copy()
+    # Convert mask to 'L' (grayscale) if needed
+    if mask.mode != 'L':
+        mask_gray = mask.convert('L')
+    else:
+        mask_gray = mask.copy()
+    # Resize mask to match frame if needed
+    if mask_gray.size != frame_rgba.size:
+        mask_gray = mask_gray.resize(frame_rgba.size, Image.Resampling.LANCZOS)
+    # Create a colored overlay (semi-transparent red)
+    overlay = Image.new('RGBA', frame_rgba.size, (255, 0, 0, 128))
+    # Use mask as alpha channel for the overlay
+    overlay.putalpha(mask_gray)
+    # Composite
+    result = Image.alpha_composite(frame_rgba, overlay)
+    return result
+def load_segment_frame(segment_id, frame_number, show_mask, magic_code_state, frames_state, masks_state):
     """
+    Load and display a specific frame with optional alpha mask overlay.
     """
+    if not segment_id or frames_state is None:
+        return None, gr.update()
+    frames_dict = frames_state
+    masks_dict = masks_state
+    frame_idx = int(frame_number)
+    if frame_idx not in frames_dict:
+        logger.warning(f"Frame {frame_idx} not found in downloaded frames")
+        return None, gr.update()
+    frame = frames_dict[frame_idx]
+    mask = masks_dict.get(frame_idx, None)
+    # Composite image with mask
+    result_image = composite_image_with_mask(frame, mask, show_mask)
+    logger.info(f"Loaded frame {frame_idx} with mask overlay: {show_mask}")
+    return result_image, gr.update()
+def handle_segment_selection(segment_id, magic_code):
     """
+    Handle segment selection: download all files and initialize the view.
     """
+    if not segment_id or not magic_code:
+        return None, gr.update(maximum=0, value=0), {}, {}, magic_code
+    logger.info(f"Segment selected: {segment_id}")
+    # Download all frames and masks
+    frames_dict, masks_dict, max_frame = download_segment_files(magic_code, segment_id)
+    if not frames_dict:
+        logger.error("No frames downloaded")
+        return None, gr.update(maximum=0, value=0), {}, {}, magic_code
+    # Load first frame
+    frame_0 = frames_dict.get(0, None)
+    if frame_0 is None:
+        # Use first available frame
+        frame_0 = frames_dict[min(frames_dict.keys())]
+    # Initial display without mask
+    result_image = composite_image_with_mask(frame_0, masks_dict.get(0, None), False)
+    return (
+        result_image,
+        gr.update(minimum=0, maximum=max_frame, value=0),
+        frames_dict,
+        masks_dict,
+        magic_code
+    )
+def handle_image_click(segment_id, frame_number, magic_code, frames_state, masks_state, evt: gr.SelectData):
+    """
+    Handle click on ImageEditor: send coordinates to SAM-3, get new mask, update display.
+    """
+    if not segment_id or frames_state is None or evt is None:
+        return None, masks_state
+    # Extract click coordinates
+    x, y = evt.index[0], evt.index[1]
+    frame_idx = int(frame_number)
+    logger.info(f"Click detected at ({x}, {y}) on frame {frame_idx}")
+    if frame_idx not in frames_state:
+        logger.error(f"Frame {frame_idx} not in state")
+        return None, masks_state
+    try:
+        # Get the original frame
+        frame = frames_state[frame_idx]
+        # Convert frame to bytes
+        img_byte_arr = io.BytesIO()
+        frame.save(img_byte_arr, format='PNG')
+        img_bytes = img_byte_arr.getvalue()
+        # Call Modal SAM-3 segmentation function
+        logger.info(f"Calling SAM-3 segmentation with coordinates ({x}, {y})")
+        try:
+            sam_function = modal.Function.from_name("Content-Moderation", "sam3_segmentation_function")
+            mask_bytes = sam_function.remote(image_bytes=img_bytes, x=x, y=y)
+            # Parse returned mask
+            new_mask = Image.open(io.BytesIO(mask_bytes))
+            # Update masks dict
+            masks_state[frame_idx] = new_mask.copy()
+            # Composite and return updated image (always show mask after click)
+            result_image = composite_image_with_mask(frame, new_mask, True)
+            logger.info(f"Successfully updated mask for frame {frame_idx}")
+            return result_image, masks_state
+        except Exception as e:
+            logger.error(f"Error calling SAM-3 function: {e}")
+            # Return current view without update
+            frame = frames_state[frame_idx]
+            mask = masks_state.get(frame_idx, None)
+            result_image = composite_image_with_mask(frame, mask, True)
+            return result_image, masks_state
+    except Exception as e:
+        logger.error(f"Error handling image click: {e}")
+        return None, masks_state
+def download_segment(segment_id, frames_state, masks_state):
+    """
+    Package and download the edited segment (frames + masks) as a ZIP file.
+    """
+    if not segment_id or not frames_state:
         return "No segment selected"
     logger.info(f"Download requested for segment: {segment_id}")
+    try:
+        # Create temporary directory for files
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Save all frames and masks
+            for frame_idx, frame_img in frames_state.items():
+                frame_path = os.path.join(tmpdir, f"frame_{frame_idx:06d}.png")
+                frame_img.save(frame_path)
+                if frame_idx in masks_state:
+                    mask_path = os.path.join(tmpdir, f"alpha_frame_{frame_idx:06d}.png")
+                    masks_state[frame_idx].save(mask_path)
+            # Create ZIP
+            import shutil
+            zip_path = f"/tmp/segment_{segment_id}.zip"
+            shutil.make_archive(zip_path.replace('.zip', ''), 'zip', tmpdir)
+            logger.info(f"Created ZIP at {zip_path}")
+            return zip_path
+    except Exception as e:
+        logger.error(f"Error creating download package: {e}")
+        return f"Error: {str(e)}"
 # Create a professional Gradio interface using the Golden ratio (1.618) for proportions
             with gr.Tab("Segmentation Editing"):
                 gr.Markdown("### Segmentation Editing Workspace")
+                # State management for session data
+                magic_code_state = gr.State(value=None)
+                frames_state = gr.State(value=None)
+                masks_state = gr.State(value=None)
                 # Row 1: Magic Code textbox + Submit button
                 with gr.Row(elem_classes="input-section"):
                     with gr.Column(scale=3):
                         info="Select a segment to edit"
                     )
+                # Row 3: Show Alpha Mask checkbox
+                with gr.Row(elem_classes="input-section", visible=False) as seg_row3_checkbox:
+                    seg_show_mask = gr.Checkbox(
+                        label="Show Alpha Mask",
+                        value=False,
+                        interactive=True
+                    )
+                # Row 4: ImageEditor component (SAM-3 style interaction)
+                with gr.Row(elem_classes="input-section", visible=False) as seg_row4:
                     seg_image_editor = gr.ImageEditor(
                         label="Image Click Segmentation",
                         type="pil",
                         brush=gr.Brush(colors=["#FF0000"], color_mode="fixed")
                     )
+                # Row 5: Frame number slider
+                with gr.Row(elem_classes="input-section", visible=False) as seg_row5:
                     seg_frame_slider = gr.Slider(
                         minimum=0,
                         maximum=100,
                         info="Select video frame to segment"
                     )
+                # Row 6: Download Segment button
+                with gr.Row(visible=False) as seg_row6:
                     seg_download_btn = gr.Button("Download Segment", variant="secondary")
+                    seg_download_file = gr.File(label="Download", visible=False)
     # Wire Content Moderation processing
     cm_process_btn.click(
     seg_submit_btn.click(
         fn=submit_magic_code,
         inputs=[seg_magic_code],
+        outputs=[seg_id_dropdown, seg_magic_code, seg_row2, seg_row3_checkbox, seg_row4, seg_row5, seg_row6]
     )
+    # Segment selection handler
     seg_id_dropdown.change(
+        fn=handle_segment_selection,
+        inputs=[seg_id_dropdown, seg_magic_code],
+        outputs=[seg_image_editor, seg_frame_slider, frames_state, masks_state, magic_code_state]
     )
+    # Frame slider handler
     seg_frame_slider.change(
         fn=load_segment_frame,
+        inputs=[seg_id_dropdown, seg_frame_slider, seg_show_mask, magic_code_state, frames_state, masks_state],
+        outputs=[seg_image_editor, seg_show_mask]
+    )
+    # Show mask checkbox handler
+    seg_show_mask.change(
+        fn=load_segment_frame,
+        inputs=[seg_id_dropdown, seg_frame_slider, seg_show_mask, magic_code_state, frames_state, masks_state],
+        outputs=[seg_image_editor, seg_show_mask]
+    )
+    # Image click handler (for SAM-3 segmentation)
+    seg_image_editor.select(
+        fn=handle_image_click,
+        inputs=[seg_id_dropdown, seg_frame_slider, magic_code_state, frames_state, masks_state],
+        outputs=[seg_image_editor, masks_state]
     )
+    # Download button handler
     seg_download_btn.click(
         fn=download_segment,
+        inputs=[seg_id_dropdown, frames_state, masks_state],
+        outputs=[seg_download_file]
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 modal
 gradio
 opencv-python-headless

 modal
 gradio
 opencv-python-headless
+Pillow
+numpy