Spaces:

bsod-tv
/

Localization-Quality-Control

Sleeping

denizaybey committed on Mar 17

Commit

050c72c

2 Parent(s): ad90bbf bd7ca9e

Merge branch 'feat/download-segment-zip' into 'main'

Add Segmentation Editing tab with 5-row layout

See merge request sonne-technology/bsod-tv/waveform-matching-gradio-front-end!27

Files changed (2) hide show

app.py +556 -27
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -8,11 +8,16 @@
 import os
 import re
 import cv2
 import time
 import base64
 import modal
 import logging
 import gradio as gr
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
@@ -189,6 +194,7 @@ def submit_magic_code(magic_code):
                     gr.update(visible=False),
                     gr.update(visible=False),
                     gr.update(visible=False),
                     gr.update(visible=False)
                 )
@@ -199,6 +205,7 @@ def submit_magic_code(magic_code):
                 gr.update(visible=True),
                 gr.update(visible=True),
                 gr.update(visible=True),
                 gr.update(visible=True)
             )
@@ -210,6 +217,7 @@ def submit_magic_code(magic_code):
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False)
             )
@@ -221,33 +229,360 @@ def submit_magic_code(magic_code):
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False)
         )
-def load_segment_frame(segment_id, frame_number):
     """
-    Placeholder function to load a specific frame from a segment for editing.
-    TODO: Connect to backend API to retrieve frame image from video segment.
     """
-    if not segment_id:
-        return None
-    logger.info(f"Loading segment: {segment_id}, frame: {frame_number}")
-    # Placeholder: Return None (no image loaded yet)
-    return None
-def download_segment(segment_id, edited_image, frame_number):
     """
-    Placeholder function to download edited segment.
-    TODO: Connect to backend API to save edited segmentation and download result.
     """
     if not segment_id:
-        return "No segment selected"
     logger.info(f"Download requested for segment: {segment_id}")
-    return f"Download functionality coming soon for {segment_id}"
 # Create a professional Gradio interface using the Golden ratio (1.618) for proportions
@@ -436,6 +771,53 @@ label {
         padding: 15px;
     }
 }
 """
 # Create a Blocks interface for more customization
@@ -480,6 +862,11 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
             with gr.Tab("Segmentation Editing"):
                 gr.Markdown("### Segmentation Editing Workspace")
                 # Row 1: Magic Code textbox + Submit button
                 with gr.Row(elem_classes="input-section"):
                     with gr.Column(scale=3):
@@ -500,8 +887,16 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
                         info="Select a segment to edit"
                     )
-                # Row 3: ImageEditor component (SAM-3 style interaction)
-                with gr.Row(elem_classes="input-section", visible=False) as seg_row3:
                     seg_image_editor = gr.ImageEditor(
                         label="Image Click Segmentation",
                         type="pil",
@@ -509,8 +904,8 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
                         brush=gr.Brush(colors=["#FF0000"], color_mode="fixed")
                     )
-                # Row 4: Frame number slider
-                with gr.Row(elem_classes="input-section", visible=False) as seg_row4:
                     seg_frame_slider = gr.Slider(
                         minimum=0,
                         maximum=100,
@@ -521,9 +916,71 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
                         info="Select video frame to segment"
                     )
-                # Row 5: Download Segment button
-                with gr.Row(visible=False) as seg_row5:
                     seg_download_btn = gr.Button("Download Segment", variant="secondary")
     # Wire Content Moderation processing
     cm_process_btn.click(
@@ -536,25 +993,97 @@ with gr.Blocks(css=css, theme=gr.themes.Soft(primary_hue="indigo", secondary_hue
     seg_submit_btn.click(
         fn=submit_magic_code,
         inputs=[seg_magic_code],
-        outputs=[seg_id_dropdown, seg_magic_code, seg_row2, seg_row3, seg_row4, seg_row5]
     )
     seg_id_dropdown.change(
-        fn=load_segment_frame,
-        inputs=[seg_id_dropdown, seg_frame_slider],
-        outputs=[seg_image_editor]
     )
     seg_frame_slider.change(
         fn=load_segment_frame,
-        inputs=[seg_id_dropdown, seg_frame_slider],
-        outputs=[seg_image_editor]
     )
     seg_download_btn.click(
         fn=download_segment,
-        inputs=[seg_id_dropdown, seg_image_editor, seg_frame_slider],
-        outputs=[seg_magic_code]  # Display status message in magic code textbox
     )
 if __name__ == "__main__":

 import os
 import re
 import cv2
+import io
 import time
 import base64
 import modal
 import logging
+import tempfile
+import numpy as np
 import gradio as gr
+from PIL import Image
+from typing import Dict, Optional, Tuple
 logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger(__name__)
                     gr.update(visible=False),
                     gr.update(visible=False),
                     gr.update(visible=False),
+                    gr.update(visible=False),
                     gr.update(visible=False)
                 )
                 gr.update(visible=True),
                 gr.update(visible=True),
                 gr.update(visible=True),
+                gr.update(visible=True),
                 gr.update(visible=True)
             )
                 gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=False),
+                gr.update(visible=False),
                 gr.update(visible=False)
             )
             gr.update(visible=False),
             gr.update(visible=False),
             gr.update(visible=False),
+            gr.update(visible=False),
             gr.update(visible=False)
         )
+def download_segment_files(magic_code: str, segment_id: str) -> Tuple[Dict[int, Image.Image], Dict[int, Image.Image], int]:
     """
+    Download all frame images and alpha masks for a given segment from Modal volume.
+    Returns:
+        frames_dict: Dict mapping frame_index -> PIL.Image (original frames)
+        masks_dict: Dict mapping frame_index -> PIL.Image (alpha masks)
+        max_frame: Maximum frame index found
     """
+    try:
+        modal_volume_name = os.environ['MODERATION_MODAL_VOLUME']
+        volume = modal.Volume.from_name(modal_volume_name)
+        segment_path = f"/{magic_code}/{segment_id}"
+        frames_dict = {}
+        masks_dict = {}
+        logger.info(f"Downloading files from {segment_path}")
+        # List all files in the segment directory
+        try:
+            files = list(volume.listdir(segment_path))
+        except Exception as e:
+            logger.error(f"Failed to list segment directory: {e}")
+            return {}, {}, 0
+        # Parse filenames and download
+        frame_pattern = re.compile(r'^frame_(\d+)\.(jpg|png)$')
+        alpha_pattern = re.compile(r'^alpha_frame_(\d+)\.png$')
+        for entry in files:
+            if entry.type != modal.volume.FileEntryType.FILE:
+                continue
+            filename = os.path.basename(entry.path)
+            # Check if it's a frame file
+            frame_match = frame_pattern.match(filename)
+            if frame_match:
+                frame_idx = int(frame_match.group(1))
+                try:
+                    # Download frame
+                    file_data = volume.read_file(f"{segment_path}/{filename}")
+                    img = Image.open(io.BytesIO(file_data))
+                    frames_dict[frame_idx] = img.copy()
+                    logger.debug(f"Downloaded frame {frame_idx}")
+                except Exception as e:
+                    logger.error(f"Failed to download {filename}: {e}")
+                continue
+            # Check if it's an alpha mask file
+            alpha_match = alpha_pattern.match(filename)
+            if alpha_match:
+                frame_idx = int(alpha_match.group(1))
+                try:
+                    # Download alpha mask
+                    file_data = volume.read_file(f"{segment_path}/{filename}")
+                    img = Image.open(io.BytesIO(file_data))
+                    masks_dict[frame_idx] = img.copy()
+                    logger.debug(f"Downloaded alpha mask {frame_idx}")
+                except Exception as e:
+                    logger.error(f"Failed to download {filename}: {e}")
+                continue
+        max_frame = max(frames_dict.keys()) if frames_dict else 0
+        logger.info(f"Downloaded {len(frames_dict)} frames and {len(masks_dict)} masks. Max frame: {max_frame}")
+        return frames_dict, masks_dict, max_frame
+    except Exception as e:
+        logger.error(f"Error downloading segment files: {e}")
+        return {}, {}, 0
+def composite_image_with_mask(frame: Image.Image, mask: Optional[Image.Image], show_mask: bool) -> Image.Image:
+    """
+    Composite the original frame with the alpha mask overlay.
+    Args:
+        frame: Original RGB/RGBA image
+        mask: Alpha mask (grayscale or RGBA)
+        show_mask: Whether to show the mask overlay
+    Returns:
+        Composited PIL Image
+    """
+    if not show_mask or mask is None:
+        return frame.copy()
+    # Convert frame to RGBA if needed
+    if frame.mode != 'RGBA':
+        frame_rgba = frame.convert('RGBA')
+    else:
+        frame_rgba = frame.copy()
+    # Convert mask to 'L' (grayscale) if needed
+    if mask.mode != 'L':
+        mask_gray = mask.convert('L')
+    else:
+        mask_gray = mask.copy()
+    # Resize mask to match frame if needed
+    if mask_gray.size != frame_rgba.size:
+        mask_gray = mask_gray.resize(frame_rgba.size, Image.Resampling.LANCZOS)
+    # Create a colored overlay (semi-transparent red)
+    overlay = Image.new('RGBA', frame_rgba.size, (255, 0, 0, 128))
+    # Use mask as alpha channel for the overlay
+    overlay.putalpha(mask_gray)
+    # Composite
+    result = Image.alpha_composite(frame_rgba, overlay)
+    return result
+def load_segment_frame(segment_id, frame_number, show_mask, magic_code_state, frames_state, masks_state):
+    """
+    Load and display a specific frame with optional alpha mask overlay.
+    """
+    if not segment_id or frames_state is None:
+        return None, gr.update()
+    frames_dict = frames_state
+    masks_dict = masks_state
+    frame_idx = int(frame_number)
+    if frame_idx not in frames_dict:
+        logger.warning(f"Frame {frame_idx} not found in downloaded frames")
+        return None, gr.update()
+    frame = frames_dict[frame_idx]
+    mask = masks_dict.get(frame_idx, None)
+    # Composite image with mask
+    result_image = composite_image_with_mask(frame, mask, show_mask)
+    logger.info(f"Loaded frame {frame_idx} with mask overlay: {show_mask}")
+    return result_image, gr.update()
+def handle_keyboard_navigation(key_code, segment_id, current_frame, show_mask, magic_code_state, frames_state, masks_state):
+    """
+    Handle left/right arrow key navigation for frame slider.
+    Args:
+        key_code: JavaScript key code ('ArrowLeft' or 'ArrowRight')
+        segment_id: Current segment ID
+        current_frame: Current frame number
+        show_mask: Whether to show alpha mask overlay
+        magic_code_state: Magic code state
+        frames_state: Frames dictionary state
+        masks_state: Masks dictionary state
+    Returns:
+        Tuple of (updated image, updated slider value)
+    """
+    if not segment_id or frames_state is None:
+        return None, gr.update()
+    frames_dict = frames_state
+    masks_dict = masks_state
+    # Get min/max from available frames
+    available_frames = sorted(frames_dict.keys())
+    if not available_frames:
+        return None, gr.update()
+    min_frame = available_frames[0]
+    max_frame = available_frames[-1]
+    # Calculate new frame number
+    new_frame = int(current_frame)
+    if key_code == 'ArrowLeft':
+        new_frame = max(min_frame, new_frame - 1)
+    elif key_code == 'ArrowRight':
+        new_frame = min(max_frame, new_frame + 1)
+    else:
+        # Unknown key, no change
+        return None, gr.update()
+    # If frame didn't change (at boundary), return early
+    if new_frame == int(current_frame):
+        return None, gr.update()
+    logger.info(f"Keyboard navigation: {key_code} -> frame {new_frame}")
+    # Load the new frame using existing logic
+    if new_frame not in frames_dict:
+        logger.warning(f"Frame {new_frame} not found in downloaded frames")
+        return None, gr.update()
+    frame = frames_dict[new_frame]
+    mask = masks_dict.get(new_frame, None)
+    # Composite image with mask
+    result_image = composite_image_with_mask(frame, mask, show_mask)
+    # Return updated image and new slider value
+    return result_image, gr.update(value=new_frame)
+def handle_segment_selection(segment_id, magic_code):
+    """
+    Handle segment selection: download all files and initialize the view.
+    """
+    if not segment_id or not magic_code:
+        return None, gr.update(maximum=0, value=0), {}, {}, magic_code, gr.update(interactive=False)
+    logger.info(f"Segment selected: {segment_id}")
+    # Download all frames and masks
+    frames_dict, masks_dict, max_frame = download_segment_files(magic_code, segment_id)
+    if not frames_dict:
+        logger.error("No frames downloaded")
+        return None, gr.update(maximum=0, value=0), {}, {}, magic_code, gr.update(interactive=False)
+    # Load first frame
+    frame_0 = frames_dict.get(0, None)
+    if frame_0 is None:
+        # Use first available frame
+        frame_0 = frames_dict[min(frames_dict.keys())]
+    # Initial display without mask
+    result_image = composite_image_with_mask(frame_0, masks_dict.get(0, None), False)
+    # Enable download button only if masks are available
+    has_masks = len(masks_dict) > 0
+    return (
+        result_image,
+        gr.update(minimum=0, maximum=max_frame, value=0),
+        frames_dict,
+        masks_dict,
+        magic_code,
+        gr.update(interactive=has_masks)
+    )
+def handle_image_click(segment_id, frame_number, magic_code, frames_state, masks_state, evt: gr.SelectData):
     """
+    Handle click on ImageEditor: send coordinates to SAM-3, get new mask, update display.
+    """
+    if not segment_id or frames_state is None or evt is None:
+        return None, masks_state, gr.update()
+    # Extract click coordinates
+    x, y = evt.index[0], evt.index[1]
+    frame_idx = int(frame_number)
+    logger.info(f"Click detected at ({x}, {y}) on frame {frame_idx}")
+    if frame_idx not in frames_state:
+        logger.error(f"Frame {frame_idx} not in state")
+        return None, masks_state, gr.update()
+    try:
+        # Get the original frame
+        frame = frames_state[frame_idx]
+        # Convert frame to bytes
+        img_byte_arr = io.BytesIO()
+        frame.save(img_byte_arr, format='PNG')
+        img_bytes = img_byte_arr.getvalue()
+        # Call Modal SAM-3 segmentation function
+        logger.info(f"Calling SAM-3 segmentation with coordinates ({x}, {y})")
+        try:
+            sam_function = modal.Function.from_name("Content-Moderation", "sam3_segmentation_function")
+            mask_bytes = sam_function.remote(image_bytes=img_bytes, x=x, y=y)
+            # Parse returned mask
+            new_mask = Image.open(io.BytesIO(mask_bytes))
+            # Update masks dict
+            masks_state[frame_idx] = new_mask.copy()
+            # Composite and return updated image (always show mask after click)
+            result_image = composite_image_with_mask(frame, new_mask, True)
+            logger.info(f"Successfully updated mask for frame {frame_idx}")
+            # Enable download button since we now have at least one mask
+            return result_image, masks_state, gr.update(interactive=True)
+        except Exception as e:
+            logger.error(f"Error calling SAM-3 function: {e}")
+            # Return current view without update
+            frame = frames_state[frame_idx]
+            mask = masks_state.get(frame_idx, None)
+            result_image = composite_image_with_mask(frame, mask, True)
+            return result_image, masks_state, gr.update()
+    except Exception as e:
+        logger.error(f"Error handling image click: {e}")
+        return None, masks_state, gr.update()
+def download_segment(segment_id, frames_state, masks_state):
+    """
+    Package and download only the alpha masks for the selected segment as a ZIP file.
+    ZIP filename will be {segment_id}.zip.
+    Returns:
+        Tuple of (status_message, file_path, status_visibility)
     """
     if not segment_id:
+        return gr.update(value="No segment selected", visible=True), None, gr.update(visible=True)
+    if not masks_state:
+        logger.warning(f"No alpha masks available for segment: {segment_id}")
+        return gr.update(value="No alpha masks available", visible=True), None, gr.update(visible=True)
     logger.info(f"Download requested for segment: {segment_id}")
+    try:
+        import shutil
+        # Create temporary directory for alpha mask files only
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # Save only alpha masks
+            for frame_idx, mask_img in masks_state.items():
+                mask_path = os.path.join(tmpdir, f"alpha_frame_{frame_idx:06d}.png")
+                mask_img.save(mask_path)
+            # Create ZIP with segment UUID as filename
+            zip_path = f"/tmp/{segment_id}.zip"
+            shutil.make_archive(zip_path.replace('.zip', ''), 'zip', tmpdir)
+            logger.info(f"Created ZIP at {zip_path} with {len(masks_state)} alpha masks")
+            return (
+                gr.update(value=f"✓ Downloaded {len(masks_state)} alpha masks", visible=True),
+                zip_path,
+                gr.update(visible=True)
+            )
+    except Exception as e:
+        logger.error(f"Error creating download package: {e}")
+        return (
+            gr.update(value=f"Error: {str(e)}", visible=True),
+            None,
+            gr.update(visible=True)
+        )
 # Create a professional Gradio interface using the Golden ratio (1.618) for proportions
         padding: 15px;
     }
 }
+/* Loading modal overlay */
+#download-loading-modal {
+    display: none;
+    position: fixed;
+    top: 0;
+    left: 0;
+    width: 100%;
+    height: 100%;
+    background-color: rgba(0, 0, 0, 0.7);
+    z-index: 9999;
+    justify-content: center;
+    align-items: center;
+}
+#download-loading-modal.show {
+    display: flex;
+}
+.loading-content {
+    background-color: var(--card-bg);
+    padding: 40px;
+    border-radius: var(--border-radius);
+    text-align: center;
+    box-shadow: 0 10px 40px rgba(0, 0, 0, 0.5);
+}
+.spinner {
+    border: 4px solid var(--border-color);
+    border-top: 4px solid var(--primary-color);
+    border-radius: 50%;
+    width: 50px;
+    height: 50px;
+    animation: spin 1s linear infinite;
+    margin: 0 auto 20px;
+}
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+}
+.loading-text {
+    color: var(--text-color);
+    font-size: 18px;
+    font-weight: 500;
+}
 """
 # Create a Blocks interface for more customization
             with gr.Tab("Segmentation Editing"):
                 gr.Markdown("### Segmentation Editing Workspace")
+                # State management for session data
+                magic_code_state = gr.State(value=None)
+                frames_state = gr.State(value=None)
+                masks_state = gr.State(value=None)
                 # Row 1: Magic Code textbox + Submit button
                 with gr.Row(elem_classes="input-section"):
                     with gr.Column(scale=3):
                         info="Select a segment to edit"
                     )
+                # Row 3: Show Alpha Mask checkbox
+                with gr.Row(elem_classes="input-section", visible=False) as seg_row3_checkbox:
+                    seg_show_mask = gr.Checkbox(
+                        label="Show Alpha Mask",
+                        value=False,
+                        interactive=True
+                    )
+                # Row 4: ImageEditor component (SAM-3 style interaction)
+                with gr.Row(elem_classes="input-section", visible=False) as seg_row4:
                     seg_image_editor = gr.ImageEditor(
                         label="Image Click Segmentation",
                         type="pil",
                         brush=gr.Brush(colors=["#FF0000"], color_mode="fixed")
                     )
+                # Row 5: Frame number slider
+                with gr.Row(elem_classes="input-section", visible=False) as seg_row5:
                     seg_frame_slider = gr.Slider(
                         minimum=0,
                         maximum=100,
                         info="Select video frame to segment"
                     )
+                # Row 6: Download Segment button
+                with gr.Row(visible=False) as seg_row6:
                     seg_download_btn = gr.Button("Download Segment", variant="secondary")
+                    seg_download_status = gr.Textbox(label="Status", value="", visible=False, interactive=False)
+                    seg_download_file = gr.File(label="Download", visible=False)
+                # Hidden component for keyboard event capture
+                seg_keyboard_input = gr.Textbox(visible=False, elem_id="seg_keyboard_input")
+                # Loading modal HTML
+                gr.HTML("""
+                    <div id="download-loading-modal">
+                        <div class="loading-content">
+                            <div class="spinner"></div>
+                            <div class="loading-text">Preparing download…</div>
+                        </div>
+                    </div>
+                    <script>
+                        function showDownloadLoading() {
+                            const modal = document.getElementById('download-loading-modal');
+                            if (modal) modal.classList.add('show');
+                        }
+                        function hideDownloadLoading() {
+                            const modal = document.getElementById('download-loading-modal');
+                            if (modal) modal.classList.remove('show');
+                        }
+                        // Listen for download button clicks
+                        document.addEventListener('DOMContentLoaded', function() {
+                            const checkButton = setInterval(function() {
+                                const downloadBtn = document.querySelector('button:has-text("Download Segment")');
+                                if (!downloadBtn) {
+                                    // Fallback: find button by content
+                                    const buttons = document.querySelectorAll('button');
+                                    for (let btn of buttons) {
+                                        if (btn.textContent.includes('Download Segment')) {
+                                            setupDownloadButton(btn);
+                                            clearInterval(checkButton);
+                                            break;
+                                        }
+                                    }
+                                } else {
+                                    setupDownloadButton(downloadBtn);
+                                    clearInterval(checkButton);
+                                }
+                            }, 500);
+                            function setupDownloadButton(btn) {
+                                btn.addEventListener('click', function() {
+                                    showDownloadLoading();
+                                    // Hide modal after function completes (watch for Gradio loading to finish)
+                                    setTimeout(function checkLoading() {
+                                        const gradioLoading = document.querySelector('.loading');
+                                        if (!gradioLoading) {
+                                            setTimeout(hideDownloadLoading, 500);
+                                        } else {
+                                            setTimeout(checkLoading, 200);
+                                        }
+                                    }, 200);
+                                });
+                            }
+                        });
+                    </script>
+                """)
     # Wire Content Moderation processing
     cm_process_btn.click(
     seg_submit_btn.click(
         fn=submit_magic_code,
         inputs=[seg_magic_code],
+        outputs=[seg_id_dropdown, seg_magic_code, seg_row2, seg_row3_checkbox, seg_row4, seg_row5, seg_row6]
     )
+    # Segment selection handler
     seg_id_dropdown.change(
+        fn=handle_segment_selection,
+        inputs=[seg_id_dropdown, seg_magic_code],
+        outputs=[seg_image_editor, seg_frame_slider, frames_state, masks_state, magic_code_state, seg_download_btn]
     )
+    # Frame slider handler
     seg_frame_slider.change(
         fn=load_segment_frame,
+        inputs=[seg_id_dropdown, seg_frame_slider, seg_show_mask, magic_code_state, frames_state, masks_state],
+        outputs=[seg_image_editor, seg_show_mask]
     )
+    # Show mask checkbox handler
+    seg_show_mask.change(
+        fn=load_segment_frame,
+        inputs=[seg_id_dropdown, seg_frame_slider, seg_show_mask, magic_code_state, frames_state, masks_state],
+        outputs=[seg_image_editor, seg_show_mask]
+    )
+    # Image click handler (for SAM-3 segmentation)
+    seg_image_editor.select(
+        fn=handle_image_click,
+        inputs=[seg_id_dropdown, seg_frame_slider, magic_code_state, frames_state, masks_state],
+        outputs=[seg_image_editor, masks_state, seg_download_btn]
+    )
+    # Download button handler
     seg_download_btn.click(
         fn=download_segment,
+        inputs=[seg_id_dropdown, frames_state, masks_state],
+        outputs=[seg_download_status, seg_download_file, seg_download_status]
+    )
+    # Keyboard navigation handler
+    seg_keyboard_input.change(
+        fn=handle_keyboard_navigation,
+        inputs=[
+            seg_keyboard_input,
+            seg_id_dropdown,
+            seg_frame_slider,
+            seg_show_mask,
+            magic_code_state,
+            frames_state,
+            masks_state
+        ],
+        outputs=[seg_image_editor, seg_frame_slider]
+    )
+    # Add JavaScript to capture arrow key events
+    demo.load(
+        None,
+        None,
+        None,
+        js="""
+        () => {
+            // Wait for the DOM to be ready
+            setTimeout(() => {
+                const keyboardInput = document.getElementById('seg_keyboard_input');
+                if (!keyboardInput) {
+                    console.warn('Keyboard input element not found');
+                    return;
+                }
+                // Add keydown listener to document
+                document.addEventListener('keydown', (e) => {
+                    // Only handle arrow keys
+                    if (e.key === 'ArrowLeft' || e.key === 'ArrowRight') {
+                        // Check if we're in the Segmentation Editing tab
+                        const segTab = document.querySelector('[id*="segmentation-editing"]');
+                        const activeTab = document.querySelector('.tab-nav button.selected');
+                        if (activeTab && activeTab.textContent.includes('Segmentation Editing')) {
+                            e.preventDefault();
+                            // Update the hidden input to trigger the change event
+                            const textarea = keyboardInput.querySelector('textarea');
+                            if (textarea) {
+                                textarea.value = e.key;
+                                textarea.dispatchEvent(new Event('input', { bubbles: true }));
+                            }
+                        }
+                    }
+                });
+            }, 1000);
+        }
+        """
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 modal
 gradio
 opencv-python-headless

 modal
 gradio
 opencv-python-headless
+Pillow
+numpy