usingcolor committed on
Commit
81be487
·
1 Parent(s): df9d184

Use Xet for assets

Browse files
Files changed (5) hide show
  1. .gitattributes +2 -0
  2. app.py +31 -26
  3. assets/dog.jpg +0 -0
  4. assets/green_mamba.jpg +3 -0
  5. assets/leo.jpg +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/green_mamba.jpg filter=lfs diff=lfs merge=lfs -text
37
+ assets/leo.jpg filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -73,22 +73,22 @@ CSS_STYLE = """
73
  # -----------------------------
74
 
75
  def get_model():
76
- global _GLOBAL_MODEL
 
77
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
78
- if _GLOBAL_MODEL is None:
79
- print(f"Downloading {MODEL_FILENAME} from {MODEL_REPO}...")
80
- try:
81
- checkpoint_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
82
- model = MambaEye(**MODEL_CONFIG)
83
- model.load_state_dict(torch.load(checkpoint_path, map_location=device, weights_only=True))
84
- model.to(device)
85
- model.eval()
86
- _GLOBAL_MODEL = model
87
- print("Model loaded successfully.")
88
- except Exception as e:
89
- print(f"Failed to load model: {e}")
90
- raise
91
- return _GLOBAL_MODEL, device
92
 
93
  def transfer_inference_params(params, device):
94
  if params is None or getattr(params, "key_value_memory_dict", None) is None:
@@ -248,7 +248,7 @@ def run_auto_scan(image, scan_pattern, sequence_length):
248
  state['x_offset'], state['y_offset'], state['h'], state['w']
249
  )
250
 
251
- return img_display, format_predictions(final_probs), state, f"Auto Scan Complete. Extracted {sequence_length} patches. Click to add more!"
252
 
253
  @spaces.GPU
254
  def process_click_inference(x_orig, y_orig, original_image, state):
@@ -269,6 +269,9 @@ def process_click_inference(x_orig, y_orig, original_image, state):
269
  canvas_y = int(x_orig * ratio) + state['y_offset']
270
  canvas_x = int(y_orig * ratio) + state['x_offset']
271
 
 
 
 
272
  # 1px flexible precision anchoring the patch directly onto the exact center click
273
  px = max(0, min(int(canvas_x - PATCH_SIZE / 2), TARGET_CANVAS_SIZE - PATCH_SIZE))
274
  py = max(0, min(int(canvas_y - PATCH_SIZE / 2), TARGET_CANVAS_SIZE - PATCH_SIZE))
@@ -298,7 +301,7 @@ def process_click_inference(x_orig, y_orig, original_image, state):
298
  state['x_offset'], state['y_offset'], state['h'], state['w']
299
  )
300
 
301
- return img_display, format_predictions(final_probs), state, f"Added patch {state['sequence_length']} (Total {state['inference_params'].seqlen_offset} steps)."
302
 
303
  def on_click(evt: gr.SelectData, original_image, state):
304
  x_orig, y_orig = evt.index
@@ -306,26 +309,27 @@ def on_click(evt: gr.SelectData, original_image, state):
306
 
307
  def on_upload(image):
308
  if image is None:
309
- return None, None, {"Waiting...": 1.0}, None, "Upload Image"
310
 
311
  # Pre-render the grey background immediately on upload
312
  grey_base = Image.fromarray(image).convert("L").convert("RGB")
313
  grey_base_np = (np.array(grey_base).astype(float) * 0.4 + 160).clip(0, 255).astype(np.uint8)
314
 
315
- return grey_base_np, image, {"Click Auto Scan or click the image": 1.0}, None, "Ready. You can Auto Scan or click."
316
 
317
  def on_clear(original_image):
318
  if original_image is None:
319
- return None, {"Cleared": 1.0}, None, "Cleared"
320
 
321
  grey_base = Image.fromarray(original_image).convert("L").convert("RGB")
322
  grey_base_np = (np.array(grey_base).astype(float) * 0.4 + 160).clip(0, 255).astype(np.uint8)
323
 
324
- return grey_base_np, {"Cleared": 1.0}, init_state_for_image(original_image), "Selections cleared. Ready for new patch sequence."
325
 
 
326
  with gr.Blocks(title="MambaEye Interactive Demo", css=CSS_STYLE) as demo:
327
  gr.Markdown("# MambaEye Interactive Inference Demo")
328
- gr.Markdown("This interface incorporates the full **MambaEye-base** model natively.")
329
 
330
  with gr.Row():
331
  with gr.Column(scale=2):
@@ -346,6 +350,7 @@ with gr.Blocks(title="MambaEye Interactive Demo", css=CSS_STYLE) as demo:
346
 
347
  with gr.Column(scale=1):
348
  model_output_label = gr.Label(label="MambaEye Output Predictions", num_top_classes=5)
 
349
  status_text = gr.Markdown("Status: Waiting for image upload...")
350
 
351
  state = gr.State(None)
@@ -354,25 +359,25 @@ with gr.Blocks(title="MambaEye Interactive Demo", css=CSS_STYLE) as demo:
354
  input_image.upload(
355
  fn=on_upload,
356
  inputs=[input_image],
357
- outputs=[input_image, original_image_state, model_output_label, state, status_text]
358
  )
359
 
360
  auto_btn.click(
361
  fn=run_auto_scan,
362
  inputs=[original_image_state, scan_pattern, seq_length],
363
- outputs=[input_image, model_output_label, state, status_text]
364
  )
365
 
366
  input_image.select(
367
  fn=on_click,
368
  inputs=[original_image_state, state],
369
- outputs=[input_image, model_output_label, state, status_text]
370
  )
371
 
372
  clear_btn.click(
373
  fn=on_clear,
374
  inputs=[original_image_state],
375
- outputs=[input_image, model_output_label, state, status_text]
376
  )
377
 
378
  if __name__ == "__main__":
 
73
  # -----------------------------
74
 
75
  def get_model():
76
+ # The @spaces.GPU worker is forked from the main process, so it inherits the
77
+ # _GLOBAL_CPU_MODEL reference; we then move the model's parameters onto the selected device (GPU VRAM when available).
78
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
79
+ _GLOBAL_CPU_MODEL.to(device)
80
+ return _GLOBAL_CPU_MODEL, device
81
+
82
+ # --- FALLBACK CSS INJECTION ---
83
+ # We use a CSS override to display a crosshair cursor, since custom dynamic HTML div overlays
84
+ # are not supported inside Gradio's image component (its canvas is rendered in a shadow DOM).
85
+ CSS_STYLE = """
86
+ .gradio-image-hook, .gradio-image-hook * {
87
+ cursor: crosshair !important;
88
+ }
89
+ """
90
+
91
+ # --- HOVER SCRIPT INJECTION ---
 
92
 
93
  def transfer_inference_params(params, device):
94
  if params is None or getattr(params, "key_value_memory_dict", None) is None:
 
248
  state['x_offset'], state['y_offset'], state['h'], state['w']
249
  )
250
 
251
+ return img_display, format_predictions(final_probs), state, f"Auto Scan Complete. Extracted {sequence_length} patches. Click to add more!", sequence_length
252
 
253
  @spaces.GPU
254
  def process_click_inference(x_orig, y_orig, original_image, state):
 
269
  canvas_y = int(x_orig * ratio) + state['y_offset']
270
  canvas_x = int(y_orig * ratio) + state['x_offset']
271
 
272
+ # 1px flexible precision anchoring the patch directly onto the exact center click
273
+ px = max(0, min(int(canvas_x - PATCH_SIZE / 2), TARGET_CANVAS_SIZE - PATCH_SIZE))
274
+ py = max(0, min(int(canvas_y - PATCH_SIZE / 2), TARGET_CANVAS_SIZE - PATCH_SIZE))
275
  # 1px flexible precision anchoring the patch directly onto the exact center click
276
  px = max(0, min(int(canvas_x - PATCH_SIZE / 2), TARGET_CANVAS_SIZE - PATCH_SIZE))
277
  py = max(0, min(int(canvas_y - PATCH_SIZE / 2), TARGET_CANVAS_SIZE - PATCH_SIZE))
 
301
  state['x_offset'], state['y_offset'], state['h'], state['w']
302
  )
303
 
304
+ return img_display, format_predictions(final_probs), state, f"Added patch {state['sequence_length']} (Total {state['inference_params'].seqlen_offset} steps).", state['sequence_length']
305
 
306
  def on_click(evt: gr.SelectData, original_image, state):
307
  x_orig, y_orig = evt.index
 
309
 
310
  def on_upload(image):
311
  if image is None:
312
+ return None, None, {"Waiting...": 1.0}, None, "Upload Image", 0
313
 
314
  # Pre-render the grey background immediately on upload
315
  grey_base = Image.fromarray(image).convert("L").convert("RGB")
316
  grey_base_np = (np.array(grey_base).astype(float) * 0.4 + 160).clip(0, 255).astype(np.uint8)
317
 
318
+ return grey_base_np, image, {"Click Auto Scan or click the image": 1.0}, None, "Ready. You can Auto Scan or click.", 0
319
 
320
  def on_clear(original_image):
321
  if original_image is None:
322
+ return None, {"Cleared": 1.0}, None, "Cleared", 0
323
 
324
  grey_base = Image.fromarray(original_image).convert("L").convert("RGB")
325
  grey_base_np = (np.array(grey_base).astype(float) * 0.4 + 160).clip(0, 255).astype(np.uint8)
326
 
327
+ return grey_base_np, {"Cleared": 1.0}, init_state_for_image(original_image), "Selections cleared. Ready for new patch sequence.", 0
328
 
329
+ with gr.Blocks(title="MambaEye Interactive Demo", css=CSS_STYLE) as demo:
330
  with gr.Blocks(title="MambaEye Interactive Demo", css=CSS_STYLE) as demo:
331
  gr.Markdown("# MambaEye Interactive Inference Demo")
332
+ gr.Markdown("This interface incorporates the full **MambaEye-base** model natively.\n\n**Note**: The first inference or Auto Scan may take **1~2 minutes** to compile CUDA kernels and build hardware cache. Subsequent patch clicks will be dramatically faster!")
333
 
334
  with gr.Row():
335
  with gr.Column(scale=2):
 
350
 
351
  with gr.Column(scale=1):
352
  model_output_label = gr.Label(label="MambaEye Output Predictions", num_top_classes=5)
353
+ seq_len_display = gr.Number(label="Total Sequenced Patches", value=0, interactive=False)
354
  status_text = gr.Markdown("Status: Waiting for image upload...")
355
 
356
  state = gr.State(None)
 
359
  input_image.upload(
360
  fn=on_upload,
361
  inputs=[input_image],
362
+ outputs=[input_image, original_image_state, model_output_label, state, status_text, seq_len_display]
363
  )
364
 
365
  auto_btn.click(
366
  fn=run_auto_scan,
367
  inputs=[original_image_state, scan_pattern, seq_length],
368
+ outputs=[input_image, model_output_label, state, status_text, seq_len_display]
369
  )
370
 
371
  input_image.select(
372
  fn=on_click,
373
  inputs=[original_image_state, state],
374
+ outputs=[input_image, model_output_label, state, status_text, seq_len_display]
375
  )
376
 
377
  clear_btn.click(
378
  fn=on_clear,
379
  inputs=[original_image_state],
380
+ outputs=[input_image, model_output_label, state, status_text, seq_len_display]
381
  )
382
 
383
  if __name__ == "__main__":
assets/dog.jpg ADDED
assets/green_mamba.jpg ADDED

Git LFS Details

  • SHA256: 1d269ad4a9cbc7283b6c34fb4cce2cce9e3be503d140476a8c0f8c62d93a2175
  • Pointer size: 132 Bytes
  • Size of remote file: 1.06 MB
assets/leo.jpg ADDED

Git LFS Details

  • SHA256: 70ce1fd8334e58776b96c89ceaae9fa26c9533be05b2e3d8d129ba097228b2f6
  • Pointer size: 131 Bytes
  • Size of remote file: 209 kB