Spaces:

vikashmakeit
/

garment-to-pattern

Running

App Files Files Community

vikashmakeit committed on 13 days ago

Commit

70dedbc

verified ·

1 Parent(s): a956c3b

Fix VLM models: use actual vision models with correct providers (Qwen2.5-VL, Gemma-4 via novita, Kimi via fireworks-ai)

Browse files

Files changed (1) hide show

app.py +52 -110

app.py CHANGED Viewed

@@ -65,10 +65,17 @@ Apply the edit and return ONLY the complete updated JSON (no markdown, no explan
   }}
 }}"""
 VISION_MODELS = [
-    ("Qwen/Qwen3.5-9B", "together", "Qwen 3.5 9B"),
-    ("google/gemma-4-31B-it", "together", "Gemma 4 31B"),
-    ("moonshotai/Kimi-K2.5", "together", "Kimi K2.5"),
 ]
 def _extract_response_text(message):
@@ -104,7 +111,7 @@ def _call_vlm(messages, timeout=180):
                 analysis['_model_used'] = display_name
                 print(f"[VLM] OK: {display_name} detected {analysis.get('garment_type','?')}")
                 return analysis
-            else: print(f"[VLM] {display_name}: HTTP {response.status_code}")
         except Exception as e:
             print(f"[VLM] {display_name} failed: {e}"); continue
     return None
@@ -174,8 +181,7 @@ def process_image(image, garment_type_override="Auto-detect"):
         p2d, p3d, summary, j = _generate_all_outputs(analysis)
         return p2d, p3d, summary, j, []
     except Exception as e:
-        traceback.print_exc()
-        return None, None, f"Error: {e}", "{}", []
 def process_text(description):
     if not description.strip(): return None, None, "Enter a description.", "{}", []
@@ -187,8 +193,7 @@ def process_text(description):
     if analysis is None:
         desc_lower = description.lower()
         for gt in ['hoodie','jacket','coat','blazer','dress','skirt','pants','trousers','jeans','vest','shirt','blouse','top']:
-            if gt in desc_lower:
-                analysis = get_default_analysis(gt); analysis['description'] = description; break
         if analysis is None: analysis = get_default_analysis("shirt"); analysis['description'] = description
     _current_analysis["data"] = copy.deepcopy(analysis)
     try:
@@ -215,8 +220,7 @@ def chat_edit(message, history):
         try: updated = _call_vlm([{"role": "user", "content": edit_prompt}], timeout=90)
         except: pass
     if updated is None:
-        updated = copy.deepcopy(current)
-        msg_lower = message.lower()
         if "long sleeve" in msg_lower: updated['measurements']['sleeve_length'] = 65
         elif "short sleeve" in msg_lower: updated['measurements']['sleeve_length'] = 25
         if "no collar" in msg_lower: updated['features']['has_collar'] = False; updated['features']['collar_type'] = 'none'
@@ -243,92 +247,43 @@ def chat_edit(message, history):
     history = history or []; history.append((message, ai_msg))
     return history, p2d, p3d, summary, j
-# ── Agentic Refinement ──────────────────────────────────────────────────────
 def run_refinement(image, garment_type_override, max_iters):
-    """Run the agentic refinement loop."""
     if image is None:
-        yield None, None, None, "Please upload a garment image.", "{}", None
-        return
-    # Step 1: Initial VLM analysis
     analysis = None
-    try:
-        analysis = analyze_with_vlm(image)
-    except Exception as e:
-        print(f"VLM failed: {e}")
     if analysis is None:
         gt = garment_type_override.lower() if garment_type_override != "Auto-detect" else "shirt"
         analysis = get_default_analysis(gt)
-    if garment_type_override != "Auto-detect":
-        analysis['garment_type'] = garment_type_override.lower()
-    # Generate function for the loop
-    def gen_fn(a):
-        return _generate_all_outputs(a)
-    # Run refinement loop
-    max_iters = int(max_iters)
-    result = refinement_loop(
-        original_image=image,
-        initial_analysis=analysis,
-        generate_fn=gen_fn,
-        max_iterations=max_iters,
-        target_composite=0.82,
-        plateau_patience=3,
-        lr=0.7,
-    )
-    # Build log markdown
-    log_lines = [f"## Refinement Results\n"]
-    log_lines.append(f"**Converged:** {'✅ Yes' if result['converged'] else '❌ No'}")
-    log_lines.append(f"**Iterations:** {result['total_iterations']}")
-    log_lines.append(f"**Best Score:** {result['best_score']:.4f}")
-    if result['scores']:
-        log_lines.append(f"**Score progression:** {' → '.join(f'{s:.3f}' for s in result['scores'])}")
     log_lines.append("")
     for step in result['history']:
-        it = step['iteration']
-        status = step.get('status', '?')
-        metrics = step.get('metrics', {})
         log_lines.append(f"### Iteration {it} — {status}")
-        if metrics:
-            log_lines.append(f"SSIM={metrics.get('ssim',0):.3f} | Edge={metrics.get('edge_ssim',0):.3f} | Composite={metrics.get('composite',0):.3f}")
-        if step.get('new_best'):
-            log_lines.append("⭐ **New best!**")
         diffs = step.get('vlm_differences', [])
-        if diffs:
-            log_lines.append("**Differences:** " + "; ".join(diffs[:3]))
         adj = step.get('adjustments', {})
-        if adj:
-            log_lines.append("**Adjustments:** " + ", ".join(f"{k}={v}" for k, v in adj.items()))
         reason = step.get('reason', '')
-        if reason:
-            log_lines.append(f"*{reason}*")
         log_lines.append("")
-    log_md = "\n".join(log_lines)
-    # Get best outputs
-    best = result['best_analysis']
-    _current_analysis["data"] = copy.deepcopy(best)
-    try:
-        p2d, p3d, summary, j = _generate_all_outputs(best)
-    except:
-        p2d, p3d, summary, j = None, None, "Error generating final outputs", "{}"
-    # Get last projection
     last_proj = None
     for step in reversed(result['history']):
-        if 'projection' in step:
-            last_proj = step['projection']
-            break
-    yield p2d, p3d, last_proj, log_md, j, summary
-# ── UI ──────────────────────────────────────────────────────────────────────
 CSS = """
 .main-header { text-align: center; margin-bottom: 20px; }
 .info-box { padding: 15px; border-radius: 10px; background: #f0f7ff; border: 1px solid #cce0ff; margin: 10px 0; }
@@ -336,15 +291,11 @@ CSS = """
 """
 with gr.Blocks(title="Garment Pattern Studio") as demo:
-    gr.HTML("""<div class="main-header">
-        <h1>🧵 Garment Pattern Studio</h1>
-        <p style="font-size: 1.1em; color: #555;">Analyze garments, edit with chat, preview in 3D, refine with AI agent</p>
-    </div>
-    <div class="info-box">
-        <b>Powered by:</b> Qwen 3.5 · Gemma 4 · Kimi K2.5 via
         <a href="https://huggingface.co/docs/inference-providers">HF Inference Providers</a>
-        &nbsp;|&nbsp; <b>3D view built from actual 2D pattern pieces</b>
-    </div>""")
     with gr.Tab("📸 From Image"):
         with gr.Row():
@@ -365,14 +316,7 @@ with gr.Blocks(title="Garment Pattern Studio") as demo:
             with gr.Column(scale=1):
                 text_input = gr.Textbox(label="Describe the garment", placeholder="e.g., A fitted A-line dress with cap sleeves", lines=3)
                 text_btn = gr.Button("Generate Pattern", variant="primary", size="lg")
-                gr.Examples(examples=[
-                    ["A classic dress shirt with long sleeves and button-down collar"],
-                    ["A flared midi skirt with high waist"],
-                    ["An oversized hoodie with kangaroo pocket"],
-                    ["A fitted blazer with notched lapel collar"],
-                    ["Slim-fit straight-leg jeans with pockets"],
-                    ["A knee-length A-line dress with cap sleeves"],
-                ], inputs=text_input)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Column(): txt_pattern_2d = gr.Image(label="2D Pattern", height=400)
@@ -425,12 +369,13 @@ with gr.Blocks(title="Garment Pattern Studio") as demo:
     with gr.Tab("🔄 Agentic Refinement"):
         gr.Markdown("""### Iterative Refinement Loop
 Upload a garment image. The AI agent will:
-1. **Analyze** → extract initial pattern parameters
-2. **Generate** → create 2D pattern + 3D garment
 3. **Project** → render 3D to 2D front view
-4. **Compare** → measure similarity (SSIM + VLM visual comparison)
-5. **Refine** → VLM suggests parameter adjustments
-6. **Repeat** until convergence or max iterations
 *Requires HF_TOKEN for VLM-powered refinement.*""")
         with gr.Row():
@@ -448,17 +393,14 @@ Upload a garment image. The AI agent will:
                     with gr.Column(): refine_log = gr.Markdown(label="Refinement Log")
                 refine_summary = gr.Markdown()
                 with gr.Accordion("Best Parameters JSON", open=False): refine_json = gr.Code(language="json")
-        refine_btn.click(run_refinement,
-                         inputs=[refine_image, refine_type, refine_iters],
                          outputs=[refine_2d, refine_3d, refine_proj, refine_log, refine_json, refine_summary])
-    gr.HTML("""<div class="ref-box" style="margin-top: 20px;"><h4>Research References</h4><ul>
-        <li><b>ChatGarment</b> (2024) — VLM + dialogue for garment editing [<a href="https://arxiv.org/abs/2412.17811">Paper</a>]</li>
-        <li><b>NGL-Prompter</b> (2025) — Training-free VLM pattern estimation [<a href="https://arxiv.org/abs/2602.20700">Paper</a>]</li>
-        <li><b>RRVF</b> (2025) — Render-compare visual feedback loops [<a href="https://arxiv.org/abs/2507.20766">Paper</a>]</li>
-        <li><b>SceneAssistant</b> (2026) — Agentic VLM scene refinement [<a href="https://arxiv.org/abs/2603.12238">Paper</a>]</li>
-    </ul></div>""")
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, css=CSS, theme=gr.themes.Soft())

   }}
 }}"""
+# Vision-language models as (model id, inference provider, display name) tuples
 VISION_MODELS = [
+    ("Qwen/Qwen2.5-VL-72B-Instruct", "together", "Qwen2.5-VL 72B"),
+    ("google/gemma-4-31B-it", "novita", "Gemma 4 31B"),
+    ("moonshotai/Kimi-K2.5", "fireworks-ai", "Kimi K2.5"),
+]
+# Models used for text-only requests (chat edit without images); currently identical to VISION_MODELS
+TEXT_MODELS = [
+    ("Qwen/Qwen2.5-VL-72B-Instruct", "together", "Qwen2.5-VL 72B"),
+    ("google/gemma-4-31B-it", "novita", "Gemma 4 31B"),
+    ("moonshotai/Kimi-K2.5", "fireworks-ai", "Kimi K2.5"),
 ]
 def _extract_response_text(message):
                 analysis['_model_used'] = display_name
                 print(f"[VLM] OK: {display_name} detected {analysis.get('garment_type','?')}")
                 return analysis
+            else: print(f"[VLM] {display_name}: HTTP {response.status_code} - {response.text[:200]}")
         except Exception as e:
             print(f"[VLM] {display_name} failed: {e}"); continue
     return None
         p2d, p3d, summary, j = _generate_all_outputs(analysis)
         return p2d, p3d, summary, j, []
     except Exception as e:
+        traceback.print_exc(); return None, None, f"Error: {e}", "{}", []
 def process_text(description):
     if not description.strip(): return None, None, "Enter a description.", "{}", []
     if analysis is None:
         desc_lower = description.lower()
         for gt in ['hoodie','jacket','coat','blazer','dress','skirt','pants','trousers','jeans','vest','shirt','blouse','top']:
+            if gt in desc_lower: analysis = get_default_analysis(gt); analysis['description'] = description; break
         if analysis is None: analysis = get_default_analysis("shirt"); analysis['description'] = description
     _current_analysis["data"] = copy.deepcopy(analysis)
     try:
         try: updated = _call_vlm([{"role": "user", "content": edit_prompt}], timeout=90)
         except: pass
     if updated is None:
+        updated = copy.deepcopy(current); msg_lower = message.lower()
         if "long sleeve" in msg_lower: updated['measurements']['sleeve_length'] = 65
         elif "short sleeve" in msg_lower: updated['measurements']['sleeve_length'] = 25
         if "no collar" in msg_lower: updated['features']['has_collar'] = False; updated['features']['collar_type'] = 'none'
     history = history or []; history.append((message, ai_msg))
     return history, p2d, p3d, summary, j
 def _format_refinement_log(result):
     """Render a refinement result dict as a Markdown log string.

     Reads ``converged``, ``total_iterations``, ``best_score``, ``scores`` and
     ``history`` from *result*. Each history step must carry ``iteration``;
     every other per-step field is optional and is skipped when empty.
     NOTE(review): the step schema is produced by ``refinement_loop``, which
     is not visible here -- confirm the key names against it.
     """
     lines = [
         "## Refinement Results\n",
         f"**Converged:** {'✅ Yes' if result['converged'] else '❌ No'}",
         f"**Iterations:** {result['total_iterations']}",
         f"**Best Score:** {result['best_score']:.4f}",
     ]
     if result['scores']:
         progression = " → ".join(f"{s:.3f}" for s in result['scores'])
         lines.append(f"**Scores:** {progression}")
     lines.append("")

     for step in result['history']:
         it = step['iteration']
         status = step.get('status', '?')
         metrics = step.get('metrics', {})
         lines.append(f"### Iteration {it} — {status}")
         if metrics:
             lines.append(f"SSIM={metrics.get('ssim',0):.3f} | Edge={metrics.get('edge_ssim',0):.3f} | Composite={metrics.get('composite',0):.3f}")
         if step.get('new_best'):
             lines.append("⭐ **New best!**")
         diffs = step.get('vlm_differences', [])
         if diffs:
             # Only the first three differences are shown to keep the log short.
             lines.append("**Differences:** " + "; ".join(diffs[:3]))
         adj = step.get('adjustments', {})
         if adj:
             lines.append("**Adjustments:** " + ", ".join(f"{k}={v}" for k, v in adj.items()))
         reason = step.get('reason', '')
         if reason:
             lines.append(f"*{reason}*")
         lines.append("")
     return "\n".join(lines)


 def run_refinement(image, garment_type_override, max_iters):
     """Run the refinement loop on a garment image and yield the final outputs.

     This is a generator that yields exactly one 6-tuple:
     ``(pattern_2d, pattern_3d, last_projection, log_markdown, json, summary)``.

     Args:
         image: The uploaded garment image, or ``None`` if nothing was uploaded.
         garment_type_override: ``"Auto-detect"`` or a garment type name; any
             other value overrides the detected ``garment_type`` (lower-cased).
         max_iters: Maximum number of refinement iterations; coerced with
             ``int()`` before being passed to ``refinement_loop``.

     Yields:
         The 6-tuple described above. When *image* is ``None`` the tuple holds
         only a prompt asking for an upload and an empty JSON string.
     """
     if image is None:
         yield None, None, None, "Please upload a garment image.", "{}", None
         return

     # Initial analysis: a VLM failure is logged and falls back to defaults.
     analysis = None
     try:
         analysis = analyze_with_vlm(image)
     except Exception as e:
         print(f"VLM failed: {e}")
     if analysis is None:
         gt = garment_type_override.lower() if garment_type_override != "Auto-detect" else "shirt"
         analysis = get_default_analysis(gt)
     if garment_type_override != "Auto-detect":
         analysis['garment_type'] = garment_type_override.lower()

     def gen_fn(a):
         return _generate_all_outputs(a)

     result = refinement_loop(
         original_image=image,
         initial_analysis=analysis,
         generate_fn=gen_fn,
         max_iterations=int(max_iters),
         target_composite=0.82,
         plateau_patience=3,
         lr=0.7,
     )

     # Remember the best analysis in the module-level state, then regenerate
     # the outputs from it.
     best = result['best_analysis']
     _current_analysis["data"] = copy.deepcopy(best)
     try:
         p2d, p3d, summary, j = _generate_all_outputs(best)
     except Exception:
         # Was a bare ``except:``, which also swallowed KeyboardInterrupt,
         # SystemExit and GeneratorExit and hid the cause of the failure.
         traceback.print_exc()
         p2d, p3d, summary, j = None, None, "Error", "{}"

     # Most recent history step that recorded a projection, if any.
     last_proj = next(
         (step['projection'] for step in reversed(result['history']) if 'projection' in step),
         None,
     )
     yield p2d, p3d, last_proj, _format_refinement_log(result), j, summary
 CSS = """
 .main-header { text-align: center; margin-bottom: 20px; }
 .info-box { padding: 15px; border-radius: 10px; background: #f0f7ff; border: 1px solid #cce0ff; margin: 10px 0; }
 """
 with gr.Blocks(title="Garment Pattern Studio") as demo:
+    gr.HTML("""<div class="main-header"><h1>🧵 Garment Pattern Studio</h1>
+        <p style="font-size:1.1em;color:#555;">Analyze garments, edit with chat, preview in 3D, refine with AI agent</p></div>
+    <div class="info-box"><b>Powered by:</b> Qwen2.5-VL 72B · Gemma 4 31B · Kimi K2.5 via
         <a href="https://huggingface.co/docs/inference-providers">HF Inference Providers</a>
+        &nbsp;|&nbsp; <b>3D view built from actual 2D pattern pieces</b></div>""")
     with gr.Tab("📸 From Image"):
         with gr.Row():
             with gr.Column(scale=1):
                 text_input = gr.Textbox(label="Describe the garment", placeholder="e.g., A fitted A-line dress with cap sleeves", lines=3)
                 text_btn = gr.Button("Generate Pattern", variant="primary", size="lg")
+                gr.Examples(examples=[["A classic dress shirt with long sleeves and button-down collar"],["A flared midi skirt with high waist"],["An oversized hoodie with kangaroo pocket"],["A fitted blazer with notched lapel collar"],["Slim-fit straight-leg jeans with pockets"],["A knee-length A-line dress with cap sleeves"]], inputs=text_input)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Column(): txt_pattern_2d = gr.Image(label="2D Pattern", height=400)
     with gr.Tab("🔄 Agentic Refinement"):
         gr.Markdown("""### Iterative Refinement Loop
 Upload a garment image. The AI agent will:
+1. **Analyze** → extract initial pattern parameters via VLM
+2. **Generate** → create 2D pattern + 3D garment from pattern pieces
 3. **Project** → render 3D to 2D front view
+4. **Compare** → SSIM + Edge-SSIM similarity metrics
+5. **VLM Assess** → visual comparison, identify differences, suggest adjustments
+6. **Refine** → apply damped parameter updates, keep-best tracking
+7. **Repeat** until convergence or max iterations
 *Requires HF_TOKEN for VLM-powered refinement.*""")
         with gr.Row():
                     with gr.Column(): refine_log = gr.Markdown(label="Refinement Log")
                 refine_summary = gr.Markdown()
                 with gr.Accordion("Best Parameters JSON", open=False): refine_json = gr.Code(language="json")
+        refine_btn.click(run_refinement, inputs=[refine_image, refine_type, refine_iters],
                          outputs=[refine_2d, refine_3d, refine_proj, refine_log, refine_json, refine_summary])
+    gr.HTML("""<div class="ref-box" style="margin-top:20px;"><h4>Research References</h4><ul>
+        <li><b>ChatGarment</b> (2024) [<a href="https://arxiv.org/abs/2412.17811">Paper</a>]</li>
+        <li><b>NGL-Prompter</b> (2025) [<a href="https://arxiv.org/abs/2602.20700">Paper</a>]</li>
+        <li><b>RRVF</b> (2025) — Render-compare visual feedback [<a href="https://arxiv.org/abs/2507.20766">Paper</a>]</li>
+        <li><b>SceneAssistant</b> (2026) — Agentic VLM refinement [<a href="https://arxiv.org/abs/2603.12238">Paper</a>]</li></ul></div>""")
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, css=CSS, theme=gr.themes.Soft())