vikashmakeit committed on
Commit
8e1bee3
·
verified ·
1 Parent(s): e9ba07a

Fix VLM models in refinement_loop.py to match app.py (Llama-4-Scout, Kimi-K2.6, Qwen3.5-9B)

Browse files
Files changed (1) hide show
  1. refinement_loop.py +11 -14
refinement_loop.py CHANGED
@@ -12,7 +12,6 @@ from typing import Dict, List, Tuple, Optional
12
 
13
 
14
  def render_3d_to_image(plotly_fig, elev=15, azim=45, width=512, height=512):
15
- """Render a Plotly 3D figure to a PIL image using matplotlib."""
16
  fig = plt.figure(figsize=(width / 100, height / 100), dpi=100)
17
  ax = fig.add_subplot(111, projection='3d')
18
  for trace in plotly_fig.data:
@@ -21,10 +20,10 @@ def render_3d_to_image(plotly_fig, elev=15, azim=45, width=512, height=512):
21
  x, y, z = np.array(trace.x), np.array(trace.y), np.array(trace.z)
22
  ax.plot_surface(x, y, z, alpha=0.08, color='#E8D0B0', edgecolor='none', shade=False)
23
  elif hasattr(trace, 'i') and trace.i is not None:
24
- verts_x, verts_y, verts_z = np.array(trace.x, dtype=float), np.array(trace.y, dtype=float), np.array(trace.z, dtype=float)
25
- faces_i, faces_j, faces_k = np.array(trace.i, dtype=int), np.array(trace.j, dtype=int), np.array(trace.k, dtype=int)
26
- verts = list(zip(verts_x, verts_y, verts_z))
27
- faces = [[verts[i], verts[j], verts[k]] for i, j, k in zip(faces_i, faces_j, faces_k)]
28
  color = trace.color if hasattr(trace, 'color') and trace.color else '#4A90D9'
29
  ax.add_collection3d(Poly3DCollection(faces, alpha=0.75, facecolor=color, edgecolor='none'))
30
  elif hasattr(trace, 'x') and trace.x is not None:
@@ -43,7 +42,6 @@ def render_3d_to_image(plotly_fig, elev=15, azim=45, width=512, height=512):
43
 
44
 
45
  def compute_similarity(img1, img2, size=(256, 256)):
46
- """Compute CPU-based similarity metrics."""
47
  from skimage.metrics import structural_similarity as ssim_fn
48
  from skimage import filters
49
  arr1 = np.array(img1.resize(size).convert('RGB'), dtype=float)
@@ -69,7 +67,6 @@ def _image_to_b64(img, max_dim=512):
69
 
70
  def vlm_compare_and_adjust(original_img, projection_img, current_params,
71
  iteration, metrics, hf_token):
72
- """Use VLM to compare images and suggest parameter adjustments."""
73
  import requests
74
  orig_b64 = _image_to_b64(original_img)
75
  proj_b64 = _image_to_b64(projection_img)
@@ -97,11 +94,11 @@ Only adjust params that exist in current params. Set converged=true if sufficien
97
  {"type": "text", "text": prompt}
98
  ]}]
99
 
100
- # Use actual VLMs with correct providers
101
  models = [
102
- ("Qwen/Qwen2.5-VL-72B-Instruct", "together"),
103
- ("google/gemma-4-31B-it", "novita"),
104
- ("moonshotai/Kimi-K2.5", "fireworks-ai"),
105
  ]
106
 
107
  for model_id, provider in models:
@@ -111,8 +108,9 @@ Only adjust params that exist in current params. Set converged=true if sufficien
111
  payload = {"model": model_id, "messages": messages, "max_tokens": 1500, "temperature": 0.1}
112
  resp = requests.post(url, headers=headers, json=payload, timeout=120)
113
  if resp.status_code == 200:
114
- text = resp.json()['choices'][0]['message'].get('content', '')
115
- if not text: text = resp.json()['choices'][0]['message'].get('reasoning', '')
 
116
  json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
117
  json_str = json_match.group(1) if json_match else None
118
  if not json_str:
@@ -152,7 +150,6 @@ def apply_adjustments(analysis, adjustments, lr=0.7):
152
  def refinement_loop(original_image, initial_analysis, generate_fn,
153
  max_iterations=8, target_composite=0.82,
154
  plateau_threshold=0.005, plateau_patience=3, lr=0.7):
155
- """Run the agentic refinement loop."""
156
  hf_token = os.environ.get("HF_TOKEN", "")
157
  current_analysis = copy.deepcopy(initial_analysis)
158
  best_analysis = copy.deepcopy(initial_analysis)
 
12
 
13
 
14
  def render_3d_to_image(plotly_fig, elev=15, azim=45, width=512, height=512):
 
15
  fig = plt.figure(figsize=(width / 100, height / 100), dpi=100)
16
  ax = fig.add_subplot(111, projection='3d')
17
  for trace in plotly_fig.data:
 
20
  x, y, z = np.array(trace.x), np.array(trace.y), np.array(trace.z)
21
  ax.plot_surface(x, y, z, alpha=0.08, color='#E8D0B0', edgecolor='none', shade=False)
22
  elif hasattr(trace, 'i') and trace.i is not None:
23
+ vx, vy, vz = np.array(trace.x, dtype=float), np.array(trace.y, dtype=float), np.array(trace.z, dtype=float)
24
+ fi, fj, fk = np.array(trace.i, dtype=int), np.array(trace.j, dtype=int), np.array(trace.k, dtype=int)
25
+ verts = list(zip(vx, vy, vz))
26
+ faces = [[verts[i], verts[j], verts[k]] for i, j, k in zip(fi, fj, fk)]
27
  color = trace.color if hasattr(trace, 'color') and trace.color else '#4A90D9'
28
  ax.add_collection3d(Poly3DCollection(faces, alpha=0.75, facecolor=color, edgecolor='none'))
29
  elif hasattr(trace, 'x') and trace.x is not None:
 
42
 
43
 
44
  def compute_similarity(img1, img2, size=(256, 256)):
 
45
  from skimage.metrics import structural_similarity as ssim_fn
46
  from skimage import filters
47
  arr1 = np.array(img1.resize(size).convert('RGB'), dtype=float)
 
67
 
68
  def vlm_compare_and_adjust(original_img, projection_img, current_params,
69
  iteration, metrics, hf_token):
 
70
  import requests
71
  orig_b64 = _image_to_b64(original_img)
72
  proj_b64 = _image_to_b64(projection_img)
 
94
  {"type": "text", "text": prompt}
95
  ]}]
96
 
97
+ # Verified working VLMs (tested 2026-04-25)
98
  models = [
99
+ ("meta-llama/Llama-4-Scout-17B-16E-Instruct", "nscale"),
100
+ ("moonshotai/Kimi-K2.6", "together"),
101
+ ("Qwen/Qwen3.5-9B", "together"),
102
  ]
103
 
104
  for model_id, provider in models:
 
108
  payload = {"model": model_id, "messages": messages, "max_tokens": 1500, "temperature": 0.1}
109
  resp = requests.post(url, headers=headers, json=payload, timeout=120)
110
  if resp.status_code == 200:
111
+ msg = resp.json()['choices'][0]['message']
112
+ text = (msg.get('content', '') or '').strip() or (msg.get('reasoning', '') or '').strip()
113
+ if not text: continue
114
  json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
115
  json_str = json_match.group(1) if json_match else None
116
  if not json_str:
 
150
  def refinement_loop(original_image, initial_analysis, generate_fn,
151
  max_iterations=8, target_composite=0.82,
152
  plateau_threshold=0.005, plateau_patience=3, lr=0.7):
 
153
  hf_token = os.environ.get("HF_TOKEN", "")
154
  current_analysis = copy.deepcopy(initial_analysis)
155
  best_analysis = copy.deepcopy(initial_analysis)