Add agentic refinement loop module
refinement_loop.py  (ADDED, +331 -0)
@@ -0,0 +1,331 @@
"""
Agentic Refinement Loop: Image → Pattern → 3D → Projection → Compare → Refine

Iteratively refines garment pattern parameters until the 3D garment projection
matches the original input image. Uses:
- Matplotlib 3D rendering for projection (CPU, no Chrome)
- SSIM + Edge-SSIM for fast similarity gating (CPU)
- VLM (via the HF Inference API) for visual comparison and parameter adjustment
- Keep-best tracking to prevent oscillation
"""
import base64
import copy
import io
import json
import os
import re

import numpy as np
import matplotlib
matplotlib.use('Agg')  # headless backend; must be selected before pyplot is imported
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from PIL import Image
from typing import Dict
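
# For reference, the analysis dict threaded through the loop is assumed to
# look roughly like the sketch below. The three top-level keys are the ones
# apply_adjustments() and refinement_loop() actually read; the individual
# measurement/feature names are illustrative and come from the upstream
# image-analysis step, not from this module:
#
#     {
#         "garment_type": "t-shirt",
#         "measurements": {"chest": 100.0, "length": 70.0, "sleeve": 20.0},
#         "features": {"neckline": "crew", "fit": "regular"},
#     }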


def render_3d_to_image(plotly_fig, elev=15, azim=45, width=512, height=512):
    """Render a Plotly 3D figure to a PIL image using matplotlib."""
    fig = plt.figure(figsize=(width / 100, height / 100), dpi=100)
    ax = fig.add_subplot(111, projection='3d')

    for trace in plotly_fig.data:
        try:
            if trace.name == "Body":
                # Body surface: drawn faintly so the garment reads on top of it.
                x, y, z = np.array(trace.x), np.array(trace.y), np.array(trace.z)
                ax.plot_surface(x, y, z, alpha=0.08, color='#E8D0B0',
                                edgecolor='none', shade=False)
            elif hasattr(trace, 'i') and trace.i is not None:
                # Mesh3d trace: rebuild the triangle list from the i/j/k indices.
                verts_x = np.array(trace.x, dtype=float)
                verts_y = np.array(trace.y, dtype=float)
                verts_z = np.array(trace.z, dtype=float)
                faces_i = np.array(trace.i, dtype=int)
                faces_j = np.array(trace.j, dtype=int)
                faces_k = np.array(trace.k, dtype=int)
                verts = list(zip(verts_x, verts_y, verts_z))
                faces = [[verts[i], verts[j], verts[k]]
                         for i, j, k in zip(faces_i, faces_j, faces_k)]
                color = trace.color if hasattr(trace, 'color') and trace.color else '#4A90D9'
                poly = Poly3DCollection(faces, alpha=0.75,
                                        facecolor=color, edgecolor='none')
                ax.add_collection3d(poly)
            elif hasattr(trace, 'x') and trace.x is not None:
                # Fallback: any remaining grid-shaped trace is drawn as a surface.
                x = np.array(trace.x, dtype=float)
                y = np.array(trace.y, dtype=float)
                z = np.array(trace.z, dtype=float)
                if x.ndim == 2:
                    ax.plot_surface(x, y, z, alpha=0.6, color='#4A90D9',
                                    edgecolor='none', shade=True)
        except Exception:
            continue  # skip traces that do not fit any of the shapes above

    ax.view_init(elev=elev, azim=azim)
    ax.set_xlim(-35, 35)
    ax.set_ylim(-35, 35)
    ax.set_zlim(0, 180)
    ax.axis('off')
    ax.set_facecolor('white')
    fig.patch.set_facecolor('white')

    buf = io.BytesIO()
    fig.savefig(buf, format='png', dpi=100, bbox_inches='tight',
                facecolor='white', pad_inches=0.1)
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf).convert('RGB')


def compute_similarity(img1: Image.Image, img2: Image.Image,
                       size=(256, 256)) -> Dict:
    """Compute CPU-based similarity metrics between two images."""
    from skimage.metrics import structural_similarity as ssim_fn
    from skimage import filters

    arr1 = np.array(img1.resize(size).convert('RGB'), dtype=float)
    arr2 = np.array(img2.resize(size).convert('RGB'), dtype=float)

    ssim_val = ssim_fn(arr1 / 255.0, arr2 / 255.0, channel_axis=2, data_range=1.0)
    # Inverted, normalized MSE so that 1.0 means identical (a similarity,
    # despite the 'mse' key name).
    mse_val = 1.0 - np.mean((arr1 - arr2) ** 2) / (255.0 ** 2)

    # SSIM over Sobel edge maps emphasizes silhouette agreement.
    gray1 = arr1.mean(axis=2) / 255.0
    gray2 = arr2.mean(axis=2) / 255.0
    edges1 = filters.sobel(gray1)
    edges2 = filters.sobel(gray2)
    edge_ssim_val = ssim_fn(edges1, edges2, data_range=1.0)

    composite = 0.4 * ssim_val + 0.3 * mse_val + 0.3 * edge_ssim_val

    return {
        'ssim': round(float(ssim_val), 4),
        'mse': round(float(mse_val), 4),
        'edge_ssim': round(float(edge_ssim_val), 4),
        'composite': round(float(composite), 4),
    }
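
# Sanity check: comparing an image with itself scores 1.0 on every component,
# so the composite is 1.0 as well (0.4 + 0.3 + 0.3):
#
#     img = Image.new('RGB', (64, 64), 'white')
#     compute_similarity(img, img)['composite']  # -> 1.0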


def _image_to_b64(img: Image.Image, max_dim=512) -> str:
    """Downscale if needed and return the image as base64-encoded JPEG."""
    if max(img.size) > max_dim:
        ratio = max_dim / max(img.size)
        img = img.resize((int(img.size[0] * ratio), int(img.size[1] * ratio)), Image.LANCZOS)
    buf = io.BytesIO()
    img.convert('RGB').save(buf, format='JPEG', quality=85)
    return base64.b64encode(buf.getvalue()).decode('utf-8')


def vlm_compare_and_adjust(original_img, projection_img, current_params,
                           iteration, metrics, hf_token):
    """Use a VLM to compare images and suggest parameter adjustments."""
    import requests

    orig_b64 = _image_to_b64(original_img)
    proj_b64 = _image_to_b64(projection_img)

    display_params = {k: v for k, v in current_params.items() if k != '_model_used'}

    prompt = f"""You are a garment pattern expert doing iterative refinement.

Iteration {iteration}. Current similarity: SSIM={metrics['ssim']:.3f}, Edge={metrics['edge_ssim']:.3f}, Composite={metrics['composite']:.3f}

Current garment parameters:
{json.dumps(display_params, indent=2)}

Image 1 = ORIGINAL garment photo. Image 2 = 3D pattern projection.

Compare carefully. Identify differences in: silhouette, sleeve length/width, neckline/collar, hem length/flare, fit.

Return ONLY valid JSON (no markdown):
{{"differences": ["diff1", "diff2"], "adjustments": {{"param": value}}, "confidence": 0.0_to_1.0, "converged": true_or_false}}

Only adjust params that exist in current params. Set converged=true if sufficiently similar."""

    messages = [{"role": "user", "content": [
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{orig_b64}"}},
        {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{proj_b64}"}},
        {"type": "text", "text": prompt}
    ]}]

    # Vision-capable fallback chain. Availability on the HF router changes
    # over time and varies by provider, so adjust these IDs to whatever is
    # currently served.
    models = [
        ("Qwen/Qwen2.5-VL-7B-Instruct", "together"),
        ("google/gemma-3-27b-it", "together"),
        ("moonshotai/Kimi-VL-A3B-Instruct", "together"),
    ]

    for model_id, provider in models:
        try:
            url = f"https://router.huggingface.co/{provider}/v1/chat/completions"
            headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
            payload = {"model": model_id, "messages": messages, "max_tokens": 1500, "temperature": 0.1}

            resp = requests.post(url, headers=headers, json=payload, timeout=120)
            if resp.status_code == 200:
                text = resp.json()['choices'][0]['message'].get('content', '')
                if not text:
                    # Some reasoning models put their output in 'reasoning'.
                    text = resp.json()['choices'][0]['message'].get('reasoning', '')

                # Prefer a fenced JSON block; fall back to the first brace span.
                json_match = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
                if json_match:
                    json_str = json_match.group(1)
                else:
                    json_match = re.search(r'\{[\s\S]*\}', text)
                    if json_match:
                        json_str = json_match.group()
                    else:
                        continue

                result = json.loads(json_str)
                result['_model'] = model_id.split('/')[-1]
                return result
        except Exception as e:
            print(f"[Refine] {model_id}: {e}")
            continue
    return None
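
# A well-formed reply, after JSON extraction, is expected to look like:
#
#     {"differences": ["sleeves too long", "hem should flare"],
#      "adjustments": {"sleeve": 18.0, "hem_width": 110.0},
#      "confidence": 0.7,
#      "converged": false}
#
# (parameter names here are illustrative; the prompt instructs the model to
# reuse only keys that already exist in current_params)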


def apply_adjustments(analysis, adjustments, lr=0.7):
    """Apply parameter adjustments with a damping factor.

    Numeric measurements move only a fraction lr of the way toward the
    suggested value, which damps oscillation between iterations.
    """
    updated = copy.deepcopy(analysis)
    measurements = updated.get('measurements', {})
    features = updated.get('features', {})

    for param, new_value in adjustments.items():
        if param in measurements:
            old_value = measurements[param]
            if isinstance(old_value, (int, float)) and isinstance(new_value, (int, float)):
                measurements[param] = round(old_value + lr * (new_value - old_value), 1)
            else:
                measurements[param] = new_value
        elif param in features:
            features[param] = new_value
        elif param == 'garment_type':
            updated['garment_type'] = new_value
        # Unknown params are ignored deliberately; the VLM is told to touch
        # only keys that already exist.

    updated['measurements'] = measurements
    updated['features'] = features
    return updated
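
# Worked example of the damping: with lr=0.7, a suggestion to move 'chest'
# from 100.0 to 110.0 lands at 100.0 + 0.7 * (110.0 - 100.0) = 107.0, so
# repeated over-corrections converge instead of ping-ponging:
#
#     apply_adjustments({"measurements": {"chest": 100.0}, "features": {}},
#                       {"chest": 110.0})["measurements"]["chest"]  # -> 107.0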


def refinement_loop(original_image, initial_analysis, generate_fn,
                    max_iterations=8, target_composite=0.82,
                    plateau_threshold=0.005, plateau_patience=3, lr=0.7):
    """Run the agentic refinement loop.

    Args:
        original_image: PIL Image of the garment
        initial_analysis: dict with garment_type, measurements, features
        generate_fn: function(analysis) → (pattern_img, fig_3d, summary, json_str)
        max_iterations: maximum number of refinement steps
        target_composite: composite similarity at which to stop
        plateau_threshold: minimum score change that still counts as progress
        plateau_patience: consecutive no-progress iterations before stopping
        lr: damping factor passed to apply_adjustments

    Returns:
        dict with best_analysis, best_score, history, total_iterations,
        converged, and scores.
    """
    hf_token = os.environ.get("HF_TOKEN", "")

    current_analysis = copy.deepcopy(initial_analysis)
    best_analysis = copy.deepcopy(initial_analysis)
    best_score = -1.0
    history = []
    scores = []
    plateau_count = 0

    for iteration in range(1, max_iterations + 1):
        step = {"iteration": iteration}

        # Generate pattern + 3D garment from the current parameters
        try:
            pattern_img, fig_3d, summary, json_str = generate_fn(current_analysis)
        except Exception as e:
            step["status"] = "error"
            step["reason"] = f"Generation failed: {e}"
            history.append(step)
            break

        # Render 3D → 2D front view (azim=0) for comparison
        try:
            projection = render_3d_to_image(fig_3d, elev=15, azim=0)
        except Exception as e:
            step["status"] = "error"
            step["reason"] = f"Rendering failed: {e}"
            history.append(step)
            break

        # Compute similarity against the original photo
        metrics = compute_similarity(original_image, projection)
        step["metrics"] = metrics
        step["projection"] = projection
        step["pattern_image"] = pattern_img
        step["fig_3d"] = fig_3d
        step["params"] = copy.deepcopy(current_analysis)
        scores.append(metrics['composite'])

        # Keep-best: never lose the highest-scoring parameter set
        if metrics['composite'] > best_score:
            best_score = metrics['composite']
            best_analysis = copy.deepcopy(current_analysis)
            step["new_best"] = True
        else:
            step["new_best"] = False

        # Convergence: target reached
        if metrics['composite'] >= target_composite:
            step["status"] = "converged"
            step["reason"] = f"Target {target_composite} reached: {metrics['composite']:.4f}"
            history.append(step)
            break

        # Convergence: score has plateaued
        if len(scores) >= 2:
            if abs(scores[-1] - scores[-2]) < plateau_threshold:
                plateau_count += 1
            else:
                plateau_count = 0
            if plateau_count >= plateau_patience:
                step["status"] = "plateau"
                step["reason"] = f"Plateau for {plateau_patience} iterations"
                history.append(step)
                break

        # VLM feedback
        if hf_token:
            vlm_result = vlm_compare_and_adjust(
                original_image, projection, current_analysis,
                iteration, metrics, hf_token)
        else:
            vlm_result = None

        if vlm_result:
            step["vlm_differences"] = vlm_result.get('differences', [])
            step["vlm_confidence"] = vlm_result.get('confidence', 0)

            if vlm_result.get('converged', False):
                step["status"] = "vlm_converged"
                step["reason"] = "VLM declared convergence"
                history.append(step)
                break

            if vlm_result.get('confidence', 1.0) < 0.2:
                step["status"] = "low_confidence"
                step["reason"] = f"VLM confidence: {vlm_result['confidence']}"
                history.append(step)
                break

            adjustments = vlm_result.get('adjustments', {})
            if adjustments:
                current_analysis = apply_adjustments(current_analysis, adjustments, lr=lr)
                step["adjustments"] = adjustments
        else:
            step["status"] = "no_vlm"
            step["reason"] = "No VLM feedback (missing HF_TOKEN or all model calls failed)"
            history.append(step)
            break

        step["status"] = "continuing"
        history.append(step)

    if history and history[-1].get("status") == "continuing":
        history[-1]["status"] = "max_iterations"
        history[-1]["reason"] = f"Max {max_iterations} iterations reached"

    return {
        "best_analysis": best_analysis,
        "best_score": best_score,
        "history": history,
        "total_iterations": len(history),
        "converged": any(h.get("status") in ("converged", "vlm_converged") for h in history),
        "scores": scores,
    }
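

if __name__ == "__main__":
    # Minimal smoke test of the loop wiring. The stub generate_fn below is
    # hypothetical (the real one builds the pattern and draped 3D garment
    # from the analysis); it assumes plotly is installed. Without HF_TOKEN
    # the loop stops after one iteration with status "no_vlm", which still
    # exercises rendering and similarity scoring end to end.
    import plotly.graph_objects as go

    def stub_generate_fn(analysis):
        # A single tetrahedron stands in for the garment mesh.
        fig = go.Figure(data=[go.Mesh3d(
            x=[0, 20, -20, 0], y=[0, 10, 10, -10], z=[150, 100, 100, 100],
            i=[0, 0, 0, 1], j=[1, 2, 3, 2], k=[2, 3, 1, 3])])
        pattern_img = Image.new('RGB', (256, 256), 'white')
        return pattern_img, fig, "stub summary", "{}"

    original = Image.new('RGB', (512, 512), 'white')
    analysis = {"garment_type": "t-shirt",
                "measurements": {"chest": 100.0, "length": 70.0},
                "features": {"neckline": "crew"}}
    result = refinement_loop(original, analysis, stub_generate_fn, max_iterations=2)
    print("best composite:", result["best_score"])
    print("statuses:", [h["status"] for h in result["history"]])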