Spaces:

gpue
/

foundationpose

Sleeping

App Files Files Community

Georg committed on Jan 29

Commit

e219ce4

1 Parent(s): 1fd398f

Add joblib dependency and implement CAD-based & model-free init tabs

Browse files

Files changed (20) hide show

Dockerfile.base +1 -0
app.py +147 -39
client.py +126 -71
tests/README.md +33 -0
tests/reference/target_cube/image_001.jpg +0 -0
tests/reference/target_cube/image_002.jpg +0 -0
tests/reference/target_cube/image_003.jpg +0 -0
tests/reference/target_cube/image_004.jpg +0 -0
tests/reference/target_cube/image_005.jpg +0 -0
tests/reference/target_cube/image_006.jpg +0 -0
tests/reference/target_cube/image_007.jpg +0 -0
tests/reference/target_cube/image_008.jpg +0 -0
tests/reference/target_cube/image_009.jpg +0 -0
tests/reference/target_cube/image_010.jpg +0 -0
tests/reference/target_cube/image_011.jpg +0 -0
tests/reference/target_cube/image_012.jpg +0 -0
tests/reference/target_cube/image_013.jpg +0 -0
tests/reference/target_cube/image_014.jpg +0 -0
tests/reference/target_cube/image_015.jpg +0 -0
tests/test_estimator.py +183 -0

Dockerfile.base CHANGED Viewed

@@ -70,6 +70,7 @@ RUN pip install --no-cache-dir \
     timm==0.9.16 \
     transformations==2024.6.1 \
     pyyaml==6.0.1 \
     && pip cache purge
 # Note: nvdiffrast will be built in final Dockerfile on HuggingFace (needs GPU)

     timm==0.9.16 \
     transformations==2024.6.1 \
     pyyaml==6.0.1 \
+    joblib==1.4.0 \
     && pip cache purge
 # Note: nvdiffrast will be built in final Dockerfile on HuggingFace (needs GPU)

app.py CHANGED Viewed

@@ -167,8 +167,51 @@ pose_estimator = FoundationPoseInference()
 # Gradio wrapper functions
-def gradio_initialize(object_id: str, reference_files: List, fx: float, fy: float, cx: float, cy: float):
-    """Gradio wrapper for object initialization."""
     try:
         if not reference_files:
             return "Error: No reference images provided"
@@ -185,6 +228,9 @@ def gradio_initialize(object_id: str, reference_files: List, fx: float, fy: floa
         if not reference_images:
             return "Error: Could not load any reference images"
         # Prepare camera intrinsics
         camera_intrinsics = {
             "fx": fx,
@@ -193,20 +239,21 @@ def gradio_initialize(object_id: str, reference_files: List, fx: float, fy: floa
             "cy": cy
         }
-        # Register object
         success = pose_estimator.register_object(
             object_id=object_id,
             reference_images=reference_images,
-            camera_intrinsics=camera_intrinsics
         )
         if success:
-            return f"✓ Object '{object_id}' initialized with {len(reference_images)} reference images"
         else:
             return f"✗ Failed to initialize object '{object_id}'"
     except Exception as e:
-        logger.error(f"Gradio initialization error: {e}", exc_info=True)
         return f"Error: {str(e)}"
@@ -290,47 +337,108 @@ with gr.Blocks(title="FoundationPose Inference", theme=gr.themes.Soft()) as demo
         # Tab 1: Initialize Object
         with gr.Tab("Initialize Object"):
             gr.Markdown("""
-            Upload reference images of your object from different angles (8-20 images recommended).
-            The model will learn the object's appearance for pose estimation.
             """)
-            with gr.Row():
-                with gr.Column():
-                    init_object_id = gr.Textbox(
-                        label="Object ID",
-                        placeholder="e.g., target_cube",
-                        value="target_cube"
-                    )
-                    init_ref_files = gr.File(
-                        label="Reference Images",
-                        file_count="multiple",
-                        file_types=["image"]
-                    )
-                    gr.Markdown("### Camera Intrinsics")
                     with gr.Row():
-                        init_fx = gr.Number(label="fx (focal length x)", value=500.0)
-                        init_fy = gr.Number(label="fy (focal length y)", value=500.0)
-                    with gr.Row():
-                        init_cx = gr.Number(label="cx (principal point x)", value=320.0)
-                        init_cy = gr.Number(label="cy (principal point y)", value=240.0)
-                    init_button = gr.Button("Initialize Object", variant="primary")
-                with gr.Column():
-                    init_output = gr.Textbox(
-                        label="Initialization Result",
-                        lines=5,
-                        interactive=False
                     )
-            init_button.click(
-                fn=gradio_initialize,
-                inputs=[init_object_id, init_ref_files, init_fx, init_fy, init_cx, init_cy],
-                outputs=init_output
-            )
         # Tab 2: Estimate Pose
         with gr.Tab("Estimate Pose"):
             gr.Markdown("""

 # Gradio wrapper functions
+def gradio_initialize_cad(object_id: str, mesh_file, reference_files: List, fx: float, fy: float, cx: float, cy: float):
+    """Gradio wrapper for CAD-based object initialization."""
+    try:
+        if not mesh_file:
+            return "Error: No mesh file provided"
+        # Load reference images (optional for CAD mode)
+        reference_images = []
+        if reference_files:
+            for file in reference_files:
+                img = cv2.imread(file.name)
+                if img is None:
+                    continue
+                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+                reference_images.append(img)
+        # Prepare camera intrinsics
+        camera_intrinsics = {
+            "fx": fx,
+            "fy": fy,
+            "cx": cx,
+            "cy": cy
+        }
+        # Register object with mesh
+        success = pose_estimator.register_object(
+            object_id=object_id,
+            reference_images=reference_images if reference_images else [],
+            camera_intrinsics=camera_intrinsics,
+            mesh_path=mesh_file.name
+        )
+        if success:
+            ref_info = f" and {len(reference_images)} reference images" if reference_images else ""
+            return f"✓ Object '{object_id}' initialized with CAD model{ref_info}"
+        else:
+            return f"✗ Failed to initialize object '{object_id}'"
+    except Exception as e:
+        logger.error(f"CAD initialization error: {e}", exc_info=True)
+        return f"Error: {str(e)}"
+def gradio_initialize_model_free(object_id: str, reference_files: List, fx: float, fy: float, cx: float, cy: float):
+    """Gradio wrapper for model-free object initialization."""
     try:
         if not reference_files:
             return "Error: No reference images provided"
         if not reference_images:
             return "Error: Could not load any reference images"
+        if len(reference_images) < 8:
+            return f"Warning: Only {len(reference_images)} images provided. 16-24 recommended for best results."
         # Prepare camera intrinsics
         camera_intrinsics = {
             "fx": fx,
             "cy": cy
         }
+        # Register object without mesh (model-free)
         success = pose_estimator.register_object(
             object_id=object_id,
             reference_images=reference_images,
+            camera_intrinsics=camera_intrinsics,
+            mesh_path=None
         )
         if success:
+            return f"✓ Object '{object_id}' initialized with {len(reference_images)} reference images (model-free mode)"
         else:
             return f"✗ Failed to initialize object '{object_id}'"
     except Exception as e:
+        logger.error(f"Model-free initialization error: {e}", exc_info=True)
         return f"Error: {str(e)}"
         # Tab 1: Initialize Object
         with gr.Tab("Initialize Object"):
             gr.Markdown("""
+            Choose the initialization mode based on whether you have a 3D CAD model of your object.
             """)
+            with gr.Tabs():
+                # Sub-tab 1.1: CAD-Based Init
+                with gr.Tab("CAD-Based (Model-Based)"):
+                    gr.Markdown("""
+                    **Model-Based Mode**: Use this if you have a 3D mesh/CAD model (.obj, .stl, .ply).
+                    - Upload your 3D mesh file
+                    - Optionally upload reference images for better initialization
+                    - More accurate and robust
+                    """)
                     with gr.Row():
+                        with gr.Column():
+                            cad_object_id = gr.Textbox(
+                                label="Object ID",
+                                placeholder="e.g., target_cube",
+                                value="target_cube"
+                            )
+                            cad_mesh_file = gr.File(
+                                label="3D Mesh File (.obj, .stl, .ply)",
+                                file_count="single",
+                                file_types=[".obj", ".stl", ".ply", ".mesh"]
+                            )
+                            cad_ref_files = gr.File(
+                                label="Reference Images (Optional)",
+                                file_count="multiple",
+                                file_types=["image"]
+                            )
+                            gr.Markdown("### Camera Intrinsics")
+                            with gr.Row():
+                                cad_fx = gr.Number(label="fx", value=500.0)
+                                cad_fy = gr.Number(label="fy", value=500.0)
+                            with gr.Row():
+                                cad_cx = gr.Number(label="cx", value=320.0)
+                                cad_cy = gr.Number(label="cy", value=240.0)
+                            cad_init_button = gr.Button("Initialize with CAD", variant="primary")
+                        with gr.Column():
+                            cad_init_output = gr.Textbox(
+                                label="Initialization Result",
+                                lines=5,
+                                interactive=False
+                            )
+                    cad_init_button.click(
+                        fn=gradio_initialize_cad,
+                        inputs=[cad_object_id, cad_mesh_file, cad_ref_files, cad_fx, cad_fy, cad_cx, cad_cy],
+                        outputs=cad_init_output
+                    )
+                # Sub-tab 1.2: Model-Free Init
+                with gr.Tab("Model-Free (Reference-Based)"):
+                    gr.Markdown("""
+                    **Model-Free Mode**: Use this if you don't have a 3D model.
+                    - Upload 16-24 reference images from different viewpoints
+                    - Works without a 3D model
+                    - Less accurate than CAD-based but more flexible
+                    """)
+                    with gr.Row():
+                        with gr.Column():
+                            free_object_id = gr.Textbox(
+                                label="Object ID",
+                                placeholder="e.g., target_cube",
+                                value="target_cube"
+                            )
+                            free_ref_files = gr.File(
+                                label="Reference Images (16-24 recommended)",
+                                file_count="multiple",
+                                file_types=["image"]
+                            )
+                            gr.Markdown("### Camera Intrinsics")
+                            with gr.Row():
+                                free_fx = gr.Number(label="fx", value=500.0)
+                                free_fy = gr.Number(label="fy", value=500.0)
+                            with gr.Row():
+                                free_cx = gr.Number(label="cx", value=320.0)
+                                free_cy = gr.Number(label="cy", value=240.0)
+                            free_init_button = gr.Button("Initialize Model-Free", variant="primary")
+                        with gr.Column():
+                            free_init_output = gr.Textbox(
+                                label="Initialization Result",
+                                lines=5,
+                                interactive=False
+                            )
+                    free_init_button.click(
+                        fn=gradio_initialize_model_free,
+                        inputs=[free_object_id, free_ref_files, free_fx, free_fy, free_cx, free_cy],
+                        outputs=free_init_output
                     )
         # Tab 2: Estimate Pose
         with gr.Tab("Estimate Pose"):
             gr.Markdown("""

client.py CHANGED Viewed

@@ -5,22 +5,21 @@ This client can be used from the robot-ml training pipeline to call the
 FoundationPose inference API hosted on Hugging Face Spaces.
 """
-import base64
 import json
 import logging
-from io import BytesIO
 from pathlib import Path
 from typing import Dict, List, Optional
 import cv2
 import numpy as np
-import requests
 logger = logging.getLogger(__name__)
 class FoundationPoseClient:
-    """Client for FoundationPose API."""
     def __init__(self, api_url: str = "https://gpue-foundationpose.hf.space"):
         """Initialize client.
@@ -29,27 +28,26 @@ class FoundationPoseClient:
             api_url: Base URL of the FoundationPose Space
         """
         self.api_url = api_url.rstrip("/")
-        self.session = requests.Session()
-        self.session.headers.update({"Content-Type": "application/json"})
-    def _encode_image(self, image: np.ndarray) -> str:
-        """Encode image as base64 JPEG.
         Args:
             image: RGB image as numpy array
         Returns:
-            Base64-encoded JPEG string
         """
         # Convert RGB to BGR for OpenCV
         image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-        # Encode as JPEG
-        _, buffer = cv2.imencode(".jpg", image_bgr, [cv2.IMWRITE_JPEG_QUALITY, 85])
-        # Convert to base64
-        image_b64 = base64.b64encode(buffer).decode("utf-8")
-        return image_b64
     def initialize(
         self,
@@ -62,7 +60,7 @@ class FoundationPoseClient:
         Args:
             object_id: Unique ID for the object
             reference_images: List of RGB images (numpy arrays)
-            camera_intrinsics: Optional camera parameters
         Returns:
             True if successful
@@ -72,39 +70,60 @@ class FoundationPoseClient:
         """
         logger.info(f"Initializing object '{object_id}' with {len(reference_images)} reference images")
-        # Encode images
-        images_b64 = [self._encode_image(img) for img in reference_images]
-        # Prepare request
-        payload = {
-            "object_id": object_id,
-            "reference_images_b64": images_b64,
-        }
-        if camera_intrinsics:
-            payload["camera_intrinsics"] = json.dumps(camera_intrinsics)
-        # Send request
         try:
-            response = self.session.post(
-                f"{self.api_url}/api/initialize",
-                json=payload,
-                timeout=120  # Long timeout for model loading
             )
-            response.raise_for_status()
-            result = response.json()
-            if not result.get("success"):
-                error = result.get("error", "Unknown error")
-                raise RuntimeError(f"Initialization failed: {error}")
-            logger.info(f"Object '{object_id}' initialized successfully")
-            return True
-        except requests.exceptions.RequestException as e:
             logger.error(f"API request failed: {e}")
             raise RuntimeError(f"Failed to initialize object: {e}")
     def estimate_pose(
         self,
@@ -117,7 +136,7 @@ class FoundationPoseClient:
         Args:
             object_id: ID of object to detect
             query_image: RGB query image as numpy array
-            camera_intrinsics: Optional camera parameters
         Returns:
             List of detected poses:
@@ -134,38 +153,74 @@ class FoundationPoseClient:
         Raises:
             RuntimeError: If estimation fails
         """
-        # Encode image
-        image_b64 = self._encode_image(query_image)
-        # Prepare request
-        payload = {
-            "object_id": object_id,
-            "query_image_b64": image_b64,
-        }
-        if camera_intrinsics:
-            payload["camera_intrinsics"] = json.dumps(camera_intrinsics)
-        # Send request
         try:
-            response = self.session.post(
-                f"{self.api_url}/api/estimate",
-                json=payload,
-                timeout=30
             )
-            response.raise_for_status()
-            result = response.json()
-            if not result.get("success"):
-                error = result.get("error", "Unknown error")
-                raise RuntimeError(f"Pose estimation failed: {error}")
-            return result.get("poses", [])
-        except requests.exceptions.RequestException as e:
             logger.error(f"API request failed: {e}")
             raise RuntimeError(f"Failed to estimate pose: {e}")
 def load_reference_images(directory: Path) -> List[np.ndarray]:

 FoundationPose inference API hosted on Hugging Face Spaces.
 """
 import json
 import logging
+import tempfile
 from pathlib import Path
 from typing import Dict, List, Optional
 import cv2
 import numpy as np
+from gradio_client import Client, handle_file
 logger = logging.getLogger(__name__)
 class FoundationPoseClient:
+    """Client for FoundationPose Gradio API."""
     def __init__(self, api_url: str = "https://gpue-foundationpose.hf.space"):
         """Initialize client.
             api_url: Base URL of the FoundationPose Space
         """
         self.api_url = api_url.rstrip("/")
+        logger.info(f"Initializing Gradio client for {self.api_url}")
+        self.client = Client(self.api_url)
+        logger.info("Gradio client initialized")
+    def _save_image_temp(self, image: np.ndarray) -> str:
+        """Save image to temporary file.
         Args:
             image: RGB image as numpy array
         Returns:
+            Path to temporary file
         """
         # Convert RGB to BGR for OpenCV
         image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        # Save to temp file
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
+        cv2.imwrite(temp_file.name, image_bgr, [cv2.IMWRITE_JPEG_QUALITY, 95])
+        return temp_file.name
     def initialize(
         self,
         Args:
             object_id: Unique ID for the object
             reference_images: List of RGB images (numpy arrays)
+            camera_intrinsics: Optional camera parameters (dict with fx, fy, cx, cy)
         Returns:
             True if successful
         """
         logger.info(f"Initializing object '{object_id}' with {len(reference_images)} reference images")
+        # Save images to temporary files
+        temp_files = []
         try:
+            for img in reference_images:
+                temp_path = self._save_image_temp(img)
+                temp_files.append(temp_path)
+            # Extract camera intrinsics or use defaults
+            if camera_intrinsics:
+                fx = camera_intrinsics.get("fx", 600.0)
+                fy = camera_intrinsics.get("fy", 600.0)
+                cx = camera_intrinsics.get("cx", 320.0)
+                cy = camera_intrinsics.get("cy", 240.0)
+            else:
+                fx, fy, cx, cy = 600.0, 600.0, 320.0, 240.0
+            # Call Gradio API
+            result = self.client.predict(
+                object_id=object_id,
+                reference_files=[handle_file(f) for f in temp_files],
+                fx=fx,
+                fy=fy,
+                cx=cx,
+                cy=cy,
+                api_name="/gradio_initialize"
             )
+            # Parse result - Gradio returns plain text
+            logger.info(f"API result: {result}")
+            if isinstance(result, str):
+                # Check if result indicates success (contains ✓ or "initialized")
+                if "✓" in result or "initialized" in result.lower():
+                    logger.info("Initialization successful")
+                    return True
+                elif "Error" in result or "error" in result:
+                    raise RuntimeError(f"Initialization failed: {result}")
+                else:
+                    # Assume success if no error indication
+                    return True
+            else:
+                raise RuntimeError(f"Unexpected result type: {type(result)}")
+        except RuntimeError:
+            raise
+        except Exception as e:
             logger.error(f"API request failed: {e}")
             raise RuntimeError(f"Failed to initialize object: {e}")
+        finally:
+            # Clean up temp files
+            for temp_file in temp_files:
+                try:
+                    Path(temp_file).unlink()
+                except Exception:
+                    pass
     def estimate_pose(
         self,
         Args:
             object_id: ID of object to detect
             query_image: RGB query image as numpy array
+            camera_intrinsics: Optional camera parameters (dict with fx, fy, cx, cy)
         Returns:
             List of detected poses:
         Raises:
             RuntimeError: If estimation fails
         """
+        # Save query image to temp file
+        temp_file = self._save_image_temp(query_image)
         try:
+            # Extract camera intrinsics or use defaults
+            if camera_intrinsics:
+                fx = camera_intrinsics.get("fx", 600.0)
+                fy = camera_intrinsics.get("fy", 600.0)
+                cx = camera_intrinsics.get("cx", 320.0)
+                cy = camera_intrinsics.get("cy", 240.0)
+            else:
+                fx, fy, cx, cy = 600.0, 600.0, 320.0, 240.0
+            # Call Gradio API
+            result = self.client.predict(
+                object_id=object_id,
+                query_image=handle_file(temp_file),
+                fx=fx,
+                fy=fy,
+                cx=cx,
+                cy=cy,
+                api_name="/gradio_estimate"
             )
+            # Parse result - Gradio may return tuple (text, image) or just text
+            logger.info(f"API result type: {type(result)}")
+            # If tuple, take first element (text output)
+            if isinstance(result, tuple):
+                result = result[0]
+            if isinstance(result, str):
+                logger.info(f"API result: {result}")
+                # Check for errors
+                if "Error" in result or "not initialized" in result:
+                    raise RuntimeError(f"Pose estimation failed: {result}")
+                # Try to parse as JSON (in case app.py returns JSON string)
+                try:
+                    result_dict = json.loads(result)
+                    if isinstance(result_dict, dict) and "poses" in result_dict:
+                        return result_dict["poses"]
+                except (json.JSONDecodeError, ValueError):
+                    pass
+                # Check if the result indicates no poses detected
+                if "No poses detected" in result or "⚠" in result:
+                    logger.info("No poses detected in query image")
+                    return []
+                # For now, return empty list with a warning
+                logger.warning(f"Could not parse pose from result: {result}")
+                return []
+            else:
+                raise RuntimeError(f"Unexpected result type: {type(result)}")
+        except RuntimeError:
+            raise
+        except Exception as e:
             logger.error(f"API request failed: {e}")
             raise RuntimeError(f"Failed to estimate pose: {e}")
+        finally:
+            # Clean up temp file
+            try:
+                Path(temp_file).unlink()
+            except Exception:
+                pass
 def load_reference_images(directory: Path) -> List[np.ndarray]:

tests/README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+# FoundationPose Tests
+This directory contains test scripts for the FoundationPose estimator.
+## Test Data
+Reference images for test objects are stored in `reference/target_cube/`.
+## Running Tests
+### Test Estimator Locally
+```bash
+cd /path/to/foundationpose
+python tests/test_estimator.py
+```
+### Test Against HuggingFace Space
+Use the client script to test the deployed API:
+```bash
+python client.py
+```
+## Test Coverage
+**test_estimator.py** tests:
+1. API client initialization
+2. Object registration with reference images
+3. Pose estimation on query images
+The test uses images from `reference/target_cube/` to register an object, then randomly selects one image to test pose estimation.

tests/reference/target_cube/image_001.jpg ADDED Viewed

tests/reference/target_cube/image_002.jpg ADDED Viewed

tests/reference/target_cube/image_003.jpg ADDED Viewed

tests/reference/target_cube/image_004.jpg ADDED Viewed

tests/reference/target_cube/image_005.jpg ADDED Viewed

tests/reference/target_cube/image_006.jpg ADDED Viewed

tests/reference/target_cube/image_007.jpg ADDED Viewed

tests/reference/target_cube/image_008.jpg ADDED Viewed

tests/reference/target_cube/image_009.jpg ADDED Viewed

tests/reference/target_cube/image_010.jpg ADDED Viewed

tests/reference/target_cube/image_011.jpg ADDED Viewed

tests/reference/target_cube/image_012.jpg ADDED Viewed

tests/reference/target_cube/image_013.jpg ADDED Viewed

tests/reference/target_cube/image_014.jpg ADDED Viewed

tests/reference/target_cube/image_015.jpg ADDED Viewed

tests/test_estimator.py ADDED Viewed

	@@ -0,0 +1,183 @@

+"""
+Test script for FoundationPose HuggingFace API.
+This test verifies that the API can:
+1. Load reference images
+2. Initialize an object with reference images
+3. Estimate pose from a query image
+"""
+import sys
+from pathlib import Path
+import random
+import cv2
+# Add parent directory to path to import client
+sys.path.insert(0, str(Path(__file__).parent.parent))
+from client import FoundationPoseClient
+def load_reference_images(reference_dir: Path):
+    """Load all reference images from directory."""
+    image_files = sorted(reference_dir.glob("*.jpg"))
+    images = []
+    for img_path in image_files:
+        # Use cv2 to load images (same as client.py)
+        img = cv2.imread(str(img_path))
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        images.append(img)
+    return images, image_files
+def test_client_initialization():
+    """Test that API client initializes without errors."""
+    print("=" * 60)
+    print("Test 1: API Client Initialization")
+    print("=" * 60)
+    try:
+        client = FoundationPoseClient(api_url="https://gpue-foundationpose.hf.space")
+        print("✓ API client initialized successfully")
+        return client
+    except Exception as e:
+        print(f"✗ API client initialization failed: {e}")
+        return None
+def test_object_initialization(client, reference_images):
+    """Test object initialization with reference images via API."""
+    print("\n" + "=" * 60)
+    print("Test 2: Object Initialization via API")
+    print("=" * 60)
+    # Define camera intrinsics (typical values for RGB camera)
+    camera_intrinsics = {
+        "fx": 600.0,
+        "fy": 600.0,
+        "cx": 320.0,
+        "cy": 240.0
+    }
+    try:
+        success = client.initialize(
+            object_id="target_cube",
+            reference_images=reference_images,
+            camera_intrinsics=camera_intrinsics
+        )
+        if success:
+            print(f"✓ Object initialized successfully with {len(reference_images)} reference images")
+            return True
+        else:
+            print("✗ Object initialization failed")
+            return False
+    except Exception as e:
+        print(f"✗ Object initialization failed with exception: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+def test_pose_estimation(client, query_image, query_name):
+    """Test pose estimation on a query image via API."""
+    print("\n" + "=" * 60)
+    print("Test 3: Pose Estimation via API")
+    print("=" * 60)
+    print(f"Query image: {query_name}")
+    # Define camera intrinsics (same as initialization)
+    camera_intrinsics = {
+        "fx": 600.0,
+        "fy": 600.0,
+        "cx": 320.0,
+        "cy": 240.0
+    }
+    try:
+        poses = client.estimate_pose(
+            object_id="target_cube",
+            query_image=query_image,
+            camera_intrinsics=camera_intrinsics
+        )
+        if poses and len(poses) > 0:
+            print(f"✓ Pose estimation completed successfully (detected {len(poses)} object(s))")
+            for i, pose in enumerate(poses):
+                print(f"\nDetected Object {i+1}:")
+                print(f"  Position: x={pose['position']['x']:.3f}, "
+                      f"y={pose['position']['y']:.3f}, "
+                      f"z={pose['position']['z']:.3f}")
+                print(f"  Orientation (quaternion): w={pose['orientation']['w']:.3f}, "
+                      f"x={pose['orientation']['x']:.3f}, "
+                      f"y={pose['orientation']['y']:.3f}, "
+                      f"z={pose['orientation']['z']:.3f}")
+                print(f"  Confidence: {pose['confidence']:.3f}")
+            return True
+        else:
+            print("✗ Pose estimation returned no detections")
+            return False
+    except Exception as e:
+        print(f"✗ Pose estimation failed with exception: {e}")
+        import traceback
+        traceback.print_exc()
+        return False
+def main():
+    """Run all tests."""
+    print("\n" + "=" * 60)
+    print("FoundationPose HuggingFace API Test Suite")
+    print("=" * 60)
+    # Setup paths
+    test_dir = Path(__file__).parent
+    reference_dir = test_dir / "reference" / "target_cube"
+    if not reference_dir.exists():
+        print(f"✗ Reference directory not found: {reference_dir}")
+        return
+    # Load reference images
+    print(f"\nLoading reference images from: {reference_dir}")
+    reference_images, image_files = load_reference_images(reference_dir)
+    print(f"✓ Loaded {len(reference_images)} reference images")
+    # Test 1: Initialize API client
+    client = test_client_initialization()
+    if client is None:
+        print("\n" + "=" * 60)
+        print("TESTS ABORTED: API client initialization failed")
+        print("=" * 60)
+        return
+    # Test 2: Initialize object via API
+    success = test_object_initialization(client, reference_images)
+    if not success:
+        print("\n" + "=" * 60)
+        print("TESTS ABORTED: Object initialization failed")
+        print("=" * 60)
+        return
+    # Test 3: Estimate pose on a random reference image
+    random_idx = random.randint(0, len(reference_images) - 1)
+    query_image = reference_images[random_idx]
+    query_name = image_files[random_idx].name
+    success = test_pose_estimation(client, query_image, query_name)
+    # Print final results
+    print("\n" + "=" * 60)
+    if success:
+        print("ALL TESTS PASSED ✓")
+    else:
+        print("SOME TESTS FAILED ✗")
+    print("=" * 60)
+if __name__ == "__main__":
+    main()