TTI / Dev /test_vlac_service.py
JosephBai's picture
Upload folder using huggingface_hub
857c2e9 verified
#!/usr/bin/env python3
"""
Test script for VLAC Service
Tests all endpoints with sample data to verify the service is working correctly.
"""
import base64
import json
import requests
import time
from io import BytesIO
from pathlib import Path
from PIL import Image, ImageDraw
def create_test_image(size=(448, 448), color=(255, 0, 0), text="Test"):
    """Build a solid-colour RGB image carrying a short white text label.

    Args:
        size: (width, height) of the image in pixels.
        color: RGB fill colour for the whole canvas.
        text: Label drawn at pixel offset (10, 10).

    Returns:
        The newly created PIL image.
    """
    canvas = Image.new('RGB', size, color)
    # The label is drawn in white using the drawing context's default font.
    ImageDraw.Draw(canvas).text((10, 10), text, fill=(255, 255, 255))
    return canvas
def image_to_base64(img: Image.Image) -> str:
    """Encode a PIL image as JPEG and return the bytes as a base64 string.

    Args:
        img: Image to serialise; it is saved with ``format='JPEG'``.

    Returns:
        The base64 text of the JPEG-encoded image.
    """
    with BytesIO() as jpeg_stream:
        img.save(jpeg_stream, format='JPEG')
        jpeg_bytes = jpeg_stream.getvalue()
    return base64.b64encode(jpeg_bytes).decode()
def _print_response(result):
    """Print the full decoded JSON body of a response."""
    print(f"Response: {result}")


def _print_trajectory_summary(result):
    """Print the keys and list lengths of a /trajectory-critic response."""
    print(f"Response keys: {list(result.keys())}")
    print(f"Value list length: {len(result.get('value_list', []))}")
    print(f"Critic list length: {len(result.get('critic_list', []))}")


def _run_endpoint_check(label, url, *, timeout, payload=None, required_keys=(), report=None):
    """POST to ``url`` and verify a 200 JSON reply containing ``required_keys``.

    Args:
        label: Human-readable check name used in the pass/fail line.
        url: Full endpoint URL.
        timeout: Request timeout in seconds.
        payload: JSON body to send, or None for a POST without a JSON body.
        required_keys: Keys that must be present in the decoded response.
        report: Optional callable receiving the decoded response, used to
            print check-specific details.

    Returns:
        True when every expectation holds, False otherwise.
    """
    try:
        response = requests.post(url, json=payload, timeout=timeout)
        print(f"Status: {response.status_code}")
        # Explicit raises instead of ``assert``: they survive ``python -O`` and
        # give the failure line an actual explanation.
        if response.status_code != 200:
            raise AssertionError(
                f"expected HTTP 200, got {response.status_code}: {response.text[:500]}"
            )
        result = response.json()
        if report is not None:
            report(result)
        missing = [key for key in required_keys if key not in result]
        if missing:
            raise AssertionError(f"response is missing expected keys: {missing}")
        print(f"βœ“ {label} passed")
        return True
    except Exception as e:
        # Reporting boundary: connection errors, undecodable bodies and failed
        # expectations are all printed and converted into a False result.
        print(f"βœ— {label} failed: {e}")
        return False


def test_service(base_url: str = "http://localhost:8111") -> bool:
    """Exercise every service endpoint with synthetic images.

    The checks run in order (health check, pairwise critic, done detection,
    trajectory critic with and without reference images, large batch) and
    the function stops at the first one that fails.

    Args:
        base_url: Root URL of the running service.

    Returns:
        True if all checks passed, False as soon as one fails.
    """
    print(f"Testing VLAC service at {base_url}")
    task = "Pick up the red bowl and place it in the white box."

    # Test 1: Health check
    print("\n1. Testing /healthcheck...")
    if not _run_endpoint_check(
        "Health check",
        f"{base_url}/healthcheck",
        timeout=10,
        report=_print_response,
    ):
        return False

    # Create test images
    img_a_b64 = image_to_base64(create_test_image(color=(255, 0, 0), text="Image A"))
    img_b_b64 = image_to_base64(create_test_image(color=(0, 255, 0), text="Image B"))
    img_c_b64 = image_to_base64(create_test_image(color=(0, 0, 255), text="Image C"))

    # Test 2: Pairwise critic
    print("\n2. Testing /pairwise-critic...")
    if not _run_endpoint_check(
        "Pairwise critic",
        f"{base_url}/pairwise-critic",
        timeout=30,
        payload={
            "task": task,
            "image_a": img_a_b64,
            "image_b": img_b_b64,
            "rich": False,
        },
        required_keys=("critic", "raw"),
        report=_print_response,
    ):
        return False

    # Test 3: Done detection
    print("\n3. Testing /done...")
    if not _run_endpoint_check(
        "Done detection",
        f"{base_url}/done",
        timeout=30,
        payload={
            "task": task,
            "first_frame": img_a_b64,
            "prev_frame": img_b_b64,
            "curr_frame": img_c_b64,
            "reference": [img_a_b64, img_b_b64],  # Optional reference
        },
        required_keys=("done", "prob"),
        report=_print_response,
    ):
        return False

    # Test 4: Trajectory critic
    print("\n4. Testing /trajectory-critic...")
    if not _run_endpoint_check(
        "Trajectory critic",
        f"{base_url}/trajectory-critic",
        timeout=60,
        payload={
            "task": task,
            "frames": [img_a_b64, img_b_b64, img_c_b64],
            "reference": [img_a_b64, img_b_b64],  # Need at least 2 reference images
            "skip": 1,
            "ref_num": 2,  # ref_num must be >= 2 when reference images provided
            "batch_size": 2,
            "think": False,
            "return_video": False,
        },
        required_keys=("value_list", "critic_list"),
        report=_print_trajectory_summary,
    ):
        return False

    # Test 4b: Trajectory critic without reference images
    print("\n4b. Testing /trajectory-critic without reference...")
    if not _run_endpoint_check(
        "Trajectory critic without reference",
        f"{base_url}/trajectory-critic",
        timeout=60,
        payload={
            "task": task,
            "frames": [img_a_b64, img_b_b64, img_c_b64],
            "reference": None,  # No reference images
            "skip": 1,
            "ref_num": 0,  # ref_num=0 when no reference images
            "batch_size": 2,
            "think": False,
            "return_video": False,
        },
        required_keys=("value_list", "critic_list"),
        report=_print_trajectory_summary,
    ):
        return False

    # Test 5: Large batch size (should be chunked automatically)
    print("\n5. Testing large batch size chunking...")
    # Create 15 frames (should be chunked into batches of 8)
    large_frames = [
        image_to_base64(create_test_image(text=f"Frame {i}")) for i in range(15)
    ]

    def _print_large_batch_summary(result):
        print(f"Processed {len(large_frames)} frames")
        print(f"Got {len(result.get('value_list', []))} values")

    if not _run_endpoint_check(
        "Large batch chunking",
        f"{base_url}/trajectory-critic",
        timeout=120,
        payload={
            "task": task,
            "frames": large_frames,
            "skip": 1,
            "ref_num": 0,  # No reference images for large batch test
            "batch_size": 15,  # This should be chunked
            "think": False,
            "return_video": False,
        },
        report=_print_large_batch_summary,
    ):
        return False

    print("\nπŸŽ‰ All tests passed!")
    return True
def _expect_status(url, payload, expected_status, *, status_label, passed_msg, failed_label):
    """POST ``payload`` to ``url`` and check the HTTP status code.

    Args:
        url: Full endpoint URL.
        payload: JSON body to send.
        expected_status: Status code the service is expected to answer with.
        status_label: Prefix for the printed "... status: <code>" line.
        passed_msg: Text printed after the pass marker on success.
        failed_label: Name used in the "... failed: <reason>" line.

    Returns:
        True when the status matched, False on a mismatch or request error.
    """
    try:
        response = requests.post(url, json=payload, timeout=10)
        print(f"{status_label} status: {response.status_code}")
        # Explicit raise instead of ``assert`` so the check survives
        # ``python -O`` and the failure line states what went wrong.
        if response.status_code != expected_status:
            raise AssertionError(
                f"expected HTTP {expected_status}, got {response.status_code}"
            )
        print(f"βœ“ {passed_msg}")
        return True
    except Exception as e:
        # Reporting boundary: request errors and mismatches are printed and
        # converted into a False result so the remaining checks still run.
        print(f"βœ— {failed_label} failed: {e}")
        return False


def test_error_cases(base_url: str = "http://localhost:8111") -> bool:
    """Check that malformed requests are rejected with the expected status.

    All three checks always run, even if an earlier one fails.

    Args:
        base_url: Root URL of the running service.

    Returns:
        True if every check saw the expected status code, False otherwise.
    """
    print("\nTesting error cases...")
    img_b64 = image_to_base64(create_test_image())

    outcomes = [
        # Test invalid base64
        _expect_status(
            f"{base_url}/pairwise-critic",
            {
                "task": "Test task",
                "image_a": "invalid_base64",
                "image_b": "also_invalid",
                "rich": False,
            },
            400,
            status_label="Invalid base64",
            passed_msg="Invalid base64 handled correctly",
            failed_label="Error case test",
        ),
        # Test invalid ref_num for trajectory-critic
        _expect_status(
            f"{base_url}/trajectory-critic",
            {
                "task": "Test task",
                "frames": [img_b64, img_b64],
                "reference": [img_b64],  # Only 1 reference image
                "ref_num": 1,  # This should cause validation error
                "batch_size": 2,
            },
            422,
            status_label="Invalid ref_num",
            passed_msg="Invalid ref_num validation works",
            failed_label="ref_num validation test",
        ),
        # Test invalid reference count for done detection
        _expect_status(
            f"{base_url}/done",
            {
                "task": "Test task",
                "first_frame": img_b64,
                "prev_frame": img_b64,
                "curr_frame": img_b64,
                "reference": [img_b64],  # Only 1 reference image, should require 2+
            },
            422,
            status_label="Invalid done reference count",
            passed_msg="Invalid done reference count validation works",
            failed_label="Done reference validation test",
        ),
    ]
    return all(outcomes)
def main():
    """Parse command-line options, run the checks and return an exit code.

    Options:
        --url: Service URL (default ``http://localhost:8111``).
        --wait: Seconds to sleep before the first request (default 5).

    Returns:
        0 when the endpoint checks pass and the error-case checks do not
        report a failure, 1 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Test VLAC Service")
    parser.add_argument("--url", default="http://localhost:8111", help="Service URL")
    parser.add_argument("--wait", type=int, default=5, help="Seconds to wait for service startup")
    args = parser.parse_args()

    # time.sleep() rejects negative values, so only wait for a positive delay.
    if args.wait > 0:
        print(f"Waiting {args.wait} seconds for service to start...")
        time.sleep(args.wait)

    if not test_service(args.url):
        return 1

    # Only an explicit False counts as a failure, so an error-case run that
    # reports nothing (returns None) does not fail the whole script.
    if test_error_cases(args.url) is False:
        return 1
    return 0
if __name__ == "__main__":
    # Raise SystemExit directly: the exit() helper is added by the site module
    # for interactive use and is not guaranteed to exist (e.g. python -S).
    raise SystemExit(main())