TTI / Dev /test_vlac_service.py

Upload folder using huggingface_hub

857c2e9 verified 8 days ago

9.56 kB

	#!/usr/bin/env python3
	"""
	Test script for VLAC Service

	Tests all endpoints with sample data to verify the service is working correctly.
	"""

	import base64
	import json
	import requests
	import time
	from io import BytesIO
	from pathlib import Path
	from PIL import Image, ImageDraw


	def create_test_image(size=(448, 448), color=(255, 0, 0), text="Test"):
	    """Build a solid-colour RGB image carrying a short white label.

	    Args:
	        size: ``(width, height)`` of the image in pixels.
	        color: RGB tuple used to fill the background.
	        text: Label drawn in white at pixel offset ``(10, 10)``.

	    Returns:
	        The newly created PIL image.
	    """
	    label_origin = (10, 10)
	    white = (255, 255, 255)
	    canvas = Image.new(mode='RGB', size=size, color=color)
	    ImageDraw.Draw(canvas).text(label_origin, text, fill=white)
	    return canvas


	def image_to_base64(img: Image.Image) -> str:
	    """Encode a PIL image as a base64 string of its JPEG bytes.

	    Images whose mode JPEG cannot store (for example ``RGBA``, ``LA`` or
	    ``P``) are converted to ``RGB`` first, so the helper does not fail on
	    images with an alpha channel or a palette. Modes that Pillow can
	    already write as JPEG are encoded unchanged.

	    Args:
	        img: The image to encode. It is not modified; a converted copy is
	            used when a mode change is required.

	    Returns:
	        The JPEG file content, base64-encoded, as an ASCII ``str``.
	    """
	    # Modes Pillow's JPEG writer accepts directly; anything else would
	    # raise OSError ("cannot write mode ... as JPEG") inside save().
	    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
	    if img.mode not in jpeg_modes:
	        img = img.convert("RGB")

	    buffer = BytesIO()
	    img.save(buffer, format='JPEG')
	    # base64 output is pure ASCII, so the codec choice never loses data.
	    return base64.b64encode(buffer.getvalue()).decode("ascii")


	def _check_response(response, result=None, required_keys=()):
	    """Raise ``AssertionError`` unless *response* is a 200 containing the keys.

	    Explicit raises are used instead of ``assert`` statements so the checks
	    still run when Python is started with ``-O``.
	    """
	    if response.status_code != 200:
	        raise AssertionError(f"expected HTTP 200, got {response.status_code}")
	    missing = [key for key in required_keys if key not in result]
	    if missing:
	        raise AssertionError(f"response is missing keys: {missing}")


	def test_service(base_url: str = "http://localhost:8111"):
	    """Exercise every service endpoint with synthetic images.

	    The checks run in order (health check, pairwise critic, done detection,
	    trajectory critic with and without reference images, oversized batch)
	    and stop at the first failure.

	    Args:
	        base_url: Root URL of the running service, without a trailing slash.

	    Returns:
	        True when every check passed, False as soon as one fails. Failures
	        are printed rather than raised.
	    """
	    print(f"Testing VLAC service at {base_url}")
	    task = "Pick up the red bowl and place it in the white box."

	    # Test 1: Health check
	    print("\n1. Testing /healthcheck...")
	    try:
	        response = requests.post(f"{base_url}/healthcheck", timeout=10)
	        print(f"Status: {response.status_code}")
	        print(f"Response: {response.json()}")
	        _check_response(response)
	        print("✓ Health check passed")
	    except Exception as e:
	        print(f"✗ Health check failed: {e}")
	        return False

	    # Create test images
	    img_a = create_test_image(color=(255, 0, 0), text="Image A")
	    img_b = create_test_image(color=(0, 255, 0), text="Image B")
	    img_c = create_test_image(color=(0, 0, 255), text="Image C")

	    img_a_b64 = image_to_base64(img_a)
	    img_b_b64 = image_to_base64(img_b)
	    img_c_b64 = image_to_base64(img_c)

	    # Test 2: Pairwise critic
	    print("\n2. Testing /pairwise-critic...")
	    try:
	        payload = {
	            "task": task,
	            "image_a": img_a_b64,
	            "image_b": img_b_b64,
	            "rich": False,
	        }
	        response = requests.post(f"{base_url}/pairwise-critic", json=payload, timeout=30)
	        print(f"Status: {response.status_code}")
	        result = response.json()
	        print(f"Response: {result}")
	        _check_response(response, result, ("critic", "raw"))
	        print("✓ Pairwise critic passed")
	    except Exception as e:
	        print(f"✗ Pairwise critic failed: {e}")
	        return False

	    # Test 3: Done detection
	    print("\n3. Testing /done...")
	    try:
	        payload = {
	            "task": task,
	            "first_frame": img_a_b64,
	            "prev_frame": img_b_b64,
	            "curr_frame": img_c_b64,
	            "reference": [img_a_b64, img_b_b64],  # Optional reference
	        }
	        response = requests.post(f"{base_url}/done", json=payload, timeout=30)
	        print(f"Status: {response.status_code}")
	        result = response.json()
	        print(f"Response: {result}")
	        _check_response(response, result, ("done", "prob"))
	        print("✓ Done detection passed")
	    except Exception as e:
	        print(f"✗ Done detection failed: {e}")
	        return False

	    # Test 4: Trajectory critic
	    print("\n4. Testing /trajectory-critic...")
	    try:
	        payload = {
	            "task": task,
	            "frames": [img_a_b64, img_b_b64, img_c_b64],
	            "reference": [img_a_b64, img_b_b64],  # Need at least 2 reference images
	            "skip": 1,
	            "ref_num": 2,  # ref_num must be >= 2 when reference images provided
	            "batch_size": 2,
	            "think": False,
	            "return_video": False,
	        }
	        response = requests.post(f"{base_url}/trajectory-critic", json=payload, timeout=60)
	        print(f"Status: {response.status_code}")
	        result = response.json()
	        print(f"Response keys: {list(result.keys())}")
	        print(f"Value list length: {len(result.get('value_list', []))}")
	        print(f"Critic list length: {len(result.get('critic_list', []))}")
	        _check_response(response, result, ("value_list", "critic_list"))
	        print("✓ Trajectory critic passed")
	    except Exception as e:
	        print(f"✗ Trajectory critic failed: {e}")
	        return False

	    # Test 4b: Trajectory critic without reference images
	    print("\n4b. Testing /trajectory-critic without reference...")
	    try:
	        payload = {
	            "task": task,
	            "frames": [img_a_b64, img_b_b64, img_c_b64],
	            "reference": None,  # No reference images
	            "skip": 1,
	            "ref_num": 0,  # ref_num=0 when no reference images
	            "batch_size": 2,
	            "think": False,
	            "return_video": False,
	        }
	        response = requests.post(f"{base_url}/trajectory-critic", json=payload, timeout=60)
	        print(f"Status: {response.status_code}")
	        result = response.json()
	        print(f"Response keys: {list(result.keys())}")
	        print(f"Value list length: {len(result.get('value_list', []))}")
	        print(f"Critic list length: {len(result.get('critic_list', []))}")
	        _check_response(response, result, ("value_list", "critic_list"))
	        print("✓ Trajectory critic without reference passed")
	    except Exception as e:
	        print(f"✗ Trajectory critic without reference failed: {e}")
	        return False

	    # Test 5: Large batch size (should be chunked automatically)
	    print("\n5. Testing large batch size chunking...")
	    try:
	        # Create 15 frames (should be chunked into batches of 8)
	        large_frames = [
	            image_to_base64(create_test_image(text=f"Frame {i}")) for i in range(15)
	        ]
	        payload = {
	            "task": task,
	            "frames": large_frames,
	            "skip": 1,
	            "ref_num": 0,  # No reference images for large batch test
	            "batch_size": 15,  # This should be chunked
	            "think": False,
	            "return_video": False,
	        }
	        response = requests.post(f"{base_url}/trajectory-critic", json=payload, timeout=120)
	        print(f"Status: {response.status_code}")
	        result = response.json()
	        print(f"Processed {len(large_frames)} frames")
	        print(f"Got {len(result.get('value_list', []))} values")
	        # Only the status is checked here; the response keys are not.
	        _check_response(response)
	        print("✓ Large batch chunking passed")
	    except Exception as e:
	        print(f"✗ Large batch chunking failed: {e}")
	        return False

	    print("\n🎉 All tests passed!")
	    return True


	def test_error_cases(base_url: str = "http://localhost:8111"):
	    """Send deliberately invalid requests and check the rejection status codes.

	    The cases are independent: a failing case is reported on stdout and the
	    remaining cases still run.

	    Args:
	        base_url: Root URL of the running service, without a trailing slash.

	    Returns:
	        True when every case returned the expected status code, False when
	        at least one case failed or raised.
	    """
	    print("\nTesting error cases...")
	    all_passed = True

	    def expect_status(response, expected):
	        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
	        if response.status_code != expected:
	            raise AssertionError(
	                f"expected HTTP {expected}, got {response.status_code}"
	            )

	    # Test invalid base64
	    try:
	        payload = {
	            "task": "Test task",
	            "image_a": "invalid_base64",
	            "image_b": "also_invalid",
	            "rich": False,
	        }
	        response = requests.post(f"{base_url}/pairwise-critic", json=payload, timeout=10)
	        print(f"Invalid base64 status: {response.status_code}")
	        expect_status(response, 400)
	        print("✓ Invalid base64 handled correctly")
	    except Exception as e:
	        print(f"✗ Error case test failed: {e}")
	        all_passed = False

	    # Test invalid ref_num for trajectory-critic
	    try:
	        img = create_test_image()
	        img_b64 = image_to_base64(img)
	        payload = {
	            "task": "Test task",
	            "frames": [img_b64, img_b64],
	            "reference": [img_b64],  # Only 1 reference image
	            "ref_num": 1,  # This should cause validation error
	            "batch_size": 2,
	        }
	        response = requests.post(f"{base_url}/trajectory-critic", json=payload, timeout=10)
	        print(f"Invalid ref_num status: {response.status_code}")
	        expect_status(response, 422)
	        print("✓ Invalid ref_num validation works")
	    except Exception as e:
	        print(f"✗ ref_num validation test failed: {e}")
	        all_passed = False

	    # Test invalid reference count for done detection
	    try:
	        img = create_test_image()
	        img_b64 = image_to_base64(img)
	        payload = {
	            "task": "Test task",
	            "first_frame": img_b64,
	            "prev_frame": img_b64,
	            "curr_frame": img_b64,
	            "reference": [img_b64],  # Only 1 reference image, should require 2+
	        }
	        response = requests.post(f"{base_url}/done", json=payload, timeout=10)
	        print(f"Invalid done reference count status: {response.status_code}")
	        expect_status(response, 422)
	        print("✓ Invalid done reference count validation works")
	    except Exception as e:
	        print(f"✗ Done reference validation test failed: {e}")
	        all_passed = False

	    return all_passed


	def main():
	    """Parse the command line, wait for the service, and run the test suites.

	    Command-line options:
	        --url:  Root URL of the service (default ``http://localhost:8111``).
	        --wait: Seconds to sleep before the first request (default 5).

	    Returns:
	        Process exit status: 0 when the endpoint tests pass and the
	        error-case tests do not report a failure, otherwise 1.
	    """
	    import argparse
	    parser = argparse.ArgumentParser(description="Test VLAC Service")
	    parser.add_argument("--url", default="http://localhost:8111", help="Service URL")
	    parser.add_argument("--wait", type=int, default=5, help="Seconds to wait for service startup")
	    args = parser.parse_args()

	    print(f"Waiting {args.wait} seconds for service to start...")
	    # time.sleep() raises ValueError for negative values; treat them as "no wait".
	    time.sleep(max(args.wait, 0))

	    if not test_service(args.url):
	        return 1

	    # Only an explicit False marks the error cases as failed; a None return
	    # (no verdict reported) is treated as success.
	    error_cases_ok = test_error_cases(args.url) is not False
	    return 0 if error_cases_ok else 1


	if __name__ == "__main__":
	    # ``exit`` is an interactive helper injected by the ``site`` module and is
	    # absent under ``python -S``; raising SystemExit always works and
	    # propagates main()'s return value as the process exit status.
	    raise SystemExit(main())