Spaces:

Otter21
/

Gov_Workflow_RL

Running

Gov_Workflow_RL / tests /manual_test_10_models.py

Siddharaj Shirke

deploy: clean code-only snapshot for HF Space

df97e68 10 days ago

1.74 kB

	import os
	import time
	from openai import OpenAI
	from dotenv import load_dotenv

	# Load variables from a .env file (python-dotenv) before the key is read below.
	load_dotenv()

	# API key passed to every model check; None when NVIDIA_API_KEY is not set.
	key1 = os.environ.get("NVIDIA_API_KEY")

	# Model identifiers exercised by this script, in the order they are tested.
	MODELS_10 = [
	    "meta/llama-3.3-70b-instruct",
	    "qwen/qwen3-next-80b-a3b-instruct",
	    "moonshotai/kimi-k2-instruct-0905",
	    "meta/llama-3.1-405b-instruct",
	    "deepseek-ai/deepseek-v3.2",
	    "qwen/qwq-32b",
	    "mistralai/mixtral-8x22b-instruct-v0.1",
	    "google/gemma-3-27b-it",
	    "microsoft/phi-4-mini-instruct",
	    "meta/llama-3.1-8b-instruct",
	]

	def test_model(model_name, api_key):
	    """Send one tiny chat completion to ``model_name`` and report the outcome.

	    Args:
	        model_name: Model identifier passed as ``model`` to the chat
	            completions request.
	        api_key: API key for the endpoint. A falsy value (``None`` or ``""``)
	            skips the check without creating a client or printing anything.

	    Returns:
	        ``"SKIP (No API Key)"`` when ``api_key`` is falsy, ``"PASS"`` when the
	        request succeeds, otherwise ``"FAIL: "`` followed by the first 100
	        characters of the error text. Exceptions from the request are
	        reported through the return value, never raised.
	    """
	    if not api_key:
	        return "SKIP (No API Key)"

	    client = OpenAI(base_url="https://integrate.api.nvidia.com/v1", api_key=api_key)
	    print(f"Testing {model_name:<40}... ", end="", flush=True)

	    try:
	        # perf_counter is monotonic, so the elapsed time cannot be skewed by
	        # wall-clock adjustments during the request.
	        start = time.perf_counter()
	        response = client.chat.completions.create(
	            model=model_name,
	            messages=[{"role": "user", "content": "Reply with 'OK' only."}],
	            max_tokens=10,
	            temperature=0.0,
	            timeout=10,
	        )
	        elapsed = time.perf_counter() - start
	        # The API may return a message whose content is None; treat that as
	        # an empty reply instead of failing on None.strip().
	        content = (response.choices[0].message.content or "").strip()
	    except Exception as e:
	        # Deliberately broad: this is a smoke check, so any failure becomes
	        # a FAIL result for this model rather than aborting the whole run.
	        reason = str(e)[:100]
	        print(f"FAILED: {reason}")
	        return f"FAIL: {reason}"

	    print(f"SUCCESS ({elapsed:.2f}s) -> '{content}'")
	    return "PASS"


	# Manual check helper, not a pytest test: without this flag pytest would
	# collect the test_-prefixed function and error on the missing
	# model_name / api_key fixtures.
	test_model.__test__ = False

	# Driver: check every model in MODELS_10 once, then print a summary.
	print(f"=== Testing {len(MODELS_10)}-Model Sequence ===\n")
	results = {}
	for m in MODELS_10:
	    results[m] = test_model(m, key1)
	    # Small delay to avoid aggressive rate limits; unnecessary when the
	    # check was skipped, because no request was sent.
	    if not results[m].startswith("SKIP"):
	        time.sleep(1)

	print("\n" + "=" * 60)
	fails = [m for m, s in results.items() if s.startswith("FAIL")]
	skips = [m for m, s in results.items() if s.startswith("SKIP")]
	if fails:
	    print(f"Summary: Found {len(fails)} failures: {', '.join(fails)}")
	elif skips:
	    # Skipped checks must not be reported as passes.
	    print(f"Summary: Skipped {len(skips)} of {len(results)} models (no API key); nothing was verified.")
	else:
	    print(f"Summary: All {len(results)} models passed successfully!")
	print("=" * 60)