#!/usr/bin/env python3
"""
Baseline Inference Script for CloudOps Optimizer Environment.
Uses the OpenAI client for model calls and HTTP requests to drive the environment server.
Usage:
python inference.py
Environment Variables:
API_BASE_URL: The API endpoint (default: https://router.huggingface.co/v1)
MODEL_NAME: The model identifier (default: Qwen/Qwen2.5-72B-Instruct)
HF_TOKEN: Your Hugging Face token or API key (required)
SERVER_URL: The environment server URL (default: http://localhost:7860)
Expected format for STDOUT:
[START] task=<task_name> env=<benchmark> model=<model_name>
[STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
[END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
"""
import json
import os
import textwrap
import time
import requests
from typing import List, Optional
from openai import OpenAI
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGING_FACE_TOKEN")
SERVER_URL = os.getenv("SERVER_URL", "http://localhost:7860")
MAX_STEPS = 8
MAX_TOKENS = 256
TEMPERATURE = 0.7
SUCCESS_SCORE_THRESHOLD = 0.5
BENCHMARK = "cloud_ops_env"
SYSTEM_PROMPT = textwrap.dedent(
"""
You are an expert Cloud SRE (Site Reliability Engineer). Your goal is to optimize cloud infrastructure
to meet the SLA requirements while minimizing costs through ITERATIVE exploration.
IMPORTANT: This is a multi-step optimization task. You must make 3-5 changes to find the optimal configuration.
Do NOT expect to get high rewards with just 1-2 changes!
Strategy:
1. Analyze the current infrastructure and identify overloaded servers
2. Make incremental changes - upgrade one server at a time
3. Evaluate the result and adjust your next action
4. Continue until you find the optimal configuration
Available instance types (cost per month, capacity):
- t3.nano: $3.60, capacity 1.0
- t3.small: $11.50, capacity 2.0
- t3.medium: $23.00, capacity 4.0
- m5.large: $70.00, capacity 8.0
- m5.xlarge: $140.00, capacity 16.0
Command format: "change [resource_id] to [instance_type]"
Example: "change srv-1 to t3.small"
You must output ONLY the command, nothing else."""
).strip()
def log_start(task: str, env: str, model: str) -> None:
print(f"[START] task={task} env={env} model={model}", flush=True)
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
error_val = error if error else "null"
done_val = str(done).lower()
print(
f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
flush=True,
)
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
def reset_env(task: str) -> dict:
"""Reset the environment via HTTP."""
    resp = requests.get(f"{SERVER_URL}/reset", params={"task": task}, timeout=30)
resp.raise_for_status()
return resp.json()
def step_env(message: str) -> dict:
"""Send action to environment via HTTP."""
    resp = requests.post(f"{SERVER_URL}/step", json={"message": message}, timeout=30)
resp.raise_for_status()
return resp.json()
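# Sketch of the JSON payloads these helpers return, inferred from the fields
# read elsewhere in this script (values are hypothetical, not server output):
#   /reset -> {"observation": {"inventory": [...], "metrics": {...}, "sla": {...},
#              "task_name": "...", "difficulty": "easy"}, "done": false}
#   /step  -> {"observation": {...}, "reward": 0.5, "done": false,
#              "info": {"reason": null}}   # "crash" signals a failed config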
def build_user_prompt(obs_data: dict, step_num: int) -> str:
inventory = obs_data.get("inventory", [])
metrics = obs_data.get("metrics", {})
sla = obs_data.get("sla", {})
inv_str = "\n".join([
f" {r['id']}: {r['type']} - ${r['monthly_cost']}/mo, CPU: {r['cpu_usage']:.1f}%"
for r in inventory
])
prompt = f"""Current Infrastructure (Step {step_num}):
{inv_str}
Metrics:
- Latency: {metrics.get('avg_latency_ms', 0):.1f}ms
- Error Rate: {metrics.get('error_rate', 0):.3f}
SLA Requirements:
- Max Latency: {sla.get('max_latency_ms', 0)}ms
- Max Budget: ${sla.get('max_budget', 0)}/mo
Task: {obs_data.get('task_name', 'Optimize')} ({obs_data.get('difficulty', 'easy')})
Remember: Make incremental changes. Evaluate result, then adjust. Aim for 3-5 changes total.
Provide your next command:"""
return prompt
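# Example of a rendered prompt (illustrative inventory and metric values):
#   Current Infrastructure (Step 1):
#     srv-1: t3.nano - $3.6/mo, CPU: 95.0%
#     srv-2: t3.small - $11.5/mo, CPU: 88.2%
#   Metrics:
#   - Latency: 250.0ms
#   - Error Rate: 0.050
#   SLA Requirements:
#   - Max Latency: 100ms
#   - Max Budget: $120/mo
#   ...
#   Provide your next command: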
def call_model(client: OpenAI, user_prompt: str, history: List[dict]) -> str:
messages = [{"role": "system", "content": SYSTEM_PROMPT}]
messages.extend(history)
messages.append({"role": "user", "content": user_prompt})
try:
completion = client.chat.completions.create(
model=MODEL_NAME,
messages=messages,
temperature=TEMPERATURE,
max_tokens=MAX_TOKENS,
stream=False,
)
text = (completion.choices[0].message.content or "").strip()
# Extract just the command if model adds explanation
lines = text.split('\n')
for line in lines:
line = line.strip()
if line.startswith('change '):
return line
return text if text else "change srv-1 to t3.small"
except Exception as exc:
print(f"[DEBUG] Model request failed: {exc}", flush=True)
return "change srv-1 to t3.small"
TASKS = {
"easy": {"task_id": "easy_right_sizing", "name": "Right-Sizing", "difficulty": "easy"},
"medium": {"task_id": "medium_latency_fix", "name": "Latency Fix", "difficulty": "medium"},
"hard": {"task_id": "hard_balance", "name": "Balance Optimization", "difficulty": "hard"},
}
def run_task(client: OpenAI, task_key: str, verbose: bool = False) -> dict:
"""Run inference on a single task via HTTP."""
task = TASKS[task_key]
task_name = task["name"]
history: List[dict] = []
rewards: List[float] = []
steps_taken = 0
score = 0.0
success = False
error_msg = None
log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
try:
result = reset_env(task_key)
obs_data = result.get("observation", {})
done = result.get("done", False)
for step in range(1, MAX_STEPS + 1):
if done:
break
user_prompt = build_user_prompt(obs_data, step)
response_text = call_model(client, user_prompt, history)
            # Record both sides of the turn so the chat history stays in
            # alternating user/assistant order for the next model call.
            history.append({"role": "user", "content": user_prompt})
            history.append({"role": "assistant", "content": response_text})
action_str = response_text[:50] + "..." if len(response_text) > 50 else response_text
try:
result = step_env(response_text)
reward = result.get("reward", 0.0)
done = result.get("done", False)
error_msg = None
obs_data = result.get("observation", {})
info = result.get("info", {})
if info.get("reason") == "crash":
done = True
reward = 0.0
error_msg = "system_crash"
except Exception as exc:
error_msg = str(exc)
reward = 0.0
done = True
obs_data = {}
rewards.append(reward)
steps_taken = step
log_step(step=step, action=action_str, reward=reward, done=done, error=error_msg)
if done:
break
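        # Normalize against the best achievable total; this baseline assumes
        # the environment caps reward at 1.0 per step.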
max_reward = MAX_STEPS * 1.0
score = sum(rewards) / max_reward if max_reward > 0 else 0.0
score = min(max(score, 0.0), 1.0)
success = score >= SUCCESS_SCORE_THRESHOLD
except Exception as exc:
error_msg = str(exc)
print(f"[DEBUG] Task execution error: {exc}", flush=True)
finally:
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
return {
"task_id": task["task_id"],
"task_name": task_name,
"score": score,
"success": success,
"steps": steps_taken,
"rewards": rewards,
}
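# Example return value from run_task (illustrative numbers; score is
# sum(rewards) / MAX_STEPS, clamped to [0, 1]):
#   {"task_id": "easy_right_sizing", "task_name": "Right-Sizing",
#    "score": 0.51, "success": True, "steps": 5,
#    "rewards": [0.4, 0.8, 1.0, 0.9, 1.0]}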
def main():
print("=" * 60)
print("CloudOps Optimizer — Baseline Inference")
print("=" * 60)
print(f"API URL : {API_BASE_URL}")
print(f"Model : {MODEL_NAME}")
print(f"Server : {SERVER_URL}")
print()
if not HF_TOKEN:
print("ERROR: HF_TOKEN not set")
return
# Test server connection
try:
resp = requests.get(f"{SERVER_URL}/health", timeout=5)
if resp.status_code != 200:
print(f"ERROR: Server returned {resp.status_code}")
return
print("Server connection: OK")
    except Exception as exc:
        print(f"ERROR: Cannot connect to server at {SERVER_URL} ({exc})")
        print("  Make sure the server is running: python main.py")
return
client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
task_keys = ["easy", "medium", "hard"]
results = []
for task_key in task_keys:
task = TASKS[task_key]
print(f"Running task: {task['name']} ({task['difficulty']})...")
try:
r = run_task(client, task_key, verbose=False)
results.append(r)
print(f" score={r['score']:.4f} steps={r['steps']}")
except Exception as exc:
print(f" ERROR: {exc}")
results.append({
"task_id": task["task_id"],
"task_name": task["name"],
"score": 0.0,
"success": False,
"steps": 0,
"rewards": [],
})
print("\n" + "=" * 60)
print("SUMMARY")
print("=" * 60)
total = 0.0
for r in results:
marker = {"easy": "[E]", "medium": "[M]", "hard": "[H]"}.get(r["task_id"].split("_")[0], "?")
print(f"{marker} {r['task_id']:30s} score={r['score']:.4f}")
total += r['score']
avg = total / len(results) if results else 0.0
print("-" * 40)
print(f"Average score: {avg:.4f}")
print()
output_path = "inference_results.json"
with open(output_path, "w") as f:
json.dump(
{
"model": MODEL_NAME,
"api_url": API_BASE_URL,
"server_url": SERVER_URL,
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
"average_score": avg,
"results": results,
},
f,
indent=2,
)
print(f"Results saved to: {output_path}")
if __name__ == "__main__":
main()