Spaces:
Sleeping
Sleeping
Enhance: Multi-step iterative tasks
Browse files
- inference.py +14 -4
inference.py
CHANGED
|
@@ -44,7 +44,16 @@ BENCHMARK = "cloud_ops_env"
|
|
| 44 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 45 |
"""
|
| 46 |
You are an expert Cloud SRE (Site Reliability Engineer). Your goal is to optimize cloud infrastructure
|
| 47 |
-
to meet the SLA requirements while minimizing costs.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
Available instance types (cost per month, capacity):
|
| 50 |
- t3.nano: $3.60, capacity 1.0
|
|
@@ -92,7 +101,7 @@ def step_env(message: str) -> dict:
|
|
| 92 |
return resp.json()
|
| 93 |
|
| 94 |
|
| 95 |
-
def build_user_prompt(obs_data: dict) -> str:
|
| 96 |
inventory = obs_data.get("inventory", [])
|
| 97 |
metrics = obs_data.get("metrics", {})
|
| 98 |
sla = obs_data.get("sla", {})
|
|
@@ -102,7 +111,7 @@ def build_user_prompt(obs_data: dict) -> str:
|
|
| 102 |
for r in inventory
|
| 103 |
])
|
| 104 |
|
| 105 |
-
prompt = f"""Current Infrastructure:
|
| 106 |
{inv_str}
|
| 107 |
|
| 108 |
Metrics:
|
|
@@ -115,6 +124,7 @@ SLA Requirements:
|
|
| 115 |
|
| 116 |
Task: {obs_data.get('task_name', 'Optimize')} ({obs_data.get('difficulty', 'easy')})
|
| 117 |
|
|
|
|
| 118 |
Provide your next command:"""
|
| 119 |
|
| 120 |
return prompt
|
|
@@ -178,7 +188,7 @@ def run_task(client: OpenAI, task_key: str, verbose: bool = False) -> dict:
|
|
| 178 |
if done:
|
| 179 |
break
|
| 180 |
|
| 181 |
-
user_prompt = build_user_prompt(obs_data)
|
| 182 |
response_text = call_model(client, user_prompt, history)
|
| 183 |
history.append({"role": "assistant", "content": response_text})
|
| 184 |
|
|
|
|
| 44 |
SYSTEM_PROMPT = textwrap.dedent(
|
| 45 |
"""
|
| 46 |
You are an expert Cloud SRE (Site Reliability Engineer). Your goal is to optimize cloud infrastructure
|
| 47 |
+
to meet the SLA requirements while minimizing costs through ITERATIVE exploration.
|
| 48 |
+
|
| 49 |
+
IMPORTANT: This is a multi-step optimization task. You must make 3-5 changes to find the optimal configuration.
|
| 50 |
+
Do NOT expect to get high rewards with just 1-2 changes!
|
| 51 |
+
|
| 52 |
+
Strategy:
|
| 53 |
+
1. Analyze the current infrastructure and identify overloaded servers
|
| 54 |
+
2. Make incremental changes - upgrade one server at a time
|
| 55 |
+
3. Evaluate the result and adjust your next action
|
| 56 |
+
4. Continue until you find the optimal configuration
|
| 57 |
|
| 58 |
Available instance types (cost per month, capacity):
|
| 59 |
- t3.nano: $3.60, capacity 1.0
|
|
|
|
| 101 |
return resp.json()
|
| 102 |
|
| 103 |
|
| 104 |
+
def build_user_prompt(obs_data: dict, step_num: int) -> str:
|
| 105 |
inventory = obs_data.get("inventory", [])
|
| 106 |
metrics = obs_data.get("metrics", {})
|
| 107 |
sla = obs_data.get("sla", {})
|
|
|
|
| 111 |
for r in inventory
|
| 112 |
])
|
| 113 |
|
| 114 |
+
prompt = f"""Current Infrastructure (Step {step_num}):
|
| 115 |
{inv_str}
|
| 116 |
|
| 117 |
Metrics:
|
|
|
|
| 124 |
|
| 125 |
Task: {obs_data.get('task_name', 'Optimize')} ({obs_data.get('difficulty', 'easy')})
|
| 126 |
|
| 127 |
+
Remember: Make incremental changes. Evaluate result, then adjust. Aim for 3-5 changes total.
|
| 128 |
Provide your next command:"""
|
| 129 |
|
| 130 |
return prompt
|
|
|
|
| 188 |
if done:
|
| 189 |
break
|
| 190 |
|
| 191 |
+
user_prompt = build_user_prompt(obs_data, step)
|
| 192 |
response_text = call_model(client, user_prompt, history)
|
| 193 |
history.append({"role": "assistant", "content": response_text})
|
| 194 |
|