""" Inference Script — GridOps Microgrid Environment =================================== MANDATORY - Before submitting, ensure the following variables are defined in your environment configuration: API_BASE_URL The API endpoint for the LLM. MODEL_NAME The model identifier to use for inference. HF_TOKEN Your Hugging Face / API key. - The inference script must be named `inference.py` and placed in the root directory of the project - Participants must use OpenAI Client for all LLM calls using above variables """ import json import os import sys from openai import OpenAI # ── Env vars (as required by hackathon) ────────────────────────────────── API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1") API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3.3-70B-Instruct") # ── Environment import (runs in-process, no server needed) ────────────── sys.path.insert(0, os.path.dirname(__file__)) from gridops.server.environment import GridOpsEnvironment from gridops.models import GridOpsAction TASKS = ["task_1_normal", "task_2_heatwave", "task_3_crisis"] MAX_STEPS = 72 TEMPERATURE = 0.1 MAX_TOKENS = 500 # higher to support reasoning models that emit thinking before JSON SYSTEM_PROMPT = """\ You are an expert microgrid operator managing a 100-home community in India during summer. You control three actions each hour: - battery_dispatch: -1 (charge 100 kW from grid) to +1 (discharge 100 kW to community) - diesel_dispatch: 0 (off) to 1 (100 kW). Costs Rs 25/kWh + Rs 100 startup if was off. - demand_shedding: 0 (none) to 1 (shed 20% of demand). WARNING: 100% rebounds next hour! Rs 40/kWh penalty. The GRID automatically absorbs the residual (capped at ±200 kW). If demand exceeds grid + solar + battery + diesel → BLACKOUT (Rs 150/kWh penalty!). Key economics: - Grid prices vary Rs 3-20/kWh. Cheap at night, expensive evening. - Battery: 500 kWh, 100 kW max, 90% round-trip efficiency, Rs 2.5/kWh degradation. - Solar: 250 kW peak (free!), bell curve 6AM-6PM, zero at night. - Demand: ~100 kW avg, 250 kW evening peak. Grid cap = 200 kW → need battery for gap. Strategy: 1. Night (0-6h): charge battery (cheap grid, low demand) 2. Solar (6-15h): surplus charges battery or exports 3. Pre-peak (15-17h): ensure battery > 70% 4. Evening peak (18-22h): discharge battery to cover gap above grid 200 kW cap 5. Diesel: only when battery empty AND peak demand. Avoid startup costs. Respond ONLY with valid JSON: {"battery_dispatch": float, "diesel_dispatch": float, "demand_shedding": float}""" def format_observation(obs: dict) -> str: """Format observation into a readable prompt for the LLM.""" return ( f"Hour {obs['hour']:.0f}/72 (Day {obs.get('day_of_episode', '?')})\n" f"Demand: {obs['demand_kw']:.0f} kW | Solar: {obs['solar_kw']:.0f} kW\n" f"Battery SOC: {obs['battery_soc']*100:.0f}% | Grid Price: Rs {obs['grid_price']:.1f}/kWh\n" f"Diesel Fuel: {obs['diesel_fuel_remaining']*100:.0f}% | Diesel On: {obs.get('diesel_is_on', False)}\n" f"Grid import last step: {obs.get('grid_kw_this_step', 0):.0f} kW\n" f"Forecasts (next 4h):\n" f" Demand: {[f'{v:.0f}' for v in obs.get('demand_forecast_4h', [])]}\n" f" Solar: {[f'{v:.0f}' for v in obs.get('solar_forecast_4h', [])]}\n" f" Price: {[f'{v:.1f}' for v in obs.get('price_forecast_4h', [])]}\n" f"Cumulative: blackout={obs['cumulative_blackout_kwh']:.1f} kWh, cost=Rs {obs['cumulative_cost']:.0f}\n" f"{obs.get('narration', '')}\n" f"\nWhat action? Reply with JSON only." ) def parse_action(text: str) -> dict: """Extract action JSON from LLM response.""" text = text.strip() for start, end in [("{", "}"), ("```json", "```")]: idx = text.find(start) if idx >= 0: if end == "}": eidx = text.rfind("}") + 1 else: eidx = text.find(end, idx + len(start)) try: return json.loads(text[idx:eidx]) except json.JSONDecodeError: continue return {"battery_dispatch": 0.0, "diesel_dispatch": 0.0, "demand_shedding": 0.0} def run_task(client: OpenAI, env: GridOpsEnvironment, task_id: str, seed: int = 42) -> dict: """Run one full episode on a task, return grade.""" obs = env.reset(seed=seed, task_id=task_id) obs_dict = obs.model_dump() # ── [START] structured output ── print(f"[START] task={task_id}", flush=True) messages = [{"role": "system", "content": SYSTEM_PROMPT}] for step_idx in range(MAX_STEPS): user_msg = format_observation(obs_dict) messages.append({"role": "user", "content": user_msg}) # Keep context manageable if len(messages) > 21: messages = [messages[0]] + messages[-20:] try: completion = client.chat.completions.create( model=MODEL_NAME, messages=messages, temperature=TEMPERATURE, max_tokens=MAX_TOKENS, ) reply = completion.choices[0].message.content or "" except Exception as e: reply = "{}" messages.append({"role": "assistant", "content": reply}) action_dict = parse_action(reply) action = GridOpsAction( battery_dispatch=max(-1.0, min(1.0, float(action_dict.get("battery_dispatch", 0.0)))), diesel_dispatch=max(0.0, min(1.0, float(action_dict.get("diesel_dispatch", 0.0)))), demand_shedding=max(0.0, min(1.0, float(action_dict.get("demand_shedding", 0.0)))), ) obs = env.step(action) obs_dict = obs.model_dump() # ── [STEP] structured output ── reward = obs_dict.get("reward", 0.0) print(f"[STEP] step={step_idx + 1} reward={reward:.4f}", flush=True) if obs_dict.get("done", False): break grade = env.state.grade score = grade["score"] if grade else 0.0 steps = step_idx + 1 # ── [END] structured output ── print(f"[END] task={task_id} score={score:.4f} steps={steps}", flush=True) return grade def main(): client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY) env = GridOpsEnvironment() results = {} for task_id in TASKS: grade = run_task(client, env, task_id) results[task_id] = grade # Summary for task_id, grade in results.items(): score = grade["score"] if grade else 0.0 print(f"[SUMMARY] task={task_id} score={score:.4f}", flush=True) if __name__ == "__main__": main()