File size: 6,470 Bytes
cc75d6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Inference script for Container Yard environment using OpenAI API.

This script evaluates a language model's ability to solve container yard placement
tasks using the hackathon-specified output format.
"""

import os
import sys
import json
from typing import Optional

# Load environment variables from .env file if it exists.
# python-dotenv is treated as an optional dependency: if the import fails,
# the script continues and relies on the process environment alone.
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    pass  # python-dotenv not installed, use system env vars

from openai import OpenAI

# Read environment variables with defaults.
# API_BASE_URL and MODEL_NAME fall back to the values below when unset;
# HF_TOKEN has no default and is used as the client's API key.
API_BASE_URL = os.getenv("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "gpt-4o-mini")
HF_TOKEN = os.getenv("HF_TOKEN")

# Fail fast at import time on a missing *or blank* token. A blank value
# (e.g. a bare `HF_TOKEN=` line in a .env file) is not None, so an
# `is None` check alone would let it through, and run_task swallows request
# failures by defaulting to stack 0, which would hide the misconfiguration.
if HF_TOKEN is None or not HF_TOKEN.strip():
    raise ValueError("HF_TOKEN environment variable is required")

# Initialize OpenAI client
client = OpenAI(
    base_url=API_BASE_URL,
    api_key=HF_TOKEN,
)

# Import environment
from server.Container_Yard_environment import ContainerYardEnvironment
from models import ContainerYardAction


def extract_stack_choice(response: str, num_stacks: int) -> Optional[int]:
    """
    Extract stack choice from LLM response.

    The lower-cased response is scanned in two passes:

    1. Numbers attached to the word "stack" are tried in order of
       appearance. Punctuation between the word and the number, or after
       the number, is tolerated ("stack 0", "stack=3", "stack: 2",
       "stacks 1", "stack 2.").
    2. If pass 1 yields nothing usable, every run of digits in the response
       is tried in order of appearance.

    In both passes the first value with ``0 <= value < num_stacks`` wins, so
    an out-of-range number early in the text no longer hides a valid one
    that follows it.

    Args:
        response: Raw model output. ``None`` or an empty string is treated
            as "no answer".
        num_stacks: Number of stacks; valid indices are 0..num_stacks - 1.

    Returns:
        The selected stack index, or None if extraction fails.
    """
    # Local import, as in the original block, so the function does not depend
    # on a module-level `import re`.
    import re

    if not response:
        return None
    text = response.lower().strip()

    def first_in_range(candidates):
        # Return the first candidate that parses to a valid stack index.
        for digits in candidates:
            try:
                value = int(digits)
            except ValueError:
                # e.g. absurdly long digit runs rejected by int()
                continue
            if 0 <= value < num_stacks:
                return value
        return None

    # Pass 1: "stack", an optional word suffix ("stacks", "stack_"), any
    # non-word separators (spaces, "=", ":", "#", "(" ...), then the digits.
    explicit = first_in_range(re.findall(r"stack\w*?\W*(\d+)", text))
    if explicit is not None:
        return explicit

    # Pass 2: any number anywhere in the response.
    return first_in_range(re.findall(r"\d+", text))


def run_task(task_name: str = "medium", max_steps: int = 100) -> dict:
    """
    Run a single task in the Container Yard environment.

    Each step builds a prompt from the current observation, asks the model
    for a stack number, parses the reply with ``extract_stack_choice`` and
    applies it to the environment. Progress is reported on stdout as one
    ``[START]`` line, one ``[STEP]`` line per step and one ``[END]`` line
    (always printed, from the ``finally`` block). Diagnostics go to stderr
    so the stdout format is not disturbed.

    Args:
        task_name: "easy", "medium", or "hard"
        max_steps: Upper bound on the number of steps taken before the
            episode is abandoned (defaults to 100).

    Returns:
        dict with episode results: "task", "success", "steps",
        "total_rewards", "rehandles" and "efficiency". An "error" key holding
        the most recent error message is added only when a model call or an
        environment step failed during the run.
    """
    env = ContainerYardEnvironment(task_name=task_name)
    obs = env.reset()

    print(f"[START] task={task_name} env=container-yard model={MODEL_NAME}")
    sys.stdout.flush()

    step_count = 0
    all_rewards = []
    success = False
    last_error = None
    efficiency_score = 0.0

    try:
        while not obs.done and step_count < max_steps:
            step_count += 1

            prompt = f"""You are managing a container yard. 

Current state:
- Container to place: ID={obs.current_container_id}, Priority={obs.current_container_priority}
- Available stacks: {obs.num_stacks} stacks (0-{obs.num_stacks-1})
- Max stack height: {obs.max_stack_height}
- Current stacks: {json.dumps(obs.stacks)}
- Rehandles so far: {obs.rehandles_so_far}

Place the container in the stack that minimizes future rehandles.
Reply with ONLY the stack number (0-{obs.num_stacks-1}). No explanation needed."""

            try:
                response = client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.7,
                    max_tokens=10,
                )
                action_str = (response.choices[0].message.content or "").strip()
            except Exception as e:
                # Best effort: a failed model call must not abort the episode,
                # so fall back to stack 0, but make the failure visible.
                action_str = "0"
                last_error = str(e)
                print(
                    f"[WARN] step={step_count} model call failed, "
                    f"defaulting to stack 0: {last_error}",
                    file=sys.stderr,
                )

            # An unparseable reply also falls back to stack 0.
            stack_idx = extract_stack_choice(action_str, obs.num_stacks)
            if stack_idx is None:
                stack_idx = 0

            try:
                action = ContainerYardAction(stack_index=stack_idx)
                obs = env.step(action)

                reward_value = float(obs.reward or 0.0)
                all_rewards.append(reward_value)

                error_msg = obs.action_error if obs.action_error else "null"
                print(
                    f"[STEP] step={step_count} action=place({stack_idx}) "
                    f"reward={reward_value:.2f} done={str(obs.done).lower()} error={error_msg}"
                )
                sys.stdout.flush()

                if obs.done:
                    success = True
                    break

            except Exception as e:
                # The environment rejected the step: report it as a terminal
                # step and stop the episode.
                last_error = str(e)
                print(f"[STEP] step={step_count} action=place({stack_idx}) reward=0.00 done=true error={last_error}")
                sys.stdout.flush()
                break

        if step_count > 0:
            efficiency_score = 1.0 - (obs.rehandles_so_far / max(obs.total_containers, 1))
            # Success additionally requires exactly one step per container.
            success = success and step_count == obs.total_containers

    except Exception as e:
        # Anything unexpected (e.g. while building the prompt) ends the
        # episode; record and report it instead of dropping it silently.
        last_error = str(e)
        print(f"[ERROR] task={task_name} aborted: {last_error}", file=sys.stderr)

    finally:
        # close() is optional on the environment; closing is best effort and
        # must never prevent the [END] line from being printed.
        close_fn = getattr(env, "close", None)
        if callable(close_fn):
            try:
                close_fn()
            except Exception as close_error:
                print(f"[WARN] env.close() failed: {close_error}", file=sys.stderr)

        rewards_str = ",".join(f"{r:.2f}" for r in all_rewards)
        print(f"[END] success={str(success).lower()} steps={step_count} rewards={rewards_str}")
        sys.stdout.flush()

    result = {
        "task": task_name,
        "success": success,
        "steps": step_count,
        "total_rewards": sum(all_rewards),
        "rehandles": obs.rehandles_so_far,
        "efficiency": efficiency_score,
    }
    if last_error is not None:
        # Same key that main() uses in its failure record.
        result["error"] = last_error
    return result


def main():
    """
    Run the easy, medium and hard tasks in order.

    A task whose run raises is reported on stderr and recorded as a failed
    result; the remaining tasks still run. A per-task summary (success flag
    and efficiency, 0.00 when absent) is printed to stderr at the end.
    """
    outcomes = []

    for task_name in ("easy", "medium", "hard"):
        try:
            outcome = run_task(task_name)
        except Exception as exc:
            print(f"[ERROR] Task {task_name} failed: {exc}", file=sys.stderr)
            outcome = {
                "task": task_name,
                "success": False,
                "steps": 0,
                "total_rewards": 0.0,
                "error": str(exc),
            }
        outcomes.append(outcome)

    # Summary
    print("\n=== Summary ===", file=sys.stderr)
    for outcome in outcomes:
        task_label = outcome["task"]
        succeeded = outcome["success"]
        efficiency = outcome.get("efficiency", 0.0)
        print(f"Task {task_label}: success={succeeded}, efficiency={efficiency:.2f}", file=sys.stderr)


# Script entry point: run all tasks only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()