Upload folder using huggingface_hub
Browse files
- Dockerfile +81 -0
- README.md +137 -5
- __init__.py +16 -0
- client.py +72 -0
- grader.py +30 -0
- inference.py +144 -0
- models.py +31 -0
- openenv.yaml +19 -0
- openenv_taskmanager.egg-info/PKG-INFO +9 -0
- openenv_taskmanager.egg-info/SOURCES.txt +14 -0
- openenv_taskmanager.egg-info/dependency_links.txt +1 -0
- openenv_taskmanager.egg-info/entry_points.txt +2 -0
- openenv_taskmanager.egg-info/requires.txt +5 -0
- openenv_taskmanager.egg-info/top_level.txt +1 -0
- pyproject.toml +45 -0
- server/__init__.py +11 -0
- server/app.py +108 -0
- server/requirements.txt +6 -0
- server/taskmanager_environment.py +169 -0
- test.py +0 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Multi-stage build using openenv-base
|
| 8 |
+
# This Dockerfile is flexible and works for both:
|
| 9 |
+
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
+
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
+
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
+
|
| 13 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 14 |
+
FROM ${BASE_IMAGE} AS builder
|
| 15 |
+
|
| 16 |
+
WORKDIR /app
|
| 17 |
+
|
| 18 |
+
# Ensure git is available (required for installing dependencies from VCS)
|
| 19 |
+
RUN apt-get update && \
|
| 20 |
+
apt-get install -y --no-install-recommends git && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/*
|
| 22 |
+
|
| 23 |
+
# Build argument to control whether we're building standalone or in-repo
|
| 24 |
+
ARG BUILD_MODE=in-repo
|
| 25 |
+
ARG ENV_NAME=myenv
|
| 26 |
+
|
| 27 |
+
# Copy environment code (always at root of build context)
|
| 28 |
+
COPY . /app/env
|
| 29 |
+
|
| 30 |
+
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
+
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
+
WORKDIR /app/env
|
| 33 |
+
|
| 34 |
+
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
+
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
+
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
+
fi
|
| 40 |
+
|
| 41 |
+
# Install dependencies using uv sync
|
| 42 |
+
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
+
if [ -f uv.lock ]; then \
|
| 45 |
+
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
+
else \
|
| 47 |
+
uv sync --no-install-project --no-editable; \
|
| 48 |
+
fi
|
| 49 |
+
|
| 50 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
+
if [ -f uv.lock ]; then \
|
| 52 |
+
uv sync --frozen --no-editable; \
|
| 53 |
+
else \
|
| 54 |
+
uv sync --no-editable; \
|
| 55 |
+
fi
|
| 56 |
+
|
| 57 |
+
# Final runtime stage
|
| 58 |
+
FROM ${BASE_IMAGE}
|
| 59 |
+
|
| 60 |
+
WORKDIR /app
|
| 61 |
+
|
| 62 |
+
# Copy the virtual environment from builder
|
| 63 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 64 |
+
|
| 65 |
+
# Copy the environment code
|
| 66 |
+
COPY --from=builder /app/env /app/env
|
| 67 |
+
|
| 68 |
+
# Set PATH to use the virtual environment
|
| 69 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 70 |
+
|
| 71 |
+
# Set PYTHONPATH so imports work correctly
|
| 72 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 73 |
+
|
| 74 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 75 |
+
# Health check
|
| 76 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 77 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 78 |
+
|
| 79 |
+
# Run the FastAPI server
|
| 80 |
+
# The module path is constructed to work with the /app/env structure
|
| 81 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,142 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Taskmanager Environment Server
|
| 3 |
+
emoji: 🎬
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: red
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
+
- rl
|
| 13 |
+
- scheduling
|
| 14 |
---
|
| 15 |
|
| 16 |
+
# Taskmanager Environment
|
| 17 |
+
|
| 18 |
+
A reinforcement learning environment that simulates a real-world engineering workflow. The agent must prioritize tickets (bugs, features, UI enhancements) to maximize business impact and avoid SLA violations.
|
| 19 |
+
|
| 20 |
+
## Quick Start
|
| 21 |
+
|
| 22 |
+
The simplest way to use the Taskmanager environment is through the `TaskmanagerEnv` class:
|
| 23 |
+
|
| 24 |
+
```python
|
| 25 |
+
from taskmanager import TaskmanagerAction, TaskmanagerEnv
|
| 26 |
+
|
| 27 |
+
try:
|
| 28 |
+
# Create environment from Docker image
|
| 29 |
+
env = TaskmanagerEnv.from_docker_image("taskmanager:latest")
|
| 30 |
+
|
| 31 |
+
# Reset to start a new episode
|
| 32 |
+
result = env.reset()
|
| 33 |
+
|
| 34 |
+
print(f"Current Time: {result.observation.current_time}")
|
| 35 |
+
print(f"Available Tasks: {len(result.observation.tasks)}")
|
| 36 |
+
|
| 37 |
+
# Execute tasks until the episode is done
|
| 38 |
+
done = False
|
| 39 |
+
while not done:
|
| 40 |
+
# Simple policy: pick the first available task
|
| 41 |
+
if not result.observation.tasks:
|
| 42 |
+
break
|
| 43 |
+
|
| 44 |
+
task_to_execute = result.observation.tasks[0]
|
| 45 |
+
task_id = task_to_execute["id"]
|
| 46 |
+
|
| 47 |
+
# Take a step
|
| 48 |
+
result = env.step(TaskmanagerAction(task_id=task_id))
|
| 49 |
+
|
| 50 |
+
print(f"Executed Task ID: {task_id}")
|
| 51 |
+
print(f" → Reward: {result.reward}")
|
| 52 |
+
print(f" → Current Time: {result.observation.current_time}")
|
| 53 |
+
print(f" → Tasks Remaining: {len(result.observation.tasks)}")
|
| 54 |
+
|
| 55 |
+
done = result.done
|
| 56 |
+
|
| 57 |
+
print("Episode completed!")
|
| 58 |
+
|
| 59 |
+
finally:
|
| 60 |
+
# Always clean up
|
| 61 |
+
env.close()
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
## Environment Details
|
| 65 |
+
|
| 66 |
+
### Action
|
| 67 |
+
**TaskmanagerAction**: Contains a single field specifying the task to execute.
|
| 68 |
+
- `task_id` (int) - The ID of the task/ticket to execute
|
| 69 |
+
|
| 70 |
+
### Observation
|
| 71 |
+
**TaskmanagerObservation**: Contains the current state of the environment.
|
| 72 |
+
- `tasks` (List[Dict]) - List of remaining tickets. Each ticket has:
|
| 73 |
+
- `id` (int): Unique identifier
|
| 74 |
+
- `type` (str): "bug", "feature", or "enhancement"
|
| 75 |
+
- `effort` (int): Time required to complete the ticket
|
| 76 |
+
- `priority` (int): Importance (1-5)
|
| 77 |
+
- `deadline` (int): Target completion time
|
| 78 |
+
- `current_time` (int) - Current time in the schedule
|
| 79 |
+
- `steps_left` (int) - Steps remaining in the episode
|
| 80 |
+
- `reward` (float) - Reward received from the previous action
|
| 81 |
+
- `done` (bool) - Whether the episode is complete (all tasks done or max steps reached)
|
| 82 |
+
- `metadata` (dict) - Additional info like step count
|
| 83 |
+
|
| 84 |
+
### Reward Function
|
| 85 |
+
The reward function is designed to simulate business impact:
|
| 86 |
+
1. **Base Reward**: `priority * 3` if completed before the deadline.
|
| 87 |
+
2. **Penalty**: If delayed, the reward is reduced based on the delay (`priority - delay * 0.5`), with a minimum of -2.
|
| 88 |
+
3. **Type Multipliers**:
|
| 89 |
+
- **Bugs**: 2.0x multiplier (Critical)
|
| 90 |
+
- **Features**: 1.5x multiplier
|
| 91 |
+
- **Enhancements**: 1.0x multiplier
|
| 92 |
+
4. **Invalid Action**: -1 reward for attempting a non-existent task ID.
|
| 93 |
+
|
| 94 |
+
## Building the Docker Image
|
| 95 |
+
|
| 96 |
+
Before using the environment, you need to build the Docker image.
|
| 97 |
+
|
| 98 |
+
**To create the Docker image:**
|
| 99 |
+
```bash
|
| 100 |
+
# From project root
|
| 101 |
+
docker build -t taskmanager .
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
**To run the Docker image locally:**
|
| 105 |
+
```bash
|
| 106 |
+
# Run the container in detached mode and map port 8000
|
| 107 |
+
docker run -d -p 8000:8000 --name taskmanager_server taskmanager
|
| 108 |
+
```
|
| 109 |
+
|
| 110 |
+
## Deploying to Hugging Face Spaces
|
| 111 |
+
|
| 112 |
+
You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
|
| 113 |
+
|
| 114 |
+
```bash
|
| 115 |
+
# From the environment directory (where openenv.yaml is located)
|
| 116 |
+
openenv push
|
| 117 |
+
|
| 118 |
+
# Or specify options
|
| 119 |
+
openenv push --namespace my-org --private
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
After deployment, your space will be available at:
|
| 123 |
+
`https://huggingface.co/spaces/<repo-id>`
|
| 124 |
+
|
| 125 |
+
The deployed space includes:
|
| 126 |
+
- **Web Interface** at `/web` - Interactive UI for exploring the environment
|
| 127 |
+
- **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
|
| 128 |
+
- **Health Check** at `/health` - Container health monitoring
|
| 129 |
+
- **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
|
| 130 |
+
|
| 131 |
+
## Project Structure
|
| 132 |
+
|
| 133 |
+
```
|
| 134 |
+
taskmanager/
|
| 135 |
+
├── client.py # Environment client implementation
|
| 136 |
+
├── models.py # Action and Observation Pydantic models
|
| 137 |
+
├── openenv.yaml # OpenEnv manifest
|
| 138 |
+
├── server/
|
| 139 |
+
│ ├── app.py # FastAPI application
|
| 140 |
+
│ └── taskmanager_environment.py # Core environment logic and reward function
|
| 141 |
+
└── Dockerfile # Container definition
|
| 142 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Taskmanager Environment."""
|
| 8 |
+
|
| 9 |
+
from .client import TaskmanagerEnv
|
| 10 |
+
from .models import TaskmanagerAction, TaskmanagerObservation
|
| 11 |
+
|
| 12 |
+
__all__ = [
|
| 13 |
+
"TaskmanagerAction",
|
| 14 |
+
"TaskmanagerObservation",
|
| 15 |
+
"TaskmanagerEnv",
|
| 16 |
+
]
|
client.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
"""Taskmanager Environment Client."""
|
| 5 |
+
|
| 6 |
+
from typing import Dict, List, Optional
|
| 7 |
+
|
| 8 |
+
from openenv.core import EnvClient
|
| 9 |
+
from openenv.core.client_types import StepResult
|
| 10 |
+
from openenv.core.env_server.types import State
|
| 11 |
+
|
| 12 |
+
from models import TaskmanagerAction, TaskmanagerObservation
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class TaskmanagerEnv(EnvClient[TaskmanagerAction, TaskmanagerObservation, State]):
    """
    Client for the Task Scheduling Environment.

    Supports:
    - default reset()
    - custom reset with user-defined tasks
    """

    # ================= STEP =================

    def _step_payload(self, action: TaskmanagerAction) -> Dict:
        """Serialize an action into the payload dict for a step request."""
        return {
            "task_id": action.task_id,
        }

    # ================= PARSE RESULT =================

    def _parse_result(self, payload: Dict) -> StepResult[TaskmanagerObservation]:
        """Build a ``StepResult`` from a response payload.

        ``tasks``, ``current_time``, ``steps_left`` and ``metadata`` are read
        from the nested ``"observation"`` dict; ``reward`` and ``done`` are read
        from the top level of the payload. Missing keys fall back to empty /
        zero / ``False`` / ``None`` defaults.
        """
        # `or {}` also covers an explicit `"observation": null` in the payload.
        obs_data = payload.get("observation") or {}
        reward = payload.get("reward")
        done = payload.get("done", False)

        observation = TaskmanagerObservation(
            tasks=obs_data.get("tasks", []),
            current_time=obs_data.get("current_time", 0),
            steps_left=obs_data.get("steps_left", 0),
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )

        return StepResult(
            observation=observation,
            reward=reward,
            done=done,
        )

    # ================= PARSE STATE =================

    def _parse_state(self, payload: Dict) -> State:
        """Build a ``State`` from the ``episode_id`` and ``step_count`` keys."""
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )

    # ================= CUSTOM RESET =================

    async def reset(
        self, tasks: Optional[List[Dict]] = None, **kwargs
    ) -> StepResult[TaskmanagerObservation]:
        """
        Reset the environment. If tasks are provided, they will be used instead of the predefined task list.

        Args:
            tasks: Optional list of task dicts, forwarded as ``config["tasks"]``.
            **kwargs: Passed through to the parent ``reset``. A ``config`` dict,
                if given, is used as the base for the forwarded config.
        """
        # Copy the caller's config so injecting "tasks" never mutates their dict.
        payload = dict(kwargs.pop("config", None) or {})
        if tasks is not None:
            payload["tasks"] = tasks

        # Call underlying reset via super
        return await super().reset(config=payload, **kwargs)
|
grader.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# grader.py
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def compute_score(total_reward, max_possible_reward):
    """Normalize a total reward into a score clamped to [0.001, 0.999].

    Args:
        total_reward: Sum of the rewards collected in an episode.
        max_possible_reward: Upper bound used as the normalization denominator.

    Returns:
        ``total_reward / max_possible_reward`` clamped to the closed interval
        [0.001, 0.999]. Returns 0.001 when ``max_possible_reward`` is zero or
        negative (including the empty-episode case), so the result is never
        exactly 0 or 1.
    """
    if max_possible_reward <= 0:
        return 0.001

    score = total_reward / max_possible_reward
    return max(0.001, min(0.999, score))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def evaluate_episode(rewards, max_per_step=15):
    """Score an episode from its per-step rewards.

    Args:
        rewards: Iterable of per-step rewards.
        max_per_step: Reward treated as the maximum achievable on one step.

    Returns:
        The result of ``compute_score`` for the summed rewards against
        ``len(rewards) * max_per_step``.
    """
    # Materialize once so generators and other one-shot iterables work with
    # both sum() and len().
    rewards = list(rewards)
    total_reward = sum(rewards)
    max_possible = len(rewards) * max_per_step
    return compute_score(total_reward, max_possible)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def evaluate_task1(rewards):
    """Grade task 1: score ``rewards`` with a per-step maximum of 15."""
    return evaluate_episode(rewards, 15)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def evaluate_task2(rewards):
    """Grade task 2: score ``rewards`` with a per-step maximum of 15."""
    return evaluate_episode(rewards, 15)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def evaluate_task3(rewards):
    """Grade task 3: score ``rewards`` with a per-step maximum of 15."""
    return evaluate_episode(rewards, 15)
|
inference.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
from typing import List, Optional
|
| 4 |
+
|
| 5 |
+
from openai import OpenAI
|
| 6 |
+
|
| 7 |
+
from client import TaskmanagerEnv
|
| 8 |
+
from models import TaskmanagerAction
|
| 9 |
+
from grader import compute_score # ✅ GRADER USED
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# ================= CONFIG =================
|
| 13 |
+
|
| 14 |
+
API_KEY = os.environ.get("API_KEY", "dummy")
|
| 15 |
+
API_BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:4000")
|
| 16 |
+
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
|
| 17 |
+
|
| 18 |
+
BENCHMARK = "taskmanager"
|
| 19 |
+
MAX_STEPS = 20
|
| 20 |
+
SUCCESS_SCORE_THRESHOLD = 0.6
|
| 21 |
+
|
| 22 |
+
# ================= LOGGING =================
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def log_start(task: str, env: str, model: str) -> None:
    """Print the ``[START]`` line for an episode to stdout (flushed)."""
    print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def log_step(
    step: int, action: str, reward: float, done: bool, error: Optional[str]
) -> None:
    """Print one ``[STEP]`` line to stdout (flushed).

    ``reward`` is formatted with two decimals, ``done`` as lowercase
    ``true``/``false``, and a missing or empty ``error`` as ``null``.
    """
    error_val = error if error else "null"
    print(
        f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_val}",
        flush=True,
    )
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    """Print the ``[END]`` summary line for an episode to stdout (flushed).

    ``score`` is formatted with three decimals; ``rewards`` is a
    comma-separated list with two decimals each (empty string if no rewards).
    """
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    print(
        f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}",
        flush=True,
    )
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# ================= SMART POLICY =================
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def choose_best_ticket(tickets):
    """Pick the ID of the ticket the heuristic policy would resolve next.

    Tickets are ranked by type (bug > feature > enhancement > anything else),
    then by higher ``priority``, then by earlier ``deadline``. Among equally
    ranked tickets the one appearing first in ``tickets`` wins.

    Args:
        tickets: Sequence of ticket dicts with ``id``, ``type``, ``priority``
            and ``deadline`` keys.

    Returns:
        The ``id`` of the best ticket, or ``None`` if ``tickets`` is empty.
    """
    if not tickets:
        return None

    type_score = {"bug": 3, "feature": 2, "enhancement": 1}

    def score(ticket):
        return (
            type_score.get(ticket["type"], 0),
            ticket["priority"],
            -ticket["deadline"],
        )

    # max() returns the first maximal element, matching the previous
    # stable sort(reverse=True)[0] without sorting the whole list.
    return max(tickets, key=score)["id"]
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ================= MAIN =================
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
async def main():
    """Run the heuristic agent on each benchmark task and log the results.

    For every task name the environment is reset, tickets are resolved with
    ``choose_best_ticket`` for at most ``MAX_STEPS`` steps, and the episode is
    scored with ``compute_score`` against 15 reward points per executed step.
    One ``[START]`` line, one ``[STEP]`` line per step and one ``[END]`` line
    are printed per task. The environment client is always closed on exit.
    """
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = TaskmanagerEnv(base_url="http://localhost:8000")

    try:
        tasks_to_run = ["task-1-easy", "task-2-medium", "task-3-hard"]

        for task_name in tasks_to_run:
            rewards: List[float] = []
            steps_taken = 0
            success = False
            score = 0.0

            log_start(task_name, BENCHMARK, MODEL_NAME)

            try:
                # 🔥 RESET ENV (tickets auto-generated)
                result = await env.reset()
                obs = result.observation

                # Best-effort call to the model endpoint; the response is not
                # used and any failure is deliberately ignored so the
                # heuristic run continues.
                try:
                    client.chat.completions.create(
                        model=MODEL_NAME,
                        messages=[{"role": "user", "content": "hello"}],
                        max_tokens=1,
                    )
                except Exception:
                    pass

                for step in range(1, MAX_STEPS + 1):
                    if result.done:
                        break

                    ticket_id = choose_best_ticket(obs.tasks)
                    if ticket_id is None:
                        break

                    # 🔥 STEP
                    result = await env.step(TaskmanagerAction(task_id=ticket_id))
                    obs = result.observation

                    # A missing reward is treated as 0.0.
                    reward = result.reward or 0.0
                    done = result.done

                    rewards.append(reward)
                    steps_taken = step

                    log_step(
                        step=step,
                        action=f"resolve_ticket_{ticket_id}",
                        reward=reward,
                        done=done,
                        error=None,
                    )

                    if done:
                        break

                # ================= GRADER =================
                total_reward = sum(rewards)
                max_per_step = 15
                max_possible = len(rewards) * max_per_step
                score = compute_score(total_reward, max_possible)  # ✅ GRADER USED
                success = score >= SUCCESS_SCORE_THRESHOLD
            finally:
                # Always close the [START] line with an [END] line, even when
                # reset/step raised; the exception still propagates afterwards.
                log_end(success, steps_taken, score, rewards)

    finally:
        # Closing is best-effort: a failure here must not mask an earlier error.
        try:
            await env.close()
        except Exception:
            pass
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
if __name__ == "__main__":
|
| 144 |
+
asyncio.run(main())
|
models.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Data models for the Taskmanager Environment.
|
| 9 |
+
|
| 10 |
+
The taskmanager environment simulates ticket prioritization: on each step the agent chooses which task to execute.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from openenv.core.env_server.types import Action, Observation
|
| 14 |
+
from pydantic import Field
|
| 15 |
+
from typing import List, Dict
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class TaskmanagerAction(Action):
    """Action for the Taskmanager environment: the task to execute next."""

    # ID of the task/ticket the agent wants to execute on this step (required).
    task_id: int = Field(..., description="Task to execute")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class TaskmanagerObservation(Observation):
    """Observation for Task Scheduling Environment"""

    # NOTE(review): reward/done/metadata are not declared here; presumably they
    # come from the Observation base class — confirm against openenv.

    # Remaining tickets, one dict per ticket; defaults to an empty list.
    tasks: List[Dict] = Field(
        default_factory=list, description="List of remaining tickets"
    )
    current_time: int = Field(default=0, description="Current time in the schedule")
    steps_left: int = Field(default=0, description="Steps remaining in episode")
|
openenv.yaml
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: ai-ticket-prioritization-env
|
| 2 |
+
|
| 3 |
+
description: >
|
| 4 |
+
A reinforcement learning environment simulating real-world software engineering
|
| 5 |
+
ticket prioritization. The agent must decide the optimal order to resolve tickets
|
| 6 |
+
(bugs, features, enhancements) under deadlines and resource constraints.
|
| 7 |
+
|
| 8 |
+
entry_point: server.taskmanager_environment:TaskmanagerEnvironment
|
| 9 |
+
|
| 10 |
+
action_space: TaskmanagerAction
|
| 11 |
+
observation_space: TaskmanagerObservation
|
| 12 |
+
|
| 13 |
+
max_steps: 20
|
| 14 |
+
|
| 15 |
+
tags:
|
| 16 |
+
- openenv
|
| 17 |
+
- reinforcement-learning
|
| 18 |
+
- scheduling
|
| 19 |
+
- software-engineering
|
openenv_taskmanager.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-taskmanager
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Taskmanager environment for OpenEnv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core[core]>=0.2.1
|
| 7 |
+
Provides-Extra: dev
|
| 8 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 9 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_taskmanager.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
pyproject.toml
|
| 3 |
+
./__init__.py
|
| 4 |
+
./client.py
|
| 5 |
+
./models.py
|
| 6 |
+
openenv_taskmanager.egg-info/PKG-INFO
|
| 7 |
+
openenv_taskmanager.egg-info/SOURCES.txt
|
| 8 |
+
openenv_taskmanager.egg-info/dependency_links.txt
|
| 9 |
+
openenv_taskmanager.egg-info/entry_points.txt
|
| 10 |
+
openenv_taskmanager.egg-info/requires.txt
|
| 11 |
+
openenv_taskmanager.egg-info/top_level.txt
|
| 12 |
+
server/__init__.py
|
| 13 |
+
server/app.py
|
| 14 |
+
server/taskmanager_environment.py
|
openenv_taskmanager.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_taskmanager.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = taskmanager.server.app:main
|
openenv_taskmanager.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core[core]>=0.2.1
|
| 2 |
+
|
| 3 |
+
[dev]
|
| 4 |
+
pytest>=8.0.0
|
| 5 |
+
pytest-cov>=4.0.0
|
openenv_taskmanager.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
taskmanager
|
pyproject.toml
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-taskmanager"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Taskmanager environment for OpenEnv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv runtime (provides FastAPI server + HTTP client types)
|
| 18 |
+
# install from github
|
| 19 |
+
# "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
|
| 20 |
+
"openenv-core[core]>=0.2.1",
|
| 21 |
+
# Environment-specific dependencies
|
| 22 |
+
# Add all dependencies needed for your environment here
|
| 23 |
+
# Examples:
|
| 24 |
+
# "numpy>=1.19.0",
|
| 25 |
+
# "torch>=2.0.0",
|
| 26 |
+
# "gymnasium>=0.29.0",
|
| 27 |
+
# "openspiel>=1.0.0",
|
| 28 |
+
# "smolagents>=1.22.0,<2",
|
| 29 |
+
]
|
| 30 |
+
|
| 31 |
+
[project.optional-dependencies]
|
| 32 |
+
dev = [
|
| 33 |
+
"pytest>=8.0.0",
|
| 34 |
+
"pytest-cov>=4.0.0",
|
| 35 |
+
]
|
| 36 |
+
|
| 37 |
+
[project.scripts]
|
| 38 |
+
# Server entry point - enables running via: uv run --project . server
|
| 39 |
+
# or: python -m taskmanager.server.app
|
| 40 |
+
server = "taskmanager.server.app:main"
|
| 41 |
+
|
| 42 |
+
[tool.setuptools]
|
| 43 |
+
include-package-data = true
|
| 44 |
+
packages = ["taskmanager", "taskmanager.server"]
|
| 45 |
+
package-dir = { "taskmanager" = ".", "taskmanager.server" = "server" }
|
server/__init__.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Taskmanager environment server components."""
|
| 8 |
+
|
| 9 |
+
from .taskmanager_environment import TaskmanagerEnvironment
|
| 10 |
+
|
| 11 |
+
__all__ = ["TaskmanagerEnvironment"]
|
server/app.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
FastAPI application for the Taskmanager Environment.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from fastapi.responses import HTMLResponse
|
| 9 |
+
from fastapi import APIRouter
|
| 10 |
+
import os
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
from openenv.core.env_server.http_server import create_app
|
| 14 |
+
except Exception as e:
|
| 15 |
+
raise ImportError(
|
| 16 |
+
"openenv is required. Install dependencies with 'uv sync'"
|
| 17 |
+
) from e
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
from ..models import TaskmanagerAction, TaskmanagerObservation
|
| 21 |
+
from .taskmanager_environment import TaskmanagerEnvironment
|
| 22 |
+
except ImportError:
|
| 23 |
+
from models import TaskmanagerAction, TaskmanagerObservation
|
| 24 |
+
from server.taskmanager_environment import TaskmanagerEnvironment
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ================= CREATE APP =================
|
| 28 |
+
|
| 29 |
+
app = create_app(
|
| 30 |
+
TaskmanagerEnvironment,
|
| 31 |
+
TaskmanagerAction,
|
| 32 |
+
TaskmanagerObservation,
|
| 33 |
+
env_name="taskmanager",
|
| 34 |
+
max_concurrent_envs=1,
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
# ================= ROUTER =================
|
| 38 |
+
|
| 39 |
+
router = APIRouter()
|
| 40 |
+
|
| 41 |
+
# 🔥 Serve demo UI
|
| 42 |
+
@router.get("/", response_class=HTMLResponse)
def home():
    """Serve the demo UI page.

    Reads ``demo.html`` from the directory above this module and returns its
    contents as HTML. If the file is missing, a small HTML notice is returned
    with status 404 instead of letting ``FileNotFoundError`` escape.
    """
    file_path = os.path.join(os.path.dirname(__file__), "..", "demo.html")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        return HTMLResponse(
            content=(
                "<h1>Taskmanager Environment</h1>"
                "<p>Demo page (demo.html) was not found.</p>"
            ),
            status_code=404,
        )
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# 🔥 Run full agent (for UI)
|
| 50 |
+
@router.get("/run-agent")
def run_agent():
    """Run one full episode with the heuristic agent and return its trace.

    A fresh ``TaskmanagerEnvironment`` is reset and stepped for at most 20
    steps. On each step the ticket with the best (type, priority, earliest
    deadline) rank is chosen, where bug > feature > enhancement.

    Returns:
        A dict with ``steps`` (one entry per step holding the chosen ticket,
        the remaining tasks, the reward and the current time) and
        ``final_score`` (total reward divided by 200, clamped to [0.0, 1.0]).
    """
    type_score = {"bug": 3, "feature": 2, "enhancement": 1}

    # 🔥 same logic as inference
    def rank(t):
        return (
            type_score.get(t["type"], 0),
            t["priority"],
            -t["deadline"],
        )

    env = TaskmanagerEnvironment()

    obs = env.reset()
    total_reward = 0
    steps = []

    for _ in range(20):
        tickets = obs.tasks

        if not tickets:
            break

        # First maximal ticket wins, same as a stable descending sort.
        best = max(tickets, key=rank)

        # Use the real action model rather than an ad-hoc stand-in class.
        obs = env.step(TaskmanagerAction(task_id=best["id"]))

        # A missing reward counts as 0 towards the total.
        total_reward += obs.reward or 0

        steps.append({
            "chosen": best,
            "remaining": obs.tasks,
            "reward": obs.reward,
            "time": obs.current_time,
        })

        if obs.done:
            break

    # 🔥 normalize score
    final_score = max(0.0, min(1.0, total_reward / 200))

    return {
        "steps": steps,
        "final_score": final_score,
    }
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
app.include_router(router)
|
| 99 |
+
|
| 100 |
+
# ================= MAIN =================
|
| 101 |
+
|
| 102 |
+
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve ``app`` with uvicorn on the given host and port.

    Args:
        host: Interface address to bind to.
        port: TCP port to listen on.
    """
    # uvicorn is imported lazily so importing this module does not require it.
    from uvicorn import run as serve

    serve(app, host=host, port=port)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
if __name__ == "__main__":
|
| 108 |
+
main()
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv[core]>=0.2.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
|
server/taskmanager_environment.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
|
| 4 |
+
"""
|
| 5 |
+
AI Ticket Prioritization Environment (Jira-like)
|
| 6 |
+
|
| 7 |
+
Simulates a real-world engineering workflow:
|
| 8 |
+
- Bug fixes (critical)
|
| 9 |
+
- Feature development
|
| 10 |
+
- UI enhancements
|
| 11 |
+
|
| 12 |
+
Agent must prioritize tickets to maximize business impact and avoid SLA violations.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
from uuid import uuid4
|
| 17 |
+
import random
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
from openenv.core.env_server.interfaces import Environment
|
| 21 |
+
from openenv.core.env_server.types import State
|
| 22 |
+
except ImportError:
|
| 23 |
+
|
| 24 |
+
class Environment:
|
| 25 |
+
pass
|
| 26 |
+
|
| 27 |
+
@dataclass
|
| 28 |
+
class State:
|
| 29 |
+
episode_id: str
|
| 30 |
+
step_count: int
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
try:
|
| 34 |
+
from ..models import TaskmanagerAction, TaskmanagerObservation
|
| 35 |
+
except ImportError:
|
| 36 |
+
from models import TaskmanagerAction, TaskmanagerObservation
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class TaskmanagerEnvironment(Environment):
    """Ticket prioritization environment.

    Each episode presents a list of ticket dicts with the keys ``id``,
    ``deadline``, ``priority``, ``effort`` and ``type``. On every step the
    agent names one ticket id; working that ticket advances the clock by its
    effort and yields a reward that depends on whether the deadline was met,
    on the ticket's priority, and on its type.

    The number of tickets per episode scales with the average total reward of
    the episodes completed so far on this instance.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    # Reward multiplier per ticket type. Any type not listed here (e.g.
    # "enhancement") uses a multiplier of 1.
    _TYPE_MULTIPLIER = {"bug": 2, "feature": 1.5}

    def __init__(self):
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_time = 0
        self.tickets = []
        self.max_steps = 20

        # Both counters persist across reset() calls; they drive the
        # difficulty selection in reset().
        self.total_reward = 0
        self.episode_count = 0

    # ================= TICKET GENERATOR =================

    def generate_tickets(self, num_tickets):
        """Create ``num_tickets`` random tickets and return them shuffled.

        Deadlines are laid out along a running clock: a ticket's deadline is
        the summed effort of all tickets generated before it, plus its own
        effort, plus 3-8 units of slack. Working the tickets in generation
        order therefore meets every deadline; the final shuffle hides that
        order from the agent.

        Args:
            num_tickets: Number of tickets to generate.

        Returns:
            list[dict]: Tickets with ids ``1..num_tickets`` in random order.
        """
        tickets = []
        elapsed = 0

        for ticket_id in range(1, num_tickets + 1):
            ticket_type = random.choice(["bug", "feature", "enhancement"])
            effort = random.randint(1, 3)

            # Slack on top of the cumulative effort keeps the deadline feasible.
            slack = random.randint(3, 8)
            deadline = elapsed + effort + slack

            priority = random.randint(1, 5)

            tickets.append({
                "id": ticket_id,
                "deadline": deadline,
                "priority": priority,
                "effort": effort,
                "type": ticket_type,
            })

            # Advance the reference clock so the whole sequence stays solvable.
            elapsed += effort

        random.shuffle(tickets)
        return tickets

    # ================= RESET =================

    def reset(self) -> TaskmanagerObservation:
        """Start a new episode and return its initial observation.

        The ticket count is chosen from the average total reward per completed
        episode: below 5 -> 5 tickets, below 15 -> 8 tickets, otherwise 12.

        Returns:
            TaskmanagerObservation: Fresh ticket list, clock at 0, the full
            step budget, zero reward and ``done=False``.
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_time = 0

        # Average over episodes that have actually been played. The episode
        # being started has earned nothing yet, so it must not be counted in
        # the denominator.
        completed_episodes = self.episode_count
        avg_reward = self.total_reward / max(1, completed_episodes)
        self.episode_count += 1

        if avg_reward < 5:
            num_tickets = 5
        elif avg_reward < 15:
            num_tickets = 8
        else:
            num_tickets = 12

        self.tickets = self.generate_tickets(num_tickets)

        print(
            f"Episode {self.episode_count} | Tickets: {num_tickets} | Avg reward: {avg_reward:.2f}"
        )

        return TaskmanagerObservation(
            tasks=self.tickets,  # the observation field is named 'tasks'
            current_time=self.current_time,
            steps_left=self.max_steps,
            reward=0.0,
            done=False,
        )

    # ================= STEP =================

    def step(self, action: TaskmanagerAction) -> TaskmanagerObservation:
        """Work on the ticket named by ``action.task_id``.

        Reward rules:
          * Unknown ticket id: reward -1; the clock and ticket list are
            unchanged.
          * Finished on or before the deadline: ``priority * 3``.
          * Finished late: ``priority - 0.5 * delay``, floored at -2.
          * The result is then multiplied by the ticket-type multiplier
            (bug x2, feature x1.5, anything else x1). This also scales a
            negative late penalty.

        Args:
            action: Object exposing a ``task_id`` attribute.

        Returns:
            TaskmanagerObservation: Remaining tickets, updated clock, steps
            left, the step reward, and ``done`` once no tickets remain or the
            step budget is exhausted.
        """
        self._state.step_count += 1

        ticket = next((t for t in self.tickets if t["id"] == action.task_id), None)

        if ticket is not None:
            self.current_time += ticket["effort"]

            if self.current_time <= ticket["deadline"]:
                reward = ticket["priority"] * 3
            else:
                delay = self.current_time - ticket["deadline"]
                # Soft penalty: a slightly late ticket can still earn a
                # positive reward.
                reward = max(-2, ticket["priority"] - (delay * 0.5))

            reward *= self._TYPE_MULTIPLIER.get(ticket["type"], 1)

            # The ticket is done; drop it from the backlog.
            self.tickets = [t for t in self.tickets if t["id"] != action.task_id]
        else:
            reward = -1

        self.total_reward += reward

        done = len(self.tickets) == 0 or self._state.step_count >= self.max_steps

        return TaskmanagerObservation(
            tasks=self.tickets,
            current_time=self.current_time,
            steps_left=self.max_steps - self._state.step_count,
            reward=reward,
            done=done,
            metadata={
                "step": self._state.step_count,
                "remaining_tickets": len(self.tickets),
            },
        )

    @property
    def state(self) -> State:
        """Return the current episode state (episode id and step count)."""
        return self._state
|
test.py
ADDED
|
File without changes
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|