ushort commited on
Commit
2f684d2
·
verified ·
1 Parent(s): f1997d9

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Multi-stage build using openenv-base
8
+ # This Dockerfile is flexible and works for both:
9
+ # - In-repo environments (with local OpenEnv sources)
10
+ # - Standalone environments (with openenv from PyPI/Git)
11
+ # The build script (openenv build) handles context detection and sets appropriate build args.
12
+
13
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
14
+ FROM ${BASE_IMAGE} AS builder
15
+
16
+ WORKDIR /app
17
+
18
+ # Ensure git is available (required for installing dependencies from VCS)
19
+ RUN apt-get update && \
20
+ apt-get install -y --no-install-recommends git && \
21
+ rm -rf /var/lib/apt/lists/*
22
+
23
+ # Build argument to control whether we're building standalone or in-repo
24
+ ARG BUILD_MODE=in-repo
25
+ ARG ENV_NAME=myenv
26
+
27
+ # Copy environment code (always at root of build context)
28
+ COPY . /app/env
29
+
30
+ # For in-repo builds, openenv is already vendored in the build context
31
+ # For standalone builds, openenv will be installed via pyproject.toml
32
+ WORKDIR /app/env
33
+
34
+ # Ensure uv is available (for local builds where base image lacks it)
35
+ RUN if ! command -v uv >/dev/null 2>&1; then \
36
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
37
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
38
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
39
+ fi
40
+
41
+ # Install dependencies using uv sync
42
+ # If uv.lock exists, use it; otherwise resolve on the fly
43
+ RUN --mount=type=cache,target=/root/.cache/uv \
44
+ if [ -f uv.lock ]; then \
45
+ uv sync --frozen --no-install-project --no-editable; \
46
+ else \
47
+ uv sync --no-install-project --no-editable; \
48
+ fi
49
+
50
+ RUN --mount=type=cache,target=/root/.cache/uv \
51
+ if [ -f uv.lock ]; then \
52
+ uv sync --frozen --no-editable; \
53
+ else \
54
+ uv sync --no-editable; \
55
+ fi
56
+
57
+ # Final runtime stage
58
+ FROM ${BASE_IMAGE}
59
+
60
+ WORKDIR /app
61
+
62
+ # Copy the virtual environment from builder
63
+ COPY --from=builder /app/env/.venv /app/.venv
64
+
65
+ # Copy the environment code
66
+ COPY --from=builder /app/env /app/env
67
+
68
+ # Set PATH to use the virtual environment
69
+ ENV PATH="/app/.venv/bin:$PATH"
70
+
71
+ # Set PYTHONPATH so imports work correctly
72
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
73
+
74
+ ENV ENABLE_WEB_INTERFACE=true
75
+ # Health check
76
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
77
+ CMD curl -f http://localhost:8000/health || exit 1
78
+
79
+ # Run the FastAPI server
80
+ # The module path is constructed to work with the /app/env structure
81
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
README.md CHANGED
@@ -1,10 +1,142 @@
1
  ---
2
- title: Doc
3
- emoji: 📊
4
- colorFrom: yellow
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
 
 
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Taskmanager Environment Server
3
+ emoji: 🎬
4
+ colorFrom: green
5
+ colorTo: red
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
+ - rl
13
+ - scheduling
14
  ---
15
 
16
+ # Taskmanager Environment
17
+
18
+ A reinforcement learning environment that simulates a real-world engineering workflow. The agent must prioritize tickets (bugs, features, UI enhancements) to maximize business impact and avoid SLA violations.
19
+
20
+ ## Quick Start
21
+
22
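+ The simplest way to use the Taskmanager environment is through the `TaskmanagerEnv` class. The snippet below shows the call sequence; note that `inference.py` awaits `reset()`, `step()` and `close()`, so with this client run these calls with `await` inside an `async` function: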
23
+
24
+ ```python
25
+ from taskmanager import TaskmanagerAction, TaskmanagerEnv
26
+
27
+ try:
28
+ # Create environment from Docker image
29
+ env = TaskmanagerEnv.from_docker_image("taskmanager:latest")
30
+
31
+ # Reset to start a new episode
32
+ result = env.reset()
33
+
34
+ print(f"Current Time: {result.observation.current_time}")
35
+ print(f"Available Tasks: {len(result.observation.tasks)}")
36
+
37
+ # Execute tasks until the episode is done
38
+ done = False
39
+ while not done:
40
+ # Simple policy: pick the first available task
41
+ if not result.observation.tasks:
42
+ break
43
+
44
+ task_to_execute = result.observation.tasks[0]
45
+ task_id = task_to_execute["id"]
46
+
47
+ # Take a step
48
+ result = env.step(TaskmanagerAction(task_id=task_id))
49
+
50
+ print(f"Executed Task ID: {task_id}")
51
+ print(f" → Reward: {result.reward}")
52
+ print(f" → Current Time: {result.observation.current_time}")
53
+ print(f" → Tasks Remaining: {len(result.observation.tasks)}")
54
+
55
+ done = result.done
56
+
57
+ print("Episode completed!")
58
+
59
+ finally:
60
+ # Always clean up
61
+ env.close()
62
+ ```
63
+
64
+ ## Environment Details
65
+
66
+ ### Action
67
+ **TaskmanagerAction**: Contains a single field specifying the task to execute.
68
+ - `task_id` (int) - The ID of the task/ticket to execute
69
+
70
+ ### Observation
71
+ **TaskmanagerObservation**: Contains the current state of the environment.
72
+ - `tasks` (List[Dict]) - List of remaining tickets. Each ticket has:
73
+ - `id` (int): Unique identifier
74
+ - `type` (str): "bug", "feature", or "enhancement"
75
+ - `effort` (int): Time required to complete the ticket
76
+ - `priority` (int): Importance (1-5)
77
+ - `deadline` (int): Target completion time
78
+ - `current_time` (int) - Current time in the schedule
79
+ - `steps_left` (int) - Steps remaining in the episode
80
+ - `reward` (float) - Reward received from the previous action
81
+ - `done` (bool) - Whether the episode is complete (all tasks done or max steps reached)
82
+ - `metadata` (dict) - Additional info like step count
83
+
84
+ ### Reward Function
85
+ The reward function is designed to simulate business impact:
86
+ 1. **Base Reward**: `priority * 3` if completed on or before the deadline.
87
+ 2. **Penalty**: If completed after the deadline, the base reward is `priority - delay * 0.5` instead, floored at -2 before the type multiplier is applied (so a late bug can score as low as -4).
88
+ 3. **Type Multipliers**:
89
+ - **Bugs**: 2.0x multiplier (Critical)
90
+ - **Features**: 1.5x multiplier
91
+ - **Enhancements**: 1.0x multiplier
92
+ 4. **Invalid Action**: -1 reward for attempting a non-existent task ID.
93
+
94
+ ## Building the Docker Image
95
+
96
+ Before using the environment, you need to build the Docker image.
97
+
98
+ **To create the Docker image:**
99
+ ```bash
100
+ # From project root
101
+ docker build -t taskmanager .
102
+ ```
103
+
104
+ **To run the Docker image locally:**
105
+ ```bash
106
+ # Run the container in detached mode and map port 8000
107
+ docker run -d -p 8000:8000 --name taskmanager_server taskmanager
108
+ ```
109
+
110
+ ## Deploying to Hugging Face Spaces
111
+
112
+ You can easily deploy your OpenEnv environment to Hugging Face Spaces using the `openenv push` command:
113
+
114
+ ```bash
115
+ # From the environment directory (where openenv.yaml is located)
116
+ openenv push
117
+
118
+ # Or specify options
119
+ openenv push --namespace my-org --private
120
+ ```
121
+
122
+ After deployment, your space will be available at:
123
+ `https://huggingface.co/spaces/<repo-id>`
124
+
125
+ The deployed space includes:
126
+ - **Web Interface** at `/web` - Interactive UI for exploring the environment
127
+ - **API Documentation** at `/docs` - Full OpenAPI/Swagger interface
128
+ - **Health Check** at `/health` - Container health monitoring
129
+ - **WebSocket** at `/ws` - Persistent session endpoint for low-latency interactions
130
+
131
+ ## Project Structure
132
+
133
+ ```
134
+ taskmanager/
135
+ ├── client.py # Environment client implementation
136
+ ├── models.py # Action and Observation Pydantic models
137
+ ├── openenv.yaml # OpenEnv manifest
138
+ ├── server/
139
+ │ ├── app.py # FastAPI application
140
+ │ └── taskmanager_environment.py # Core environment logic and reward function
141
+ └── Dockerfile # Container definition
142
+ ```
__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Taskmanager Environment."""
8
+
9
+ from .client import TaskmanagerEnv
10
+ from .models import TaskmanagerAction, TaskmanagerObservation
11
+
12
+ __all__ = [
13
+ "TaskmanagerAction",
14
+ "TaskmanagerObservation",
15
+ "TaskmanagerEnv",
16
+ ]
client.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """Taskmanager Environment Client."""
5
+
6
+ from typing import Dict, List, Optional
7
+
8
+ from openenv.core import EnvClient
9
+ from openenv.core.client_types import StepResult
10
+ from openenv.core.env_server.types import State
11
+
12
+ from models import TaskmanagerAction, TaskmanagerObservation
13
+
14
+
15
class TaskmanagerEnv(EnvClient[TaskmanagerAction, TaskmanagerObservation, State]):
    """
    Client for the Task Scheduling Environment.

    Supports:
    - default reset()
    - custom reset with user-defined tasks
    """

    # ================= STEP =================

    def _step_payload(self, action: TaskmanagerAction) -> Dict:
        """Serialize an action into the payload sent with a step request."""
        return {
            "task_id": action.task_id,
        }

    # ================= PARSE RESULT =================

    def _parse_result(self, payload: Dict) -> StepResult[TaskmanagerObservation]:
        """Build a ``StepResult`` from a response payload.

        Missing fields fall back to neutral defaults (empty task list, time 0,
        no steps left, ``done=False``); ``reward`` stays ``None`` when absent.
        """
        # ``or {}`` also covers an explicit ``"observation": null`` in the payload.
        obs_data = payload.get("observation") or {}
        reward = payload.get("reward")
        done = payload.get("done", False)

        observation = TaskmanagerObservation(
            tasks=obs_data.get("tasks", []),
            current_time=obs_data.get("current_time", 0),
            steps_left=obs_data.get("steps_left", 0),
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )

        return StepResult(
            observation=observation,
            reward=reward,
            done=done,
        )

    # ================= PARSE STATE =================

    def _parse_state(self, payload: Dict) -> State:
        """Build a ``State`` from a response payload (step count defaults to 0)."""
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )

    # ================= CUSTOM RESET =================

    async def reset(
        self, tasks: Optional[List[Dict]] = None, **kwargs
    ) -> StepResult[TaskmanagerObservation]:
        """
        Reset the environment. If tasks are provided, they will be used instead of the predefined task list.

        Args:
            tasks: Optional list of ticket dicts, forwarded as ``config["tasks"]``.
            **kwargs: Passed through to the base client's ``reset``; a ``config``
                entry, if present, is merged with ``tasks``.
        """
        # Copy the caller's config so injecting ``tasks`` never mutates their dict.
        payload = dict(kwargs.pop("config", None) or {})
        if tasks is not None:
            payload["tasks"] = tasks

        # Call underlying reset via super
        return await super().reset(config=payload, **kwargs)
grader.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # grader.py
2
+
3
+
4
def compute_score(total_reward, max_possible_reward):
    """
    Normalize score between 0 and 1

    The ratio ``total_reward / max_possible_reward`` is clamped to the open
    interval bounds ``[0.001, 0.999]``. A non-positive ``max_possible_reward``
    or a NaN ratio yields the floor value ``0.001``.
    """
    import math  # local import: this module has no top-level imports

    if max_possible_reward <= 0:
        return 0.001

    score = total_reward / max_possible_reward

    # NaN compares false against everything, so min()/max() would let it
    # slip through as 0.999; treat it as the worst score instead.
    if isinstance(score, float) and math.isnan(score):
        return 0.001

    return max(0.001, min(0.999, score))
13
+
14
+
15
def evaluate_episode(rewards, max_per_step=15):
    """Score an episode from its per-step rewards.

    The best possible total is taken to be ``max_per_step`` for every step
    recorded in ``rewards``; the result is whatever ``compute_score`` returns
    for the summed reward against that ceiling.
    """
    earned = sum(rewards)
    ceiling = len(rewards) * max_per_step
    return compute_score(earned, ceiling)
19
+
20
+
21
def evaluate_task1(rewards):
    """Score task 1 with a per-step reward ceiling of 15."""
    return evaluate_episode(rewards, max_per_step=15)
23
+
24
+
25
def evaluate_task2(rewards):
    """Score task 2 with a per-step reward ceiling of 15."""
    return evaluate_episode(rewards, max_per_step=15)
27
+
28
+
29
def evaluate_task3(rewards):
    """Score task 3 with a per-step reward ceiling of 15."""
    return evaluate_episode(rewards, max_per_step=15)
inference.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from typing import List, Optional
4
+
5
+ from openai import OpenAI
6
+
7
+ from client import TaskmanagerEnv
8
+ from models import TaskmanagerAction
9
+ from grader import compute_score # ✅ GRADER USED
10
+
11
+
12
+ # ================= CONFIG =================
13
+
14
+ API_KEY = os.environ.get("API_KEY", "dummy")
15
+ API_BASE_URL = os.environ.get("API_BASE_URL", "http://localhost:4000")
16
+ MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
17
+
18
+ BENCHMARK = "taskmanager"
19
+ MAX_STEPS = 20
20
+ SUCCESS_SCORE_THRESHOLD = 0.6
21
+
22
+ # ================= LOGGING =================
23
+
24
+
25
def log_start(task: str, env: str, model: str):
    """Print the ``[START]`` line announcing a task run and flush stdout."""
    line = f"[START] task={task} env={env} model={model}"
    print(line, flush=True)
27
+
28
+
29
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]):
    """Print one ``[STEP]`` line and flush stdout.

    ``reward`` is shown with two decimals, ``done`` as lowercase
    ``true``/``false``, and a missing or empty ``error`` as ``null``.
    """
    fields = (
        f"step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={str(done).lower()}",
        f"error={error or 'null'}",
    )
    print("[STEP] " + " ".join(fields), flush=True)
35
+
36
+
37
def log_end(success: bool, steps: int, score: float, rewards: List[float]):
    """Print the ``[END]`` summary line and flush stdout.

    ``score`` is shown with three decimals and ``rewards`` as a
    comma-separated list of two-decimal values (empty when there are none).
    """
    outcome = str(success).lower()
    formatted_rewards = [f"{r:.2f}" for r in rewards]
    rewards_str = ",".join(formatted_rewards)
    line = f"[END] success={outcome} steps={steps} score={score:.3f} rewards={rewards_str}"
    print(line, flush=True)
43
+
44
+
45
+ # ================= SMART POLICY =================
46
+
47
+
48
def choose_best_ticket(tickets):
    """Return the ``id`` of the highest-ranked ticket, or ``None`` if there are none.

    Tickets are ranked by, in order: type (bug > feature > enhancement,
    unknown types last), higher ``priority``, then earlier ``deadline``.
    When several tickets tie, the one appearing first in ``tickets`` wins.
    """
    if not tickets:
        return None

    # Built once per call rather than once per comparison key.
    type_rank = {"bug": 3, "feature": 2, "enhancement": 1}

    def rank(ticket):
        return (
            type_rank.get(ticket["type"], 0),
            ticket["priority"],
            -ticket["deadline"],
        )

    # max() is a single pass and, like the stable reverse sort it replaces,
    # returns the first of equally ranked tickets.
    return max(tickets, key=rank)["id"]
62
+
63
+
64
+ # ================= MAIN =================
65
+
66
+
67
async def main():
    """Run the heuristic agent on each benchmark task and log the outcome.

    For every task name a ``[START]`` line is logged, the environment is
    reset, and tickets are resolved one at a time (chosen by
    ``choose_best_ticket``) for at most ``MAX_STEPS`` steps. The summed reward
    is then normalized with ``compute_score`` against 15 points per step
    taken. An ``[END]`` line is logged for every task that was started, even
    if the episode raised, and the environment client is closed on exit.
    """
    import sys  # local import: only needed for the stderr diagnostics below

    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    env = TaskmanagerEnv(base_url="http://localhost:8000")

    try:
        tasks_to_run = ["task-1-easy", "task-2-medium", "task-3-hard"]

        for task_name in tasks_to_run:
            rewards: List[float] = []
            steps_taken = 0
            success = False
            score = 0.0

            log_start(task_name, BENCHMARK, MODEL_NAME)

            try:
                # RESET ENV (tickets auto-generated)
                result = await env.reset()
                obs = result.observation

                # Best-effort probe of the LLM endpoint; its reply is not used
                # by the heuristic policy, so a failure must not abort the run.
                try:
                    client.chat.completions.create(
                        model=MODEL_NAME,
                        messages=[{"role": "user", "content": "hello"}],
                        max_tokens=1,
                    )
                except Exception as exc:
                    # stderr keeps the structured stdout log lines clean.
                    print(f"[WARN] LLM probe failed: {exc}", file=sys.stderr, flush=True)

                for step in range(1, MAX_STEPS + 1):
                    if result.done:
                        break

                    ticket_id = choose_best_ticket(obs.tasks)
                    if ticket_id is None:
                        break

                    # STEP
                    result = await env.step(TaskmanagerAction(task_id=ticket_id))
                    obs = result.observation

                    reward = result.reward or 0.0
                    done = result.done

                    rewards.append(reward)
                    steps_taken = step

                    log_step(
                        step=step,
                        action=f"resolve_ticket_{ticket_id}",
                        reward=reward,
                        done=done,
                        error=None,
                    )

                    if done:
                        break

                # ================= GRADER =================
                total_reward = sum(rewards)
                max_per_step = 15
                max_possible = len(rewards) * max_per_step
                score = compute_score(total_reward, max_possible)
                success = score >= SUCCESS_SCORE_THRESHOLD
            finally:
                # Every [START] gets a matching [END], including when the
                # episode raised part-way (score/success keep their defaults).
                log_end(success, steps_taken, score, rewards)

    finally:
        try:
            await env.close()
        except Exception as exc:
            # Cleanup is best-effort, but report the failure instead of hiding it.
            print(f"[WARN] env.close() failed: {exc}", file=sys.stderr, flush=True)
141
+
142
+
143
+ if __name__ == "__main__":
144
+ asyncio.run(main())
models.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for the Taskmanager Environment.
9
+
10
+ The taskmanager environment simulates ticket prioritization: each action names the ticket to work on next, and each observation lists the remaining tickets.
11
+ """
12
+
13
+ from openenv.core.env_server.types import Action, Observation
14
+ from pydantic import Field
15
+ from typing import List, Dict
16
+
17
+
18
+ class TaskmanagerAction(Action):
19
+ """Action for the Taskmanager environment: the id of the ticket to execute next."""
20
+
21
+ task_id: int = Field(..., description="Task to execute")
22
+
23
+
24
+ class TaskmanagerObservation(Observation):
25
+ """Observation for the Task Scheduling Environment: remaining tickets, current time, and steps left."""
26
+
27
+ tasks: List[Dict] = Field(
28
+ default_factory=list, description="List of remaining tickets"
29
+ )
30
+ current_time: int = Field(default=0, description="Current time in the schedule")
31
+ steps_left: int = Field(default=0, description="Steps remaining in episode")
openenv.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: ai-ticket-prioritization-env
2
+
3
+ description: >
4
+ A reinforcement learning environment simulating real-world software engineering
5
+ ticket prioritization. The agent must decide the optimal order to resolve tickets
6
+ (bugs, features, enhancements) under deadlines and resource constraints.
7
+
8
+ entry_point: server.taskmanager_environment:TaskmanagerEnvironment
9
+
10
+ action_space: TaskmanagerAction
11
+ observation_space: TaskmanagerObservation
12
+
13
+ max_steps: 20
14
+
15
+ tags:
16
+ - openenv
17
+ - reinforcement-learning
18
+ - scheduling
19
+ - software-engineering
openenv_taskmanager.egg-info/PKG-INFO ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.4
2
+ Name: openenv-taskmanager
3
+ Version: 0.1.0
4
+ Summary: Taskmanager environment for OpenEnv
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: openenv-core[core]>=0.2.1
7
+ Provides-Extra: dev
8
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
9
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
openenv_taskmanager.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ README.md
2
+ pyproject.toml
3
+ ./__init__.py
4
+ ./client.py
5
+ ./models.py
6
+ openenv_taskmanager.egg-info/PKG-INFO
7
+ openenv_taskmanager.egg-info/SOURCES.txt
8
+ openenv_taskmanager.egg-info/dependency_links.txt
9
+ openenv_taskmanager.egg-info/entry_points.txt
10
+ openenv_taskmanager.egg-info/requires.txt
11
+ openenv_taskmanager.egg-info/top_level.txt
12
+ server/__init__.py
13
+ server/app.py
14
+ server/taskmanager_environment.py
openenv_taskmanager.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
openenv_taskmanager.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [console_scripts]
2
+ server = taskmanager.server.app:main
openenv_taskmanager.egg-info/requires.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openenv-core[core]>=0.2.1
2
+
3
+ [dev]
4
+ pytest>=8.0.0
5
+ pytest-cov>=4.0.0
openenv_taskmanager.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ taskmanager
pyproject.toml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ [build-system]
8
+ requires = ["setuptools>=45", "wheel"]
9
+ build-backend = "setuptools.build_meta"
10
+
11
+ [project]
12
+ name = "openenv-taskmanager"
13
+ version = "0.1.0"
14
+ description = "Taskmanager environment for OpenEnv"
15
+ requires-python = ">=3.10"
16
+ dependencies = [
17
+ # Core OpenEnv runtime (provides FastAPI server + HTTP client types)
18
+ # install from github
19
+ # "openenv-core[core] @ git+https://github.com/meta-pytorch/OpenEnv.git",
20
+ "openenv-core[core]>=0.2.1",
21
+ # Environment-specific dependencies
22
+ # Add all dependencies needed for your environment here
23
+ # Examples:
24
+ # "numpy>=1.19.0",
25
+ # "torch>=2.0.0",
26
+ # "gymnasium>=0.29.0",
27
+ # "openspiel>=1.0.0",
28
+ # "smolagents>=1.22.0,<2",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "pytest>=8.0.0",
34
+ "pytest-cov>=4.0.0",
35
+ ]
36
+
37
+ [project.scripts]
38
+ # Server entry point - enables running via: uv run --project . server
39
+ # or: python -m taskmanager.server.app
40
+ server = "taskmanager.server.app:main"
41
+
42
+ [tool.setuptools]
43
+ include-package-data = true
44
+ packages = ["taskmanager", "taskmanager.server"]
45
+ package-dir = { "taskmanager" = ".", "taskmanager.server" = "server" }
server/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Taskmanager environment server components."""
8
+
9
+ from .taskmanager_environment import TaskmanagerEnvironment
10
+
11
+ __all__ = ["TaskmanagerEnvironment"]
server/app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """
5
+ FastAPI application for the Taskmanager Environment.
6
+ """
7
+
8
+ from fastapi.responses import HTMLResponse
9
+ from fastapi import APIRouter
10
+ import os
11
+
12
+ try:
13
+ from openenv.core.env_server.http_server import create_app
14
+ except Exception as e:
15
+ raise ImportError(
16
+ "openenv is required. Install dependencies with 'uv sync'"
17
+ ) from e
18
+
19
+ try:
20
+ from ..models import TaskmanagerAction, TaskmanagerObservation
21
+ from .taskmanager_environment import TaskmanagerEnvironment
22
+ except ImportError:
23
+ from models import TaskmanagerAction, TaskmanagerObservation
24
+ from server.taskmanager_environment import TaskmanagerEnvironment
25
+
26
+
27
+ # ================= CREATE APP =================
28
+
29
+ app = create_app(
30
+ TaskmanagerEnvironment,
31
+ TaskmanagerAction,
32
+ TaskmanagerObservation,
33
+ env_name="taskmanager",
34
+ max_concurrent_envs=1,
35
+ )
36
+
37
+ # ================= ROUTER =================
38
+
39
+ router = APIRouter()
40
+
41
+ # 🔥 Serve demo UI
42
@router.get("/", response_class=HTMLResponse)
def home():
    """Serve the demo UI.

    Reads ``demo.html`` from the directory above this module and returns its
    text; ``open`` raises ``FileNotFoundError`` if the file is absent.
    """
    server_dir = os.path.dirname(__file__)
    demo_path = os.path.join(server_dir, "..", "demo.html")
    with open(demo_path, "r", encoding="utf-8") as page:
        html = page.read()
    return html
47
+
48
+
49
+ # 🔥 Run full agent (for UI)
50
@router.get("/run-agent")
def run_agent():
    """Run one full episode with the heuristic agent (used by the demo UI).

    A fresh ``TaskmanagerEnvironment`` is created and reset, then the
    best-ranked ticket (bug > feature > enhancement, then higher priority,
    then earlier deadline) is resolved each step until no tickets remain, the
    environment reports ``done``, or the environment's step budget is used up.

    Returns:
        dict with ``steps`` (one entry per step: the chosen ticket, the
        remaining tickets, the reward and the time) and ``final_score``
        (total reward divided by 200, clamped to ``[0.0, 1.0]``).
    """
    type_rank = {"bug": 3, "feature": 2, "enhancement": 1}

    # same logic as inference
    def ticket_rank(t):
        return (
            type_rank.get(t["type"], 0),
            t["priority"],
            -t["deadline"],
        )

    env = TaskmanagerEnvironment()

    obs = env.reset()
    total_reward = 0
    steps = []

    # Bound the loop by the environment's own step budget.
    for _ in range(env.max_steps):
        tickets = obs.tasks

        if not tickets:
            break

        # First of equally ranked tickets wins, as with the stable sort before.
        best = max(tickets, key=ticket_rank)

        # Use the real action model rather than an ad-hoc stand-in object.
        obs = env.step(TaskmanagerAction(task_id=best["id"]))

        total_reward += obs.reward

        steps.append({
            "chosen": best,
            "remaining": obs.tasks,
            "reward": obs.reward,
            "time": obs.current_time,
        })

        if obs.done:
            break

    # normalize score (200 is a fixed scaling constant, not a computed maximum)
    final_score = max(0.0, min(1.0, total_reward / 200))

    return {
        "steps": steps,
        "final_score": final_score,
    }
96
+
97
+
98
+ app.include_router(router)
99
+
100
+ # ================= MAIN =================
101
+
102
def main(host: str = "0.0.0.0", port: int = 8000):
    """Serve ``app`` with uvicorn on the given host and port."""
    # Imported lazily so importing this module does not require uvicorn.
    from uvicorn import run as serve

    serve(app, host=host, port=port)
105
+
106
+
107
+ if __name__ == "__main__":
108
+ main()
server/requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ openenv[core]>=0.2.0
2
+ fastapi>=0.115.0
3
+ uvicorn>=0.24.0
4
+
5
+
6
+
server/taskmanager_environment.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ """
5
+ AI Ticket Prioritization Environment (Jira-like)
6
+
7
+ Simulates a real-world engineering workflow:
8
+ - Bug fixes (critical)
9
+ - Feature development
10
+ - UI enhancements
11
+
12
+ Agent must prioritize tickets to maximize business impact and avoid SLA violations.
13
+ """
14
+
15
+ from dataclasses import dataclass
16
+ from uuid import uuid4
17
+ import random
18
+
19
+ try:
20
+ from openenv.core.env_server.interfaces import Environment
21
+ from openenv.core.env_server.types import State
22
+ except ImportError:
23
+
24
+ class Environment:
25
+ pass
26
+
27
+ @dataclass
28
+ class State:
29
+ episode_id: str
30
+ step_count: int
31
+
32
+
33
+ try:
34
+ from ..models import TaskmanagerAction, TaskmanagerObservation
35
+ except ImportError:
36
+ from models import TaskmanagerAction, TaskmanagerObservation
37
+
38
+
39
class TaskmanagerEnvironment(Environment):
    """Ticket-prioritization environment.

    Each episode holds a list of tickets (dicts with ``id``, ``deadline``,
    ``priority``, ``effort`` and ``type``). An action names a ticket id;
    executing it advances the clock by the ticket's effort, yields a reward
    based on priority, lateness and type, and removes the ticket. The episode
    ends when no tickets remain or ``max_steps`` steps have been taken.
    """

    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self):
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self.current_time = 0
        self.tickets = []
        self.max_steps = 20

        # Running totals across episodes; they drive the curriculum in reset().
        self.total_reward = 0
        self.episode_count = 0

    # ================= TICKET GENERATOR =================

    def generate_tickets(self, num_tickets):
        """Generate ``num_tickets`` random tickets in shuffled order.

        Deadlines are built against a running clock: each ticket's deadline is
        the cumulative effort of the tickets generated before it, plus its own
        effort, plus 3-8 units of slack. Ids are 1..num_tickets.
        """
        tickets = []
        current_time = 0

        for i in range(num_tickets):
            ticket_type = random.choice(["bug", "feature", "enhancement"])

            effort = random.randint(1, 3)

            # ensure feasible deadline
            slack = random.randint(3, 8)
            deadline = current_time + effort + slack

            priority = random.randint(1, 5)

            tickets.append(
                {
                    "id": i + 1,
                    "deadline": deadline,
                    "priority": priority,
                    "effort": effort,
                    "type": ticket_type,
                }
            )

            # update time so sequence is solvable
            current_time += effort

        # shuffle so agent must think
        random.shuffle(tickets)

        return tickets

    # ================= RESET =================

    def reset(self, seed=None, episode_id=None, tasks=None, **kwargs) -> TaskmanagerObservation:
        """Start a new episode and return its first observation.

        Args:
            seed: If given, seeds the ``random`` module before tickets are
                generated so the episode is reproducible.
            episode_id: If given, used as the new episode id; otherwise a
                fresh UUID is generated.
            tasks: Optional list of ticket dicts to use instead of generated
                ones (each is shallow-copied).
            **kwargs: A ``config`` dict containing ``tasks`` is also accepted,
                presumably how the client's ``reset(config=...)`` arrives;
                confirm against the server transport. Other keys are ignored.

        Without custom tasks, the ticket count follows a curriculum on the
        average total reward per completed episode: 5 tickets below 5,
        8 tickets below 15, otherwise 12.
        """
        if seed is not None:
            random.seed(seed)

        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
        self.current_time = 0

        # Average over episodes already played: compute it BEFORE counting the
        # episode being started, otherwise the divisor is one too large.
        avg_reward = self.total_reward / max(1, self.episode_count)
        self.episode_count += 1

        if tasks is None:
            config = kwargs.get("config")
            if isinstance(config, dict):
                tasks = config.get("tasks")

        if tasks is not None:
            # Copy so later bookkeeping never touches the caller's objects.
            self.tickets = [dict(t) for t in tasks]
            num_tickets = len(self.tickets)
        else:
            if avg_reward < 5:
                num_tickets = 5
            elif avg_reward < 15:
                num_tickets = 8
            else:
                num_tickets = 12

            self.tickets = self.generate_tickets(num_tickets)

        print(
            f"Episode {self.episode_count} | Tickets: {num_tickets} | Avg reward: {avg_reward:.2f}"
        )

        return TaskmanagerObservation(
            tasks=self.tickets,  # keep 'tasks' for compatibility
            current_time=self.current_time,
            steps_left=self.max_steps,
            reward=0.0,
            done=False,
        )

    # ================= STEP =================

    def step(self, action: TaskmanagerAction) -> TaskmanagerObservation:
        """Execute the ticket named by ``action.task_id`` and return the result.

        Reward for a known ticket: ``priority * 3`` when finished on or before
        its deadline, otherwise ``max(-2, priority - 0.5 * delay)``; the value
        is then multiplied by 2 for bugs, 1.5 for features and 1 for anything
        else. An unknown ticket id costs -1 and leaves the clock unchanged.
        Every call counts as a step.
        """
        self._state.step_count += 1

        ticket = next((t for t in self.tickets if t["id"] == action.task_id), None)

        if ticket is not None:
            self.current_time += ticket["effort"]

            # BASE REWARD
            if self.current_time <= ticket["deadline"]:
                reward = ticket["priority"] * 3  # boosted reward for being on time
            else:
                delay = self.current_time - ticket["deadline"]
                # Soft penalty: can still be positive if only slightly late.
                reward = max(-2, ticket["priority"] - (delay * 0.5))

            # TYPE MULTIPLIER: bugs are critical, features next, the rest neutral.
            reward *= {"bug": 2, "feature": 1.5}.get(ticket["type"], 1)

            # remove ticket
            self.tickets = [t for t in self.tickets if t["id"] != action.task_id]
        else:
            reward = -1

        self.total_reward += reward

        done = len(self.tickets) == 0 or self._state.step_count >= self.max_steps

        return TaskmanagerObservation(
            tasks=self.tickets,
            current_time=self.current_time,
            steps_left=self.max_steps - self._state.step_count,
            reward=reward,
            done=done,
            metadata={
                "step": self._state.step_count,
                "remaining_tickets": len(self.tickets),
            },
        )

    @property
    def state(self) -> State:
        """Current episode state (episode id and step count)."""
        return self._state
test.py ADDED
File without changes
uv.lock ADDED
The diff for this file is too large to render. See raw diff