Spaces:
Running
Running
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """Difficulty presets for WorkflowArena.""" | |
| from __future__ import annotations | |
| from workflow_arena.models import DifficultyPreset, DifficultyPresetConfig, GraderTarget | |
# EASY: 8-12 tasks on 3 workers; no time-budget multiplier, and every
# outage / retry-failure setting is zero.
_EASY_CONFIG = DifficultyPresetConfig(
    preset=DifficultyPreset.EASY,
    min_tasks=8,
    max_tasks=12,
    edge_probability=0.14,
    duration_min=1,
    duration_max=4,
    priority_min=1,
    priority_max=4,
    worker_count=3,
    deadline_tightness=0.22,
    time_budget_multiplier=None,
    worker_outage_rate=0.0,
    worker_outage_duration_min=0,
    worker_outage_duration_max=0,
    task_retry_failure_rate=0.0,
    max_task_retries=0,
    grader_target=GraderTarget(
        description=(
            "Reward agents that keep workers utilized and avoid obvious idle time on a "
            "small, low-pressure workflow."
        ),
        score_band_hint="0.8+ means near-greedy scheduling, 0.5 is acceptable, below 0.3 is weak.",
    ),
)

# MEDIUM: 12-18 tasks on 4 workers with a 1.6 time-budget multiplier;
# outage / retry-failure settings are still all zero.
_MEDIUM_CONFIG = DifficultyPresetConfig(
    preset=DifficultyPreset.MEDIUM,
    min_tasks=12,
    max_tasks=18,
    edge_probability=0.22,
    duration_min=1,
    duration_max=6,
    priority_min=1,
    priority_max=6,
    worker_count=4,
    deadline_tightness=0.40,
    time_budget_multiplier=1.6,
    worker_outage_rate=0.0,
    worker_outage_duration_min=0,
    worker_outage_duration_max=0,
    task_retry_failure_rate=0.0,
    max_task_retries=0,
    grader_target=GraderTarget(
        description=(
            "Reward agents that balance utilization, deadline adherence, and critical-path "
            "awareness on a moderately branching workflow."
        ),
        score_band_hint="0.75+ is strong, 0.45 to 0.75 is competitive, below 0.3 misses core tradeoffs.",
    ),
)

# HARD: 22-36 tasks on only 2 workers; the one preset defined here with
# non-zero worker-outage and task-retry-failure settings.
_HARD_CONFIG = DifficultyPresetConfig(
    preset=DifficultyPreset.HARD,
    min_tasks=22,
    max_tasks=36,
    edge_probability=0.37,
    duration_min=2,
    duration_max=9,
    priority_min=1,
    priority_max=8,
    worker_count=2,
    deadline_tightness=0.78,
    time_budget_multiplier=1.45,
    worker_outage_rate=0.2,
    worker_outage_duration_min=2,
    worker_outage_duration_max=4,
    task_retry_failure_rate=0.12,
    max_task_retries=1,
    grader_target=GraderTarget(
        description=(
            "Reward agents that identify and schedule long-running critical tasks early while "
            "protecting high-priority deadlines under frequent worker-capacity bottlenecks."
        ),
        score_band_hint="0.7+ is excellent, 0.4 to 0.7 is competent, below 0.25 is poor planning.",
    ),
)

# Registry mapping each difficulty preset to its config, in EASY -> MEDIUM -> HARD order.
PRESET_CONFIGS: dict[DifficultyPreset, DifficultyPresetConfig] = {
    DifficultyPreset.EASY: _EASY_CONFIG,
    DifficultyPreset.MEDIUM: _MEDIUM_CONFIG,
    DifficultyPreset.HARD: _HARD_CONFIG,
}
def get_preset_config(preset: DifficultyPreset) -> DifficultyPresetConfig:
    """Look up the config registered for ``preset`` and return a deep copy of it.

    The result comes from ``model_copy(deep=True)`` (presumably pydantic's
    ``BaseModel.model_copy``), so callers receive their own object rather than
    the entry stored in ``PRESET_CONFIGS``.

    Raises:
        KeyError: If ``preset`` has no entry in ``PRESET_CONFIGS``.
    """
    registered = PRESET_CONFIGS[preset]
    return registered.model_copy(deep=True)