---
# DataSelectEnv — OpenEnv environment card
name: DataSelectEnv
version: "1.0.0"  # quoted: prevents implicit float/number typing
# Folded scalar (>-): reads as one logical line, trailing newline stripped.
description: >-
  OpenEnv RL environment for data curation in ML training.
  Agents learn to select high-quality training data from a noisy pool
  under budget constraints, balancing uncertainty, diversity, and noise
  avoidance to incrementally improve a classifier's validation performance.
tags:
  - openenv
  - active-learning
  - data-curation
  - noisy-labels
  - machine-learning
  - reinforcement-learning
authors:
  - InfraNova
# JSON-Schema-style description of what the agent observes each step.
observation_space:
  type: object
  properties:
    remaining_budget:
      type: integer
      description: Samples remaining in the selection budget
    diversity_score:
      type: number
      description: Standard deviation of the current training set (proxy for diversity)
    noise_estimate:
      type: number
      description: Estimated fraction of noisy samples remaining in the pool
    current_performance:
      type: number
      description: Current model validation performance (1 / (1 + log_loss))
    samples_available:
      type: integer
      description: Number of samples remaining in the unlabeled pool
# JSON-Schema-style description of the agent's action. All three fields
# are required on every step; strategy_weights are normalized server-side.
action_space:
  type: object
  required:
    - action_type
    - batch_size
    - strategy_weights
  properties:
    action_type:
      type: string
      enum: [select_batch, stop]  # flow style is fine for a short leaf enum
      description: Select a batch of data or stop the episode early
    batch_size:
      type: integer
      minimum: 0
      description: Number of samples to select this step
    strategy_weights:
      type: object
      description: Weights for each sampling strategy (normalized internally)
      properties:
        uncertainty:
          type: number
          minimum: 0.0
        diversity:
          type: number
          minimum: 0.0
        random:
          type: number
          minimum: 0.0
# Task definitions, ordered by difficulty. success_criteria strings are
# consumed by the grader endpoint; kept quoted since they contain '>' and '<'.
tasks:
  - id: easy
    difficulty: easy
    description: >-
      Clean dataset (flip_y=0.05), budget=300, max_steps=15.
      Agent must reach validation performance > 0.62.
      Score is normalized over range [0.55, 0.75].
    success_criteria: "current_performance > 0.62"
  - id: medium
    difficulty: medium
    description: >-
      High noise (flip_y=0.25), budget=150, max_steps=12.
      Agent must reach performance > 0.52 while keeping average
      noise selection rate below 0.50.
    success_criteria: "current_performance > 0.52 AND avg_noise_ratio < 0.50"
  - id: hard
    difficulty: hard
    description: >-
      High noise (flip_y=0.30), tight budget=100, max_steps=8.
      Agent must hit performance > 0.58 efficiently.
      Grader scores performance and budget efficiency jointly.
    success_criteria: "current_performance > 0.58, scored jointly with efficiency"
reward:
  type: continuous
  # Quoted: plain "-inf"/"+inf" are not YAML float infinities (those are
  # -.inf/.inf), so these were always strings — make that explicit.
  range: ["-inf", "+inf"]
  description: >-
    Shaped reward combining performance gain, diversity bonus,
    redundancy penalty, noise penalty, and budget cost.
    Provides dense signal throughout the episode — not just at termination.
# HTTP/WS surface exposed by the environment server.
endpoints:
  websocket: WS /ws  # primary transport; required on HF Spaces
  reset: POST /reset
  step: POST /step
  state: GET /state
  tasks: GET /tasks
  grader: POST /grader
  baseline: GET /baseline
  health: GET /health