# Gov_Workflow_RL/rl/configs/ppo_easy.yaml
# Siddharaj Shirke
# deploy: clean code-only snapshot for HF Space
# df97e68
# Phase 1 -- Masked PPO on district_backlog_easy
# PPO algorithm settings. The children are indented so they load as a nested
# mapping under `hyperparameters` instead of as unrelated top-level keys
# sitting next to a null `hyperparameters` entry.
# NOTE(review): nesting was reconstructed from a flattened copy; the key names
# look like Stable-Baselines3 PPO arguments -- confirm against the loader.
hyperparameters:
  learning_rate: 0.0003
  n_steps: 512
  batch_size: 64
  n_epochs: 10
  gamma: 0.99
  gae_lambda: 0.95
  clip_range: 0.2
  ent_coef: 0.01
  vf_coef: 0.5
  max_grad_norm: 0.5
  # Presumably the hidden-layer widths of the policy network -- verify how the
  # training script consumes this list.
  net_arch: [256, 256]
# Run-level settings (budget, parallelism, seeding, evaluation, verbosity).
# The children are indented so they load as a nested mapping under `training`
# instead of as top-level keys next to a null `training` entry.
# NOTE(review): nesting was reconstructed from a flattened copy -- confirm
# that every key below belongs to this section.
training:
  total_timesteps: 200000
  n_envs: 4
  seed: 42
  # Evaluation cadence and size; the unit of eval_freq (steps vs. calls) is
  # not visible here -- check the callback that reads it.
  eval_freq: 16384
  n_eval_episodes: 2
  # Presumably scales eval_freq for a separate grader evaluation -- verify.
  grader_eval_freq_multiplier: 4
  enable_eval_callback: true
  # Console output switches; all are turned off / silent in this config.
  progress_bar: false
  model_verbose: 0
  callback_verbose: 0
# Per-task score targets, keyed by task name. The entry is indented so it
# loads as a mapping under `target_scores` rather than as a top-level key
# next to a null `target_scores` entry.
# NOTE(review): nesting was reconstructed from a flattened copy; how the
# target is used (early stop, pass/fail gate, ...) is not visible here.
target_scores:
  district_backlog_easy: 0.80