# dquarel's picture
# Document alpha=0.6 and alpha=1.0 variants in README and train.yaml
# 5384750
# Training configuration: a set of fixed parameters plus the sweep axes whose
# values (alpha, run_id) are referenced by the templates below.
# NOTE(review): the nesting in this file was reconstructed from a copy whose
# leading whitespace had been lost. Confirm that `sweep` is a top-level
# sibling of `parameters` and that each sweep axis is a nested list.
parameters:
  project_name: jaxgmg2_3phase_optim_state
  action: rl
  rl_action: train

  # Written with a decimal point so that YAML 1.1 loaders (e.g. PyYAML)
  # resolve it as a float; a bare `5e-5` is loaded as the string "5e-5" there.
  lr: 5.0e-5
  discount_rate: 0.98

  cheese_loc: any
  env_layout: open
  mask_type: first_episode
  use_prev_action: false
  trim_episodes: false
  log_optimizer_state: true

  # 10 billion. Plain digits: the `10_000_000_000` spelling is only an
  # integer under YAML 1.1 and becomes a string under the YAML 1.2 core schema.
  num_total_env_steps: 10000000000
  num_levels: 9600
  grad_acc_per_chunk: 5
  num_rollout_steps: 64

  # Template over discount_rate, alpha and run_id. For discount_rate=0.98,
  # alpha=0.6, run_id=15 it would presumably render as 980615 -- confirm how
  # the launcher evaluates the embedded expressions.
  seed_formula: "{int(discount_rate*100):02d}{int(alpha*10):02d}{run_id:02d}"

  ckpt_dir: jaxgmg2_3phase_optim_state
  # Template over alpha, discount_rate, run_id and seed.
  f_str_ckpt: "al_{alpha}_g_{discount_rate}_id_{run_id}_seed_{seed}"

  # Comma-separated `a:b` pairs; kept quoted so the colons and digits are not
  # re-typed by the YAML loader. The meaning of each pair is defined by the
  # consumer -- TODO confirm.
  eval_schedule: "0:1,250:2,500:5,2000:10"

  wandb_project: jaxgmg2_3phase_optim_state
  use_wandb: true
  use_hf: true
  no_tqdm: true
  ntfy: david_jaxgmg

# Two sweep axes: alpha (2 values) and run_id (16 values, 15 through 30).
# Presumably the launcher combines them as a grid (2 x 16 = 32 runs) --
# confirm against the sweep runner.
sweep:
  - - alpha: 0.6
    - alpha: 1.0
  - - run_id: 15
    - run_id: 16
    - run_id: 17
    - run_id: 18
    - run_id: 19
    - run_id: 20
    - run_id: 21
    - run_id: 22
    - run_id: 23
    - run_id: 24
    - run_id: 25
    - run_id: 26
    - run_id: 27
    - run_id: 28
    - run_id: 29
    - run_id: 30