Document alpha=0.6 and alpha=1.0 variants in README and train.yaml

5384750 11 days ago

990 Bytes

	parameters:
	project_name: jaxgmg2_3phase_optim_state
	action: rl
	rl_action: train

	lr: 5e-5
	discount_rate: 0.98
	cheese_loc: any
	env_layout: open
	mask_type: first_episode
	use_prev_action: false
	trim_episodes: false
	log_optimizer_state: true

	num_total_env_steps: 10_000_000_000
	num_levels: 9600
	grad_acc_per_chunk: 5
	num_rollout_steps: 64

	seed_formula: "{int(discount_rate100):02d}{int(alpha10):02d}{run_id:02d}"
	ckpt_dir: jaxgmg2_3phase_optim_state
	f_str_ckpt: "al_{alpha}_g_{discount_rate}_id_{run_id}_seed_{seed}"
	eval_schedule: "0:1,250:2,500:5,2000:10"

	wandb_project: jaxgmg2_3phase_optim_state
	use_wandb: true
	use_hf: true
	no_tqdm: true
	ntfy: david_jaxgmg

	sweep:
	- - alpha: 0.6
	- alpha: 1.0
	- - run_id: 15
	- run_id: 16
	- run_id: 17
	- run_id: 18
	- run_id: 19
	- run_id: 20
	- run_id: 21
	- run_id: 22
	- run_id: 23
	- run_id: 24
	- run_id: 25
	- run_id: 26
	- run_id: 27
	- run_id: 28
	- run_id: 29
	- run_id: 30