{
  "hidden_size": 4096,
  "num_head_layers": 1,
  "num_heads": 8,
  "use_adaln": true,
  "use_mask_embeddings": true,
  "use_block_embeddings": false,
  "learning_rate": 1e-05,
  "weight_decay": 0.01,
  "num_epochs": 3,
  "max_grad_norm": 1.0,
  "per_gpu_policy_update_prompt_batch_size": 16,
  "group_size": 16,
  "per_gpu_sampling_mini_batch_size": 8,
  "per_gpu_ref_mini_batch_size": 8,
  "per_gpu_training_mini_batch_size": 2,
  "update_steps_per_rollouts": 2,
  "gradient_accumulation_steps": 32,
  "use_cached_hidden_states": true,
  "use_gradient_checkpointing": false,
  "use_step_probs": true,
  "optimize_denoiser": false,
  "denoiser_learning_rate": null,
  "denoiser_weight_decay": null,
  "kl_coef": 0.1,
  "clip_range": 0.2,
  "advantage_clip_min": null,
  "advantage_clip_max": null,
  "ratio_cap": null,
  "use_ref_kl": true,
  "reward_weights": {
    "judge": 1.0,
    "correctness": 1.0,
    "lookahead": 1.0,
    "format": 0.5
  },
  "alpha_values": [
    1.0
  ],
  "steps": 256,
  "gen_length": 512,
  "block_length": 64,
  "temperature": 1.0,
  "temp_explore": 1.0,
  "use_sampling_new": false,
  "mixed_precision": true,
  "reserve_teacher_gpu": true,
  "teacher_device_index": 7,
  "log_interval": 1,
  "eval_interval": 100,
  "save_interval": 1000,
  "use_wandb": true,
  "wandb_project": "planner-rl-8gpu",
  "wandb_run_name": "my-run-8gpu",
  "log_text_samples": 4,
  "debug_logging": false,
  "debug_log_interval": 1,
  "output_dir": "/accounts/projects/songmei/hengyuf/hengyu_scratch/hengyuf/ete-checkpoints/RL",
  "resume_from_checkpoint": null,
  "compute_logprob_tensor": false
}