{
  "model_name": "unsloth/Qwen2.5-3B-Instruct",
  "max_steps": 500,
  "num_episodes": 200,
  "max_samples": 2000,
  "num_generations": 4,
  "learning_rate": 5e-05,
  "beta": 0.04,
  "lora_rank": 8,
  "hint_fraction": 0.0,
  "profile_mode": "continuous",
  "output_dir": "/tmp/rhythm_env/outputs/rhythmenv_meta_trained",
  "use_simple_reward": false,
  "report_to": "none"
}