---
# Experiment configuration.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML (nested "key: value" pairs cannot share a line in
# block style). The nesting below is reconstructed from key order; all values
# are unchanged. Confirm the section boundaries against the code that loads
# this file.

# Model hyperparameters.
model:
  n_layers: 2
  n_heads: 4
  d_model: 128
  max_length: 30

# Data collection settings.
data:
  env_id: "MiniGrid-Empty-8x8-v0"
  num_episodes: 1000
  collection_method: "PPO-Teacher"

# Interpretability analysis settings.
interpretability:
  dla_threshold: 0.1
  patching_metric: "logit_diff"

# NOTE(review): `sae` is placed at the top level to match the flat layout of
# the other sections, with the trailing `num_episodes` belonging to it. If the
# loader expects `interpretability.sae` (or `interpretability.num_episodes`),
# re-indent accordingly. This `num_episodes` is a separate key from
# `data.num_episodes`.
sae:
  expansion_factor: 8
  l1_coeff: 0.0005
  num_episodes: 100