# Git LFS tracking rules: one path pattern per line, followed by its attributes.
# "filter=lfs diff=lfs merge=lfs" routes matching paths through the lfs
# filter/diff/merge drivers; "-text" unsets the text attribute so Git performs
# no end-of-line conversion on them.

# Generic rules by file extension / name pattern.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

# Individually tracked plot images under outputs/.
outputs/comprehensive_intervention/plots/combined_summary.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/error_analysis.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/seed_comparison_dseed.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/seed_comparison_iseed.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/success_by_gap.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/success_by_number.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/success_by_position.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/training_progression_number.png filter=lfs diff=lfs merge=lfs -text
outputs/comprehensive_intervention/plots/training_progression_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1337_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1338_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_N256_B16_ds1337_is1339_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1339_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1337_is1340_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_N256_B16_ds1338_is1337_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1337_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1338_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_N256_B16_ds1338_is1338_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1339_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt60000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_ckpt80000/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/perlocation/perlocation_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/perlocation/perlocation_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/pernumber/pernumber_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_N256_B16_ds1338_is1340_final/pernumber/pernumber_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI1000000_E64_H1_L2_ds1337_is1337_ckpt1000000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text
# Git LFS-tracked plot images (one path per line; "-text" disables EOL conversion).
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI100000_E64_H1_L2_ds1337_is1337_ckpt100000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI150000_E64_H1_L2_ds1337_is1337_ckpt150000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text
outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs 
merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI200000_E64_H1_L2_ds1337_is1337_ckpt200000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/hijack_hijack_rate_heatmap_layer1.png 
filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI250000_E64_H1_L2_ds1337_is1337_ckpt250000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/hijack_breaking_rate_heatmap_layer1.png 
filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI300000_E64_H1_L2_ds1337_is1337_ckpt300000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text 
outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI350000_E64_H1_L2_ds1337_is1337_ckpt350000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text 
outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/hijack_sample_count_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI400000_E64_H1_L2_ds1337_is1337_ckpt400000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/attn_heatmaps.png 
filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text 
outputs/plots_V256_B16_LR3e-2_MI450000_E64_H1_L2_ds1337_is1337_ckpt450000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/hijack_sample_count_heatmap_layer1.png 
filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI500000_E64_H1_L2_ds1337_is1337_ckpt500000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/hijack_breaking_rate_heatmap_layer1.png filter=lfs 
diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/hijack_hijack_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI50000_E64_H1_L2_ds1337_is1337_ckpt50000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/cinclogits_layer0.png 
filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI550000_E64_H1_L2_ds1337_is1337_ckpt550000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text 
outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI600000_E64_H1_L2_ds1337_is1337_ckpt600000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text 
outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs 
-text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI650000_E64_H1_L2_ds1337_is1337_ckpt650000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs 
merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI700000_E64_H1_L2_ds1337_is1337_ckpt700000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs 
diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI750000_E64_H1_L2_ds1337_is1337_ckpt750000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text 
outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI800000_E64_H1_L2_ds1337_is1337_ckpt800000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text 
outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs 
-text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI850000_E64_H1_L2_ds1337_is1337_ckpt850000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs 
diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI900000_E64_H1_L2_ds1337_is1337_ckpt900000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/ablation_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/ablation_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/ablation_per_position.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/attn_heatmaps.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/avg_attn_by_number_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/avg_attn_by_number_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/baseline_conditional_accuracy.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/cinclogits_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/cinclogits_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/hijack_breaking_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs 
merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/hijack_breaking_rate_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/hijack_breaking_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/hijack_hijack_rate_bynext_heatmap_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/hijack_hijack_rate_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/hijack_sample_count_heatmap_layer1.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/intensity_layer0_asym_ub60_lb60.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/intervention_pernumber_random_layer0.png filter=lfs diff=lfs merge=lfs -text outputs/plots_V256_B16_LR3e-2_MI950000_E64_H1_L2_ds1337_is1337_ckpt950000/intervention_pernumber_separator_layer0.png filter=lfs diff=lfs merge=lfs -text