# Git LFS tracking rules.
# gitattributes is line-oriented: each line is ONE pattern followed by its
# attributes, so every rule must sit on its own line.

# Rules keyed on file extension / name pattern.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

# Individually tracked files (exact paths).
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/combined_cuv_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/combined_cuv_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/combined_cuv_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.95-bs_32-seq_1024-iters_9536/250622_025948/eos/step_002500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_000500/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_005000/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_006000/losses_lr.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_003000/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_002500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_004500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250728_063551/training.log filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001000/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_003500/losses_lr.png filter=lfs diff=lfs merge=lfs -text
fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/eos/step_007000/losses_lr.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3700/eos_ckpt3700_wd1521.48.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3200/eos_ckpt3200_wd1521.48.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-500/eos_ckpt500_wd1335.94.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4000/eos_ckpt4000_wd1224.61.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-800/eos_ckpt800_wd1113.28.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3800/eos_ckpt3800_wd1373.05.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4200/eos_ckpt4200_wd1224.61.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-700/eos_ckpt700_wd1447.27.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3000/eos_ckpt3000_wd1595.70.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4100/eos_ckpt4100_wd1261.72.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1200/eos_ckpt1200_wd1707.03.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3100/eos_ckpt3100_wd1335.94.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3400/eos_ckpt3400_wd1150.39.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3500/eos_ckpt3500_wd1076.17.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2100/eos_ckpt2100_wd1373.05.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3600/eos_ckpt3600_wd1595.70.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2200/eos_ckpt2200_wd1076.17.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1800/eos_ckpt1800_wd1966.80.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1400/eos_ckpt1400_wd1558.59.png filter=lfs diff=lfs merge=lfs -text
# Individually tracked Git LFS files (exact paths), one rule per line as
# required by the line-oriented gitattributes format.
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-100/eos_ckpt100_wd3117.19.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4600/eos_ckpt4600_wd630.86.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1100/eos_ckpt1100_wd2263.67.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1600/eos_ckpt1600_wd2226.56.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2700/eos_ckpt2700_wd1447.27.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4800/eos_ckpt4800_wd333.98.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2000/eos_ckpt2000_wd1001.95.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4700/eos_ckpt4700_wd408.20.png filter=lfs diff=lfs merge=lfs -text
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3900/eos_ckpt3900_wd1595.70.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1700/eos_ckpt1700_wd1373.05.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1500/eos_ckpt1500_wd2968.75.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2600/eos_ckpt2600_wd1224.61.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2300/eos_ckpt2300_wd1447.27.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4300/eos_ckpt4300_wd1001.95.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4900/eos_ckpt4900_wd19000.00.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2400/eos_ckpt2400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-600/eos_ckpt600_wd2486.33.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-200/eos_ckpt200_wd1744.14.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-400/eos_ckpt400_wd2300.78.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2500/eos_ckpt2500_wd1261.72.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2800/eos_ckpt2800_wd1707.03.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-300/eos_ckpt300_wd2634.77.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-900/eos_ckpt900_wd1966.80.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1000/eos_ckpt1000_wd3191.41.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4500/eos_ckpt4500_wd853.52.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-3300/eos_ckpt3300_wd1187.50.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-2900/eos_ckpt2900_wd1447.27.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1300/eos_ckpt1300_wd2449.22.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-1900/eos_ckpt1900_wd1818.36.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_al/ckpt-4400/eos_ckpt4400_wd1113.28.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4700/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3900/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1700/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1500/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2300/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4300/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4900/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2500/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-300/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-900/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4500/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3300/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2900/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1300/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1900/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3700/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-500/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-700/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4100/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3100/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3500/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2100/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-100/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1100/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2700/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs1024_wsd-5000-200-4000-5000-0_260118-043437/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-7200/eos_ckpt7200_wd853.52.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-5600/eos_ckpt5600_wd1447.27.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-2600/eos_ckpt2600_wd1187.50.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-8600/eos_ckpt8600_wd482.42.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-8800/eos_ckpt8800_wd408.20.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-6000/eos_ckpt6000_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-2400/eos_ckpt2400_wd853.52.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-5200/eos_ckpt5200_wd779.30.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-600/eos_ckpt600_wd1521.48.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-200/eos_ckpt200_wd4082.03.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-400/eos_ckpt400_wd3896.48.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-8200/eos_ckpt8200_wd519.53.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-2800/eos_ckpt2800_wd779.30.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-5000/eos_ckpt5000_wd1001.95.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-5800/eos_ckpt5800_wd927.73.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-1000/eos_ckpt1000_wd1484.38.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-9400/eos_ckpt9400_wd371.09.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-7400/eos_ckpt7400_wd593.75.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-9000/eos_ckpt9000_wd779.30.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-8000/eos_ckpt8000_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-6400/eos_ckpt6400_wd927.73.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-4400/eos_ckpt4400_wd890.62.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-3200/eos_ckpt3200_wd853.52.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-9600/eos_ckpt9600_wd74.22.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-5400/eos_ckpt5400_wd667.97.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-4000/eos_ckpt4000_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-800/eos_ckpt800_wd1410.16.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-3800/eos_ckpt3800_wd853.52.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-6200/eos_ckpt6200_wd1187.50.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-4200/eos_ckpt4200_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-8400/eos_ckpt8400_wd630.86.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-3000/eos_ckpt3000_wd1187.50.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-9800/eos_ckpt9800_wd19000.00.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-1200/eos_ckpt1200_wd1076.17.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-3400/eos_ckpt3400_wd927.73.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-6800/eos_ckpt6800_wd1298.83.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-7000/eos_ckpt7000_wd705.08.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-3600/eos_ckpt3600_wd890.62.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-6600/eos_ckpt6600_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-2200/eos_ckpt2200_wd1039.06.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-7600/eos_ckpt7600_wd742.19.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-9200/eos_ckpt9200_wd519.53.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-1800/eos_ckpt1800_wd927.73.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-1400/eos_ckpt1400_wd1224.61.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-4600/eos_ckpt4600_wd667.97.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-1600/eos_ckpt1600_wd1669.92.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-7800/eos_ckpt7800_wd927.73.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-4800/eos_ckpt4800_wd890.62.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_al/ckpt-2000/eos_ckpt2000_wd1001.95.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260118-043437/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-14800/eos_ckpt14800_wd630.86.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-18000/eos_ckpt18000_wd556.64.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-17600/eos_ckpt17600_wd1001.95.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-13200/eos_ckpt13200_wd482.42.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-7200/eos_ckpt7200_wd742.19.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-5600/eos_ckpt5600_wd742.19.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-18400/eos_ckpt18400_wd148.44.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-14000/eos_ckpt14000_wd482.42.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-15200/eos_ckpt15200_wd593.75.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-8800/eos_ckpt8800_wd630.86.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-6000/eos_ckpt6000_wd371.09.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-15600/eos_ckpt15600_wd482.42.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-16800/eos_ckpt16800_wd742.19.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-2400/eos_ckpt2400_wd667.97.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-5200/eos_ckpt5200_wd556.64.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-12000/eos_ckpt12000_wd667.97.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-11200/eos_ckpt11200_wd593.75.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-400/eos_ckpt400_wd5640.62.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-2800/eos_ckpt2800_wd816.41.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-19600/eos_ckpt19600_wd19000.00.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-14400/eos_ckpt14400_wd445.31.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-12400/eos_ckpt12400_wd445.31.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-8000/eos_ckpt8000_wd445.31.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-2000/eos_ckpt2000_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-17200/eos_ckpt17200_wd408.20.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-6400/eos_ckpt6400_wd705.08.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-4400/eos_ckpt4400_wd667.97.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-19200/eos_ckpt19200_wd222.66.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-10800/eos_ckpt10800_wd742.19.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-3200/eos_ckpt3200_wd630.86.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-9600/eos_ckpt9600_wd667.97.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-10000/eos_ckpt10000_wd482.42.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-4000/eos_ckpt4000_wd667.97.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-800/eos_ckpt800_wd2152.34.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-16400/eos_ckpt16400_wd779.30.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-10400/eos_ckpt10400_wd593.75.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-18800/eos_ckpt18800_wd482.42.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-11600/eos_ckpt11600_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-12800/eos_ckpt12800_wd556.64.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-8400/eos_ckpt8400_wd593.75.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-1200/eos_ckpt1200_wd1001.95.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-13600/eos_ckpt13600_wd445.31.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-6800/eos_ckpt6800_wd779.30.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-16000/eos_ckpt16000_wd445.31.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-3600/eos_ckpt3600_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-7600/eos_ckpt7600_wd779.30.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-9200/eos_ckpt9200_wd519.53.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-1600/eos_ckpt1600_wd816.41.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_al/ckpt-4800/eos_ckpt4800_wd556.64.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-14800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-18000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-17600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-13200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-18400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-14000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-15200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-15600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-16800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-12000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-11200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-19600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-14400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-12400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-17200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-19200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-10800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-10000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-16400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-10400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-18800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-11600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-12800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-13600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-16000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text BSZ-cases/nlp-ntp-pt_gpt-xs-bs_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs256_wsd-10000-400-8000-10000-0_260118-043437/BSZ-cases/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-7200/eos_ckpt7200_wd458.98.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-5600/eos_ckpt5600_wd180.66.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-2600/eos_ckpt2600_wd283.20.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-8600/eos_ckpt8600_wd322.27.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-8800/eos_ckpt8800_wd317.38.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-6000/eos_ckpt6000_wd507.81.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-2400/eos_ckpt2400_wd117.19.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-5200/eos_ckpt5200_wd336.91.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-600/eos_ckpt600_wd463.87.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-200/eos_ckpt200_wd625.00.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-400/eos_ckpt400_wd625.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-8200/eos_ckpt8200_wd263.67.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-2800/eos_ckpt2800_wd234.38.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-5000/eos_ckpt5000_wd263.67.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-5800/eos_ckpt5800_wd625.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-1000/eos_ckpt1000_wd229.49.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-9400/eos_ckpt9400_wd390.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-7400/eos_ckpt7400_wd229.49.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-9000/eos_ckpt9000_wd424.80.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-8000/eos_ckpt8000_wd400.39.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-6400/eos_ckpt6400_wd625.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-4400/eos_ckpt4400_wd244.14.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-3200/eos_ckpt3200_wd307.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-9600/eos_ckpt9600_wd229.49.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-5400/eos_ckpt5400_wd263.67.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-4000/eos_ckpt4000_wd292.97.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-800/eos_ckpt800_wd458.98.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-3800/eos_ckpt3800_wd209.96.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-6200/eos_ckpt6200_wd322.27.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-4200/eos_ckpt4200_wd268.55.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-8400/eos_ckpt8400_wd224.61.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-3000/eos_ckpt3000_wd625.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-1200/eos_ckpt1200_wd507.81.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-9800/eos_ckpt9800_wd625.00.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-3400/eos_ckpt3400_wd302.73.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-6800/eos_ckpt6800_wd268.55.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-7000/eos_ckpt7000_wd161.13.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-3600/eos_ckpt3600_wd288.09.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-6600/eos_ckpt6600_wd478.52.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-2200/eos_ckpt2200_wd263.67.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-7600/eos_ckpt7600_wd312.50.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-9200/eos_ckpt9200_wd219.73.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-1800/eos_ckpt1800_wd253.91.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-1400/eos_ckpt1400_wd297.85.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-4600/eos_ckpt4600_wd283.20.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-1600/eos_ckpt1600_wd341.80.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-7800/eos_ckpt7800_wd625.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-4800/eos_ckpt4800_wd249.02.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_al/ckpt-2000/eos_ckpt2000_wd278.32.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.016-b0.9-0.95-eps1e-08-wd0.1-bs512_cos-10000-400-8000-10000-0_260117-073412/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6400/eos_ckpt6400_wd1113.28.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6400/eos_ckpt6400_wd890.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4400/eos_ckpt4400_wd779.30.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4400/eos_ckpt4400_wd964.84.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3200/eos_ckpt3200_wd630.86.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3200/eos_ckpt3200_wd816.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9600/eos_ckpt9600_wd742.19.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9600/eos_ckpt9600_wd927.73.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5400/eos_ckpt5400_wd816.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5400/eos_ckpt5400_wd705.08.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4000/eos_ckpt4000_wd630.86.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4000/eos_ckpt4000_wd742.19.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-800/eos_ckpt800_wd1818.36.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-800/eos_ckpt800_wd1187.50.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3800/eos_ckpt3800_wd593.75.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3800/eos_ckpt3800_wd556.64.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6200/eos_ckpt6200_wd853.52.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6200/eos_ckpt6200_wd1150.39.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4200/eos_ckpt4200_wd667.97.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4200/eos_ckpt4200_wd630.86.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8400/eos_ckpt8400_wd742.19.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8400/eos_ckpt8400_wd779.30.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3000/eos_ckpt3000_wd1150.39.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3000/eos_ckpt3000_wd853.52.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9800/eos_ckpt9800_wd1261.72.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9800/eos_ckpt9800_wd1224.61.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1200/eos_ckpt1200_wd1076.17.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1200/eos_ckpt1200_wd853.52.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3400/eos_ckpt3400_wd519.53.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3400/eos_ckpt3400_wd630.86.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6800/eos_ckpt6800_wd927.73.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6800/eos_ckpt6800_wd1076.17.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7000/eos_ckpt7000_wd667.97.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7000/eos_ckpt7000_wd927.73.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3600/eos_ckpt3600_wd593.75.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-3600/eos_ckpt3600_wd556.64.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6600/eos_ckpt6600_wd705.08.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6600/eos_ckpt6600_wd964.84.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2200/eos_ckpt2200_wd816.41.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7200/eos_ckpt7200_wd779.30.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7200/eos_ckpt7200_wd1150.39.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5600/eos_ckpt5600_wd779.30.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5600/eos_ckpt5600_wd1335.94.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2600/eos_ckpt2600_wd1039.06.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8600/eos_ckpt8600_wd705.08.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8600/eos_ckpt8600_wd964.84.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8800/eos_ckpt8800_wd1224.61.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8800/eos_ckpt8800_wd853.52.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6000/eos_ckpt6000_wd890.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-6000/eos_ckpt6000_wd519.53.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2400/eos_ckpt2400_wd742.19.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2400/eos_ckpt2400_wd705.08.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5200/eos_ckpt5200_wd630.86.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5200/eos_ckpt5200_wd593.75.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-600/eos_ckpt600_wd1669.92.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-600/eos_ckpt600_wd1335.94.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-200/eos_ckpt200_wd3599.61.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-200/eos_ckpt200_wd3191.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-400/eos_ckpt400_wd2152.34.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-400/eos_ckpt400_wd2300.78.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8200/eos_ckpt8200_wd593.75.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8200/eos_ckpt8200_wd779.30.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2800/eos_ckpt2800_wd593.75.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2800/eos_ckpt2800_wd816.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5000/eos_ckpt5000_wd742.19.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5000/eos_ckpt5000_wd1001.95.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5800/eos_ckpt5800_wd1001.95.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-5800/eos_ckpt5800_wd927.73.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1000/eos_ckpt1000_wd1001.95.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1000/eos_ckpt1000_wd1484.38.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9400/eos_ckpt9400_wd1335.94.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9400/eos_ckpt9400_wd816.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7400/eos_ckpt7400_wd964.84.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7400/eos_ckpt7400_wd667.97.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9000/eos_ckpt9000_wd1373.05.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9000/eos_ckpt9000_wd1113.28.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8000/eos_ckpt8000_wd1261.72.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-8000/eos_ckpt8000_wd705.08.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2200/eos_ckpt2200_wd927.73.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7600/eos_ckpt7600_wd1632.81.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7600/eos_ckpt7600_wd779.30.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9200/eos_ckpt9200_wd1001.95.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-9200/eos_ckpt9200_wd630.86.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1800/eos_ckpt1800_wd1001.95.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1800/eos_ckpt1800_wd1187.50.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1400/eos_ckpt1400_wd853.52.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1400/eos_ckpt1400_wd1076.17.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4600/eos_ckpt4600_wd667.97.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4600/eos_ckpt4600_wd519.53.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-1600/eos_ckpt1600_wd1150.39.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7800/eos_ckpt7800_wd742.19.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-7800/eos_ckpt7800_wd890.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4800/eos_ckpt4800_wd556.64.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-4800/eos_ckpt4800_wd779.30.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2000/eos_ckpt2000_wd1224.61.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_al/ckpt-2000/eos_ckpt2000_wd927.73.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-ms-lr_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-10000-400-3000-10000-0.7-0_260118-043632/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7200/eos_ckpt7200_wd410.16.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5600/eos_ckpt5600_wd566.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2600/eos_ckpt2600_wd625.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8600/eos_ckpt8600_wd1269.53.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8800/eos_ckpt8800_wd371.09.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6000/eos_ckpt6000_wd410.16.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2400/eos_ckpt2400_wd429.69.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5200/eos_ckpt5200_wd390.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-600/eos_ckpt600_wd1269.53.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-200/eos_ckpt200_wd1621.09.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-400/eos_ckpt400_wd1289.06.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8200/eos_ckpt8200_wd292.97.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2800/eos_ckpt2800_wd351.56.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5000/eos_ckpt5000_wd507.81.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5800/eos_ckpt5800_wd566.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1000/eos_ckpt1000_wd644.53.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9400/eos_ckpt9400_wd371.09.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7400/eos_ckpt7400_wd468.75.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9000/eos_ckpt9000_wd312.50.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8000/eos_ckpt8000_wd664.06.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6400/eos_ckpt6400_wd390.62.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4400/eos_ckpt4400_wd429.69.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3200/eos_ckpt3200_wd449.22.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9600/eos_ckpt9600_wd117.19.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5400/eos_ckpt5400_wd468.75.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4000/eos_ckpt4000_wd546.88.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-800/eos_ckpt800_wd1210.94.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3800/eos_ckpt3800_wd410.16.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6200/eos_ckpt6200_wd546.88.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4200/eos_ckpt4200_wd468.75.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8400/eos_ckpt8400_wd410.16.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3000/eos_ckpt3000_wd2500.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9800/eos_ckpt9800_wd2500.00.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1200/eos_ckpt1200_wd566.41.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3400/eos_ckpt3400_wd605.47.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6800/eos_ckpt6800_wd800.78.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7000/eos_ckpt7000_wd390.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3600/eos_ckpt3600_wd351.56.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6600/eos_ckpt6600_wd527.34.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2200/eos_ckpt2200_wd468.75.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7600/eos_ckpt7600_wd546.88.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9200/eos_ckpt9200_wd390.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1800/eos_ckpt1800_wd585.94.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1400/eos_ckpt1400_wd644.53.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4600/eos_ckpt4600_wd390.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1600/eos_ckpt1600_wd1445.31.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7800/eos_ckpt7800_wd585.94.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4800/eos_ckpt4800_wd390.62.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2000/eos_ckpt2000_wd839.84.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text LR-scheduler/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7200/eos_ckpt7200_wd185.55.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5600/eos_ckpt5600_wd151.37.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2600/eos_ckpt2600_wd256.35.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8600/eos_ckpt8600_wd244.14.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8800/eos_ckpt8800_wd195.31.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6000/eos_ckpt6000_wd156.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2400/eos_ckpt2400_wd187.99.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5200/eos_ckpt5200_wd158.69.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-600/eos_ckpt600_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-200/eos_ckpt200_wd312.50.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-400/eos_ckpt400_wd224.61.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8200/eos_ckpt8200_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2800/eos_ckpt2800_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5000/eos_ckpt5000_wd190.43.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5800/eos_ckpt5800_wd288.09.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1000/eos_ckpt1000_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9400/eos_ckpt9400_wd273.44.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7400/eos_ckpt7400_wd139.16.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9000/eos_ckpt9000_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8000/eos_ckpt8000_wd131.84.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6400/eos_ckpt6400_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4400/eos_ckpt4400_wd266.11.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3200/eos_ckpt3200_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9600/eos_ckpt9600_wd271.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2000/eos_ckpt2000_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5400/eos_ckpt5400_wd205.08.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4000/eos_ckpt4000_wd258.79.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-800/eos_ckpt800_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3800/eos_ckpt3800_wd151.37.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6200/eos_ckpt6200_wd180.66.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4200/eos_ckpt4200_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8400/eos_ckpt8400_wd197.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3000/eos_ckpt3000_wd244.14.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9800/eos_ckpt9800_wd302.73.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1200/eos_ckpt1200_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3400/eos_ckpt3400_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6800/eos_ckpt6800_wd151.37.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7000/eos_ckpt7000_wd205.08.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3600/eos_ckpt3600_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6600/eos_ckpt6600_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2200/eos_ckpt2200_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7600/eos_ckpt7600_wd170.90.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9200/eos_ckpt9200_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1800/eos_ckpt1800_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1400/eos_ckpt1400_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4600/eos_ckpt4600_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1600/eos_ckpt1600_wd26.86.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7800/eos_ckpt7800_wd178.22.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4800/eos_ckpt4800_wd312.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.032-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7200/eos_ckpt7200_wd1562.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5600/eos_ckpt5600_wd1875.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2600/eos_ckpt2600_wd2500.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8600/eos_ckpt8600_wd1093.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8800/eos_ckpt8800_wd781.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6000/eos_ckpt6000_wd1875.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2400/eos_ckpt2400_wd1718.75.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5200/eos_ckpt5200_wd1406.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-600/eos_ckpt600_wd6093.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-200/eos_ckpt200_wd20000.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-400/eos_ckpt400_wd4531.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8200/eos_ckpt8200_wd1562.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2800/eos_ckpt2800_wd2187.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5000/eos_ckpt5000_wd1718.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5800/eos_ckpt5800_wd2031.25.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1000/eos_ckpt1000_wd3593.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9400/eos_ckpt9400_wd156.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7400/eos_ckpt7400_wd1250.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9000/eos_ckpt9000_wd1406.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8000/eos_ckpt8000_wd2968.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6400/eos_ckpt6400_wd2031.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4400/eos_ckpt4400_wd2187.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3200/eos_ckpt3200_wd2187.50.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9600/eos_ckpt9600_wd20000.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-5400/eos_ckpt5400_wd2187.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4000/eos_ckpt4000_wd2031.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-800/eos_ckpt800_wd4218.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3800/eos_ckpt3800_wd1718.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6200/eos_ckpt6200_wd1875.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4200/eos_ckpt4200_wd1562.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-8400/eos_ckpt8400_wd1406.25.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3000/eos_ckpt3000_wd2656.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9800/eos_ckpt9800_wd20000.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1200/eos_ckpt1200_wd5000.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3400/eos_ckpt3400_wd1562.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6800/eos_ckpt6800_wd2812.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7000/eos_ckpt7000_wd1562.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-3600/eos_ckpt3600_wd1718.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-6600/eos_ckpt6600_wd1250.00.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2200/eos_ckpt2200_wd2343.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7600/eos_ckpt7600_wd1562.50.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-9200/eos_ckpt9200_wd781.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1800/eos_ckpt1800_wd3593.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1400/eos_ckpt1400_wd2656.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4600/eos_ckpt4600_wd1406.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-1600/eos_ckpt1600_wd3125.00.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-7800/eos_ckpt7800_wd1718.75.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-4800/eos_ckpt4800_wd1406.25.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_al/ckpt-2000/eos_ckpt2000_wd2968.75.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text WSD-cases/nlp-ntp-pt_gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0005-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-8000-10000-0_260117-073333/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text 
criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2000/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-2400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-2800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text 
criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-3200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-1200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-3600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-1600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-231300/eos_cbz/ckpt-2000/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text 
criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2000/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text 
criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2000/cbz_loss.png filter=lfs diff=lfs merge=lfs -text 
criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-400/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-800/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1200/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-3600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-1600/cbz_loss.png filter=lfs diff=lfs merge=lfs -text criticalBSZ-8k/nlp-ntp-pt_cbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-4000-4000-0_260124-225101/eos_cbz/ckpt-2000/cbz_loss.png filter=lfs diff=lfs 
merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-11400/eos_ckpt11400_wd166.99.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-11800/eos_ckpt11800_wd111.33.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-7200/eos_ckpt7200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-5600/eos_ckpt5600_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-10200/eos_ckpt10200_wd92.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-2600/eos_ckpt2600_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-8600/eos_ckpt8600_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-11000/eos_ckpt11000_wd148.44.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-8800/eos_ckpt8800_wd352.54.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-6000/eos_ckpt6000_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-2400/eos_ckpt2400_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-5200/eos_ckpt5200_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-11200/eos_ckpt11200_wd296.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-12000/eos_ckpt12000_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-8200/eos_ckpt8200_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-2800/eos_ckpt2800_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-5800/eos_ckpt5800_wd816.41.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-5000/eos_ckpt5000_wd686.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-12600/eos_ckpt12600_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-9800/eos_ckpt9800_wd259.77.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-3400/eos_ckpt3400_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-6800/eos_ckpt6800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-7000/eos_ckpt7000_wd352.54.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-3600/eos_ckpt3600_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-6600/eos_ckpt6600_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-2200/eos_ckpt2200_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-7600/eos_ckpt7600_wd816.41.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-9200/eos_ckpt9200_wd389.65.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-4600/eos_ckpt4600_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-7800/eos_ckpt7800_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-4800/eos_ckpt4800_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-9400/eos_ckpt9400_wd296.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-7400/eos_ckpt7400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-12400/eos_ckpt12400_wd222.66.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-9000/eos_ckpt9000_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-8000/eos_ckpt8000_wd742.19.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-6400/eos_ckpt6400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-4400/eos_ckpt4400_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-10600/eos_ckpt10600_wd148.44.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-10800/eos_ckpt10800_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-3200/eos_ckpt3200_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-9600/eos_ckpt9600_wd278.32.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-5400/eos_ckpt5400_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-10000/eos_ckpt10000_wd204.10.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-4000/eos_ckpt4000_wd1484.38.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-10400/eos_ckpt10400_wd18.55.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-3800/eos_ckpt3800_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-6200/eos_ckpt6200_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-4200/eos_ckpt4200_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-11600/eos_ckpt11600_wd111.33.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-12200/eos_ckpt12200_wd185.55.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-12800/eos_ckpt12800_wd241.21.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-8400/eos_ckpt8400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_al/ckpt-3000/eos_ckpt3000_wd686.52.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-11400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-11800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-10200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-11000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-12000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-11200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-12600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-12400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-10600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-10800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-10000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-10400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-11600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-12200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-12800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-13000-400-2000-10000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-5600/eos_ckpt5600_wd18.55.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-2600/eos_ckpt2600_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-6000/eos_ckpt6000_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-2400/eos_ckpt2400_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-5200/eos_ckpt5200_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-2800/eos_ckpt2800_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-5000/eos_ckpt5000_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-5800/eos_ckpt5800_wd18.55.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-6400/eos_ckpt6400_wd92.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-4400/eos_ckpt4400_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-3200/eos_ckpt3200_wd463.87.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-5400/eos_ckpt5400_wd92.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-4000/eos_ckpt4000_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-3800/eos_ckpt3800_wd204.10.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-6200/eos_ckpt6200_wd74.22.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-4200/eos_ckpt4200_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-3000/eos_ckpt3000_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-3400/eos_ckpt3400_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-6800/eos_ckpt6800_wd148.44.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-3600/eos_ckpt3600_wd241.21.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-6600/eos_ckpt6600_wd74.22.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-2200/eos_ckpt2200_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-4600/eos_ckpt4600_wd9500.00.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-4800/eos_ckpt4800_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-7000-400-2000-4000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-5400/eos_ckpt5400_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-4000/eos_ckpt4000_wd500.98.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-3800/eos_ckpt3800_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-6200/eos_ckpt6200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-4200/eos_ckpt4200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-8400/eos_ckpt8400_wd241.21.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-3000/eos_ckpt3000_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-9800/eos_ckpt9800_wd92.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-3400/eos_ckpt3400_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-6800/eos_ckpt6800_wd278.32.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-7000/eos_ckpt7000_wd18.55.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-3600/eos_ckpt3600_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-6600/eos_ckpt6600_wd278.32.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-2200/eos_ckpt2200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-7600/eos_ckpt7600_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-9200/eos_ckpt9200_wd222.66.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-4600/eos_ckpt4600_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-7800/eos_ckpt7800_wd111.33.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-4800/eos_ckpt4800_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-7200/eos_ckpt7200_wd9500.00.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-5600/eos_ckpt5600_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-2600/eos_ckpt2600_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-8600/eos_ckpt8600_wd111.33.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-8800/eos_ckpt8800_wd278.32.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-6000/eos_ckpt6000_wd296.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-2400/eos_ckpt2400_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-5200/eos_ckpt5200_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-8200/eos_ckpt8200_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-2800/eos_ckpt2800_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-5000/eos_ckpt5000_wd667.97.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-5800/eos_ckpt5800_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-9400/eos_ckpt9400_wd185.55.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-7400/eos_ckpt7400_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-9000/eos_ckpt9000_wd166.99.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-8000/eos_ckpt8000_wd241.21.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-6400/eos_ckpt6400_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-4400/eos_ckpt4400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-3200/eos_ckpt3200_wd742.19.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_al/ckpt-9600/eos_ckpt9600_wd18.55.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-7000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-5600/eos_ckpt5600_wd74.22.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-2600/eos_ckpt2600_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-2400/eos_ckpt2400_wd296.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-5200/eos_ckpt5200_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-2800/eos_ckpt2800_wd204.10.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-5000/eos_ckpt5000_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-5800/eos_ckpt5800_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-4400/eos_ckpt4400_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-3200/eos_ckpt3200_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-5400/eos_ckpt5400_wd55.66.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-4000/eos_ckpt4000_wd9500.00.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-3800/eos_ckpt3800_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-4200/eos_ckpt4200_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-3000/eos_ckpt3000_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-3400/eos_ckpt3400_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-3600/eos_ckpt3600_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-2200/eos_ckpt2200_wd593.75.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-4600/eos_ckpt4600_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-4800/eos_ckpt4800_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-6000-400-2000-3000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-11400/eos_ckpt11400_wd148.44.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-11800/eos_ckpt11800_wd92.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-7200/eos_ckpt7200_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-5600/eos_ckpt5600_wd686.52.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-10200/eos_ckpt10200_wd111.33.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-2600/eos_ckpt2600_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-8600/eos_ckpt8600_wd278.32.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-11000/eos_ckpt11000_wd129.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-8800/eos_ckpt8800_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-6000/eos_ckpt6000_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-2400/eos_ckpt2400_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-5200/eos_ckpt5200_wd463.87.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-11200/eos_ckpt11200_wd259.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-8200/eos_ckpt8200_wd333.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-2800/eos_ckpt2800_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-5000/eos_ckpt5000_wd705.08.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-5800/eos_ckpt5800_wd593.75.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-9400/eos_ckpt9400_wd111.33.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-9000/eos_ckpt9000_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-7400/eos_ckpt7400_wd816.41.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-8000/eos_ckpt8000_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-6400/eos_ckpt6400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-4400/eos_ckpt4400_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-10600/eos_ckpt10600_wd296.88.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-10800/eos_ckpt10800_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-3200/eos_ckpt3200_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-9600/eos_ckpt9600_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-5400/eos_ckpt5400_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-10000/eos_ckpt10000_wd204.10.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-4000/eos_ckpt4000_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-10400/eos_ckpt10400_wd74.22.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-3800/eos_ckpt3800_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-6200/eos_ckpt6200_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-4200/eos_ckpt4200_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-11600/eos_ckpt11600_wd111.33.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-8400/eos_ckpt8400_wd723.63.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-3000/eos_ckpt3000_wd7403.32.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-9800/eos_ckpt9800_wd166.99.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-3400/eos_ckpt3400_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-6800/eos_ckpt6800_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-7000/eos_ckpt7000_wd352.54.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-3600/eos_ckpt3600_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-6600/eos_ckpt6600_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-2200/eos_ckpt2200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-7600/eos_ckpt7600_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-9200/eos_ckpt9200_wd92.77.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-4600/eos_ckpt4600_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-7800/eos_ckpt7800_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-4800/eos_ckpt4800_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-11400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-11800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-10200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-11000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-11200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-10600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-10800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-10000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-10400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-11600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-12000-400-2000-9000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-3200/eos_ckpt3200_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-9600/eos_ckpt9600_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-5400/eos_ckpt5400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-10000/eos_ckpt10000_wd259.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-4000/eos_ckpt4000_wd463.87.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-10400/eos_ckpt10400_wd148.44.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-3800/eos_ckpt3800_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-6200/eos_ckpt6200_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-4200/eos_ckpt4200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-8400/eos_ckpt8400_wd148.44.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-3000/eos_ckpt3000_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-9800/eos_ckpt9800_wd92.77.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-3400/eos_ckpt3400_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-6800/eos_ckpt6800_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-7000/eos_ckpt7000_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-3600/eos_ckpt3600_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-6600/eos_ckpt6600_wd333.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-2200/eos_ckpt2200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-7600/eos_ckpt7600_wd426.76.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-9200/eos_ckpt9200_wd166.99.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-4600/eos_ckpt4600_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-7200/eos_ckpt7200_wd333.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-5600/eos_ckpt5600_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-10200/eos_ckpt10200_wd74.22.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-2600/eos_ckpt2600_wd630.86.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-8600/eos_ckpt8600_wd55.66.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-8800/eos_ckpt8800_wd259.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-6000/eos_ckpt6000_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-2400/eos_ckpt2400_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-5200/eos_ckpt5200_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-8200/eos_ckpt8200_wd55.66.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-2800/eos_ckpt2800_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-5000/eos_ckpt5000_wd723.63.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-5800/eos_ckpt5800_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-9400/eos_ckpt9400_wd148.44.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-7400/eos_ckpt7400_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-9000/eos_ckpt9000_wd129.88.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-8000/eos_ckpt8000_wd296.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-6400/eos_ckpt6400_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-4400/eos_ckpt4400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-10600/eos_ckpt10600_wd222.66.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-10800/eos_ckpt10800_wd92.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-7800/eos_ckpt7800_wd185.55.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-4800/eos_ckpt4800_wd463.87.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-10200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-10600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-10800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-10000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-10400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-11000-400-2000-8000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-5000/eos_ckpt5000_wd315.43.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-5800/eos_ckpt5800_wd129.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-7400/eos_ckpt7400_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-8000/eos_ckpt8000_wd278.32.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-6400/eos_ckpt6400_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-4400/eos_ckpt4400_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-3200/eos_ckpt3200_wd630.86.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-5400/eos_ckpt5400_wd259.77.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-4000/eos_ckpt4000_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-3800/eos_ckpt3800_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-6200/eos_ckpt6200_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-4200/eos_ckpt4200_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-8400/eos_ckpt8400_wd204.10.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-3000/eos_ckpt3000_wd9017.58.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-3400/eos_ckpt3400_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-6800/eos_ckpt6800_wd55.66.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-7000/eos_ckpt7000_wd9500.00.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-3600/eos_ckpt3600_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-6600/eos_ckpt6600_wd37.11.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-2200/eos_ckpt2200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-7600/eos_ckpt7600_wd55.66.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-4600/eos_ckpt4600_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-7800/eos_ckpt7800_wd129.88.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-4800/eos_ckpt4800_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
# Git LFS rules for the PNG plots written by the two decay-length runs
# (wsd-9000-400-2000-6000-0.1 and wsd-8000-400-2000-5000-0.1).
#
# gitattributes accepts exactly ONE pattern per line, followed by its
# attributes; extra path tokens on the same line are parsed as (invalid)
# attribute names. Each rule therefore sits on its own line, and the
# per-checkpoint files are matched with globs ('*' never crosses a '/')
# instead of being listed one by one.
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_eig/ckpt-*/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/eos_al/ckpt-*/eos_ckpt*_wd*.png filter=lfs diff=lfs merge=lfs -text
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-9000-400-2000-6000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-8000-400-2000-5000-0.1_260124-220140/eos_eig/ckpt-*/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-8000-400-2000-5000-0.1_260124-220140/eos_al/ckpt-*/eos_ckpt*_wd*.png filter=lfs diff=lfs merge=lfs -text
decay-length/nlp-ntp-pt_decay-x-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-8000-400-2000-5000-0.1_260124-220140/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text
# Git LFS rules for the PNG plots written by the decay-ratio run
# (wsd-10000-400-2000-8000-0.4).
#
# gitattributes accepts exactly ONE pattern per line, followed by its
# attributes; extra path tokens on the same line are parsed as (invalid)
# attribute names. Each rule therefore sits on its own line, and the
# per-checkpoint files are matched with globs ('*' never crosses a '/')
# instead of being listed one by one.
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-*/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_al/ckpt-*/eos_ckpt*_wd*.png filter=lfs diff=lfs merge=lfs -text
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.4_260124-220611/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-7200/eos_ckpt7200_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-5600/eos_ckpt5600_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-2600/eos_ckpt2600_wd723.63.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-8600/eos_ckpt8600_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-8800/eos_ckpt8800_wd575.20.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-6000/eos_ckpt6000_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-2400/eos_ckpt2400_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-5200/eos_ckpt5200_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-8200/eos_ckpt8200_wd315.43.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-2800/eos_ckpt2800_wd426.76.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-5000/eos_ckpt5000_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-5800/eos_ckpt5800_wd705.08.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-9400/eos_ckpt9400_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-7400/eos_ckpt7400_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-9000/eos_ckpt9000_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-8000/eos_ckpt8000_wd779.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-6400/eos_ckpt6400_wd389.65.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-4400/eos_ckpt4400_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-3200/eos_ckpt3200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-9600/eos_ckpt9600_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-5400/eos_ckpt5400_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-4000/eos_ckpt4000_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-3800/eos_ckpt3800_wd500.98.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-6200/eos_ckpt6200_wd779.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-4200/eos_ckpt4200_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-8400/eos_ckpt8400_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-3000/eos_ckpt3000_wd686.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-9800/eos_ckpt9800_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-3400/eos_ckpt3400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-6800/eos_ckpt6800_wd723.63.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-7000/eos_ckpt7000_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-3600/eos_ckpt3600_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-6600/eos_ckpt6600_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-2200/eos_ckpt2200_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-7600/eos_ckpt7600_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-9200/eos_ckpt9200_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-4600/eos_ckpt4600_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-7800/eos_ckpt7800_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-4800/eos_ckpt4800_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.8_260124-220611/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-5400/eos_ckpt5400_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-4000/eos_ckpt4000_wd612.30.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-3800/eos_ckpt3800_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-6200/eos_ckpt6200_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-4200/eos_ckpt4200_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-8400/eos_ckpt8400_wd779.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-3000/eos_ckpt3000_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-9800/eos_ckpt9800_wd612.30.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-3400/eos_ckpt3400_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-6800/eos_ckpt6800_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-7000/eos_ckpt7000_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-3600/eos_ckpt3600_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-6600/eos_ckpt6600_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-2200/eos_ckpt2200_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-7600/eos_ckpt7600_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-9200/eos_ckpt9200_wd408.20.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-4600/eos_ckpt4600_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-4800/eos_ckpt4800_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-7800/eos_ckpt7800_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-7200/eos_ckpt7200_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-5600/eos_ckpt5600_wd705.08.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-2600/eos_ckpt2600_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-8600/eos_ckpt8600_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-8800/eos_ckpt8800_wd315.43.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-6000/eos_ckpt6000_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-2400/eos_ckpt2400_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-5200/eos_ckpt5200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-8200/eos_ckpt8200_wd333.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-2800/eos_ckpt2800_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-5000/eos_ckpt5000_wd705.08.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-5800/eos_ckpt5800_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-9400/eos_ckpt9400_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-7400/eos_ckpt7400_wd649.41.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-9000/eos_ckpt9000_wd742.19.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-8000/eos_ckpt8000_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-6400/eos_ckpt6400_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-4400/eos_ckpt4400_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-3200/eos_ckpt3200_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-9600/eos_ckpt9600_wd333.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.2_260124-220611/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-7200/eos_ckpt7200_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-5600/eos_ckpt5600_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-2600/eos_ckpt2600_wd667.97.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-8600/eos_ckpt8600_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-8800/eos_ckpt8800_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-6000/eos_ckpt6000_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-2400/eos_ckpt2400_wd482.42.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-5200/eos_ckpt5200_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-8200/eos_ckpt8200_wd352.54.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-2800/eos_ckpt2800_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-5000/eos_ckpt5000_wd705.08.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-5800/eos_ckpt5800_wd705.08.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-9400/eos_ckpt9400_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-7400/eos_ckpt7400_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-9000/eos_ckpt9000_wd686.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-8000/eos_ckpt8000_wd779.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-6400/eos_ckpt6400_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-4400/eos_ckpt4400_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-3200/eos_ckpt3200_wd742.19.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-9600/eos_ckpt9600_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-5400/eos_ckpt5400_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-4000/eos_ckpt4000_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-3800/eos_ckpt3800_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-6200/eos_ckpt6200_wd667.97.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-4200/eos_ckpt4200_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-8400/eos_ckpt8400_wd575.20.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-3000/eos_ckpt3000_wd2746.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-9800/eos_ckpt9800_wd705.08.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-3400/eos_ckpt3400_wd649.41.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-6800/eos_ckpt6800_wd797.85.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-7000/eos_ckpt7000_wd296.88.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-3600/eos_ckpt3600_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-6600/eos_ckpt6600_wd426.76.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-2200/eos_ckpt2200_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-7600/eos_ckpt7600_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-9200/eos_ckpt9200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-4600/eos_ckpt4600_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-7800/eos_ckpt7800_wd593.75.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-4800/eos_ckpt4800_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.7_260124-220611/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-7200/eos_ckpt7200_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-5600/eos_ckpt5600_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-2600/eos_ckpt2600_wd779.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-8600/eos_ckpt8600_wd426.76.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-8800/eos_ckpt8800_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-6000/eos_ckpt6000_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-2400/eos_ckpt2400_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-5200/eos_ckpt5200_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-8200/eos_ckpt8200_wd371.09.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-2800/eos_ckpt2800_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-5000/eos_ckpt5000_wd667.97.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-5800/eos_ckpt5800_wd723.63.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-9400/eos_ckpt9400_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-7400/eos_ckpt7400_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-9000/eos_ckpt9000_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-8000/eos_ckpt8000_wd686.52.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-6400/eos_ckpt6400_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-4400/eos_ckpt4400_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-3200/eos_ckpt3200_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-9600/eos_ckpt9600_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-5400/eos_ckpt5400_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-4000/eos_ckpt4000_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-3800/eos_ckpt3800_wd519.53.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-6200/eos_ckpt6200_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-4200/eos_ckpt4200_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-8400/eos_ckpt8400_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-3000/eos_ckpt3000_wd2430.66.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-9800/eos_ckpt9800_wd723.63.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-3400/eos_ckpt3400_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-6800/eos_ckpt6800_wd630.86.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-7000/eos_ckpt7000_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-3600/eos_ckpt3600_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-6600/eos_ckpt6600_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-2200/eos_ckpt2200_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-7600/eos_ckpt7600_wd705.08.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-9200/eos_ckpt9200_wd538.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-4600/eos_ckpt4600_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-7800/eos_ckpt7800_wd612.30.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-4800/eos_ckpt4800_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.5_260124-220611/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-4000/eos_ckpt4000_wd500.98.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-800/eos_ckpt800_wd964.84.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-3800/eos_ckpt3800_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-6200/eos_ckpt6200_wd742.19.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-4200/eos_ckpt4200_wd389.65.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-8400/eos_ckpt8400_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-3000/eos_ckpt3000_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-9800/eos_ckpt9800_wd705.08.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-3400/eos_ckpt3400_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-6800/eos_ckpt6800_wd742.19.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-7000/eos_ckpt7000_wd352.54.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-3600/eos_ckpt3600_wd371.09.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-6600/eos_ckpt6600_wd500.98.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-2200/eos_ckpt2200_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-7600/eos_ckpt7600_wd593.75.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-9200/eos_ckpt9200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-1800/eos_ckpt1800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-1400/eos_ckpt1400_wd760.74.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-4600/eos_ckpt4600_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-1600/eos_ckpt1600_wd1076.17.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-7800/eos_ckpt7800_wd946.29.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-4800/eos_ckpt4800_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-7200/eos_ckpt7200_wd482.42.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-5600/eos_ckpt5600_wd519.53.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-2600/eos_ckpt2600_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-8600/eos_ckpt8600_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-8800/eos_ckpt8800_wd630.86.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-6000/eos_ckpt6000_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-2400/eos_ckpt2400_wd482.42.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-5200/eos_ckpt5200_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-600/eos_ckpt600_wd853.52.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-200/eos_ckpt200_wd1948.24.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-400/eos_ckpt400_wd1595.70.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-8200/eos_ckpt8200_wd408.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-2800/eos_ckpt2800_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-5000/eos_ckpt5000_wd667.97.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-5800/eos_ckpt5800_wd742.19.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-1000/eos_ckpt1000_wd890.62.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-9400/eos_ckpt9400_wd519.53.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-7400/eos_ckpt7400_wd463.87.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-9000/eos_ckpt9000_wd593.75.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-8000/eos_ckpt8000_wd723.63.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-6400/eos_ckpt6400_wd426.76.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-4400/eos_ckpt4400_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-3200/eos_ckpt3200_wd575.20.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-9600/eos_ckpt9600_wd445.31.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-5400/eos_ckpt5400_wd426.76.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_al/ckpt-2000/eos_ckpt2000_wd853.52.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-2600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-8600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-8800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-6000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-5200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-8200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-5000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-5800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-9400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-7400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-9000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-4400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-5400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-3800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-6200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-4200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-8400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-3000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-9800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-3400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-6800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-7000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-6600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-2200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-7600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-9200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-4600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-7800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text decay-ratio/nlp-ntp-pt_decay-y-gpt-xs-8k_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-10000-400-2000-8000-0.6_260124-220611/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_000_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.66-0_251222-184304/train.log filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_000_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.66-0_251222-184304/train.jsonl filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_004_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.2857-0_251222-184304/train.log filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_004_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.2857-0_251222-184304/train.jsonl filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_001_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.5-0_251222-184304/train.log filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_001_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.5-0_251222-184304/train.jsonl filter=lfs diff=lfs merge=lfs -text 
drop-ratio/251222-184304/nlp-ntp-pt_step-factor_003_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.33-0_251222-184304/train.log filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_003_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.33-0_251222-184304/train.jsonl filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_002_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.4-0_251222-184304/train.log filter=lfs diff=lfs merge=lfs -text drop-ratio/251222-184304/nlp-ntp-pt_step-factor_002_fineweb_gpt2_small-attnOgate-qkNorm_adamw-lr0.0018-b0.9-0.95-eps1e-08-wd0.1-bs512_multis-100000-400-2000-100000-0.4-0_251222-184304/train.jsonl filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3840/eos_ckpt3840_wd398.99.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1920/eos_ckpt1920_wd563.28.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5120/eos_ckpt5120_wd281.64.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-2560/eos_ckpt2560_wd774.51.png filter=lfs 
diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1280/eos_ckpt1280_wd610.22.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-4480/eos_ckpt4480_wd328.58.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3200/eos_ckpt3200_wd516.34.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-640/eos_ckpt640_wd12016.66.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5760/eos_ckpt5760_wd281.64.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1828/eos_ckpt1828_wd674.42.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1371/eos_ckpt1371_wd476.06.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3199/eos_ckpt3199_wd337.21.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2742/eos_ckpt2742_wd376.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2285/eos_ckpt2285_wd396.72.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-4113/eos_ckpt4113_wd337.21.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3656/eos_ckpt3656_wd416.55.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-457/eos_ckpt457_wd10155.93.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-914/eos_ckpt914_wd674.42.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3840/eos_ckpt3840_wd258.17.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1920/eos_ckpt1920_wd398.99.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5120/eos_ckpt5120_wd258.17.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-2560/eos_ckpt2560_wd375.52.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1280/eos_ckpt1280_wd422.46.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-4480/eos_ckpt4480_wd187.76.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3200/eos_ckpt3200_wd962.27.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-640/eos_ckpt640_wd797.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5760/eos_ckpt5760_wd258.17.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1828/eos_ckpt1828_wd535.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1371/eos_ckpt1371_wd694.25.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3199/eos_ckpt3199_wd515.73.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2742/eos_ckpt2742_wd416.55.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2285/eos_ckpt2285_wd158.69.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-4113/eos_ckpt4113_wd396.72.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3656/eos_ckpt3656_wd436.39.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-457/eos_ckpt457_wd833.10.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-914/eos_ckpt914_wd952.12.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1066/eos_ckpt1066_wd578.48.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3198/eos_ckpt3198_wd557.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3731/eos_ckpt3731_wd407.08.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2132/eos_ckpt2132_wd921.28.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4264/eos_ckpt4264_wd321.38.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-533/eos_ckpt533_wd2785.26.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2665/eos_ckpt2665_wd557.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4797/eos_ckpt4797_wd407.08.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1599/eos_ckpt1599_wd385.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-1602/eos_ckpt1602_wd484.79.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-1869/eos_ckpt1869_wd227.25.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-2136/eos_ckpt2136_wd363.60.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-801/eos_ckpt801_wd833.24.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-267/eos_ckpt267_wd999.89.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-534/eos_ckpt534_wd772.64.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-1068/eos_ckpt1068_wd424.20.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/eos_al/ckpt-1335/eos_ckpt1335_wd363.60.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260125-093459/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd236.16.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd262.40.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd708.49.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4000/eos_ckpt4000_wd367.36.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-7200/eos_ckpt7200_wd314.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-5600/eos_ckpt5600_wd78.72.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd472.33.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd288.64.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd2702.75.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1400/eos_ckpt1400_wd1312.01.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd1810.58.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd590.41.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-600/eos_ckpt600_wd1193.93.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1800/eos_ckpt1800_wd459.21.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1000/eos_ckpt1000_wd747.85.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-200/eos_ckpt200_wd1180.81.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd564.17.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd236.16.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/train.jsonl filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd185.55.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-9600/eos_ckpt9600_wd19000.00.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd371.09.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-11200/eos_ckpt11200_wd148.44.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-8000/eos_ckpt8000_wd222.66.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-14400/eos_ckpt14400_wd333.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd1298.83.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-12800/eos_ckpt12800_wd222.66.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd1150.39.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1828/eos_ckpt1828_wd376.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1371/eos_ckpt1371_wd575.24.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3199/eos_ckpt3199_wd2082.76.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2742/eos_ckpt2742_wd317.37.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2285/eos_ckpt2285_wd515.73.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-4113/eos_ckpt4113_wd277.70.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3656/eos_ckpt3656_wd436.39.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-457/eos_ckpt457_wd872.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-914/eos_ckpt914_wd2082.76.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3840/eos_ckpt3840_wd445.93.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1920/eos_ckpt1920_wd516.34.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5120/eos_ckpt5120_wd375.52.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-2560/eos_ckpt2560_wd797.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1280/eos_ckpt1280_wd727.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-4480/eos_ckpt4480_wd821.45.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3200/eos_ckpt3200_wd563.28.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-640/eos_ckpt640_wd2135.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5760/eos_ckpt5760_wd445.93.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd314.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd341.12.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd944.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4000/eos_ckpt4000_wd236.16.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-7200/eos_ckpt7200_wd262.40.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-5600/eos_ckpt5600_wd288.64.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd446.08.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd314.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd367.36.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2000/eos_ckpt2000_wd705.08.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2800/eos_ckpt2800_wd482.42.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd9500.00.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd705.08.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd500.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd389.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3600/eos_ckpt3600_wd872.07.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd723.63.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd983.40.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/train.jsonl filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd630.86.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-9600/eos_ckpt9600_wd371.09.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd482.42.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-11200/eos_ckpt11200_wd259.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-8000/eos_ckpt8000_wd593.75.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-14400/eos_ckpt14400_wd296.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd482.42.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-12800/eos_ckpt12800_wd333.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd1039.06.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2000/eos_ckpt2000_wd630.86.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2800/eos_ckpt2800_wd760.74.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd1781.25.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd890.62.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd1057.62.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd519.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3600/eos_ckpt3600_wd612.30.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd964.84.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd612.30.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-1920/eos_ckpt1920_wd381.70.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-2560/eos_ckpt2560_wd448.09.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-2240/eos_ckpt2240_wd182.55.png filter=lfs diff=lfs merge=lfs 
-text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-1280/eos_ckpt1280_wd531.07.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-960/eos_ckpt960_wd713.62.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-640/eos_ckpt640_wd531.07.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-1600/eos_ckpt1600_wd398.30.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-320/eos_ckpt320_wd49.79.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/eos_al/ckpt-2880/eos_ckpt2880_wd398.30.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.00447213595499958-b0.9-0.9378956176563621-eps1e-08-wd0.1-bs640_wsd-3200-320-6400-8000-0_260125-093459/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd314.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd288.64.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd3883.56.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4000/eos_ckpt4000_wd367.36.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-7200/eos_ckpt7200_wd446.08.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-5600/eos_ckpt5600_wd236.16.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd288.64.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd288.64.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd551.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/train.jsonl filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd259.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-9600/eos_ckpt9600_wd222.66.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd185.55.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-11200/eos_ckpt11200_wd519.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-8000/eos_ckpt8000_wd1632.81.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-14400/eos_ckpt14400_wd185.55.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd333.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-12800/eos_ckpt12800_wd185.55.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd593.75.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1400/eos_ckpt1400_wd826.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd879.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd472.33.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-600/eos_ckpt600_wd997.13.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1800/eos_ckpt1800_wd459.21.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1000/eos_ckpt1000_wd734.73.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-200/eos_ckpt200_wd813.45.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd2624.03.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd603.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1400/eos_ckpt1400_wd328.00.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd1062.73.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd590.41.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-600/eos_ckpt600_wd682.25.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1800/eos_ckpt1800_wd564.17.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1000/eos_ckpt1000_wd577.29.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-200/eos_ckpt200_wd3870.44.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd498.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd314.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-916/eos_ckpt916_wd561.04.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-687/eos_ckpt687_wd939.74.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-458/eos_ckpt458_wd771.43.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-1374/eos_ckpt1374_wd448.83.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-1145/eos_ckpt1145_wd462.86.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-229/eos_ckpt229_wd1178.19.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-1832/eos_ckpt1832_wd448.83.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/eos_al/ckpt-1603/eos_ckpt1603_wd336.62.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0052915026221291815-b0.9-0.914147570219414-eps1e-08-wd0.1-bs896_wsd-2286-229-4571-5714-0_260125-093459/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1066/eos_ckpt1066_wd535.63.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3198/eos_ckpt3198_wd235.68.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3731/eos_ckpt3731_wd1756.86.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2132/eos_ckpt2132_wd428.50.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4264/eos_ckpt4264_wd707.03.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-533/eos_ckpt533_wd2528.16.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2665/eos_ckpt2665_wd8484.34.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4797/eos_ckpt4797_wd235.68.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1599/eos_ckpt1599_wd278.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1828/eos_ckpt1828_wd932.28.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-1371/eos_ckpt1371_wd1051.30.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3199/eos_ckpt3199_wd833.10.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2742/eos_ckpt2742_wd555.40.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-2285/eos_ckpt2285_wd833.10.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-4113/eos_ckpt4113_wd555.40.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-3656/eos_ckpt3656_wd476.06.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-457/eos_ckpt457_wd1824.89.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/eos_al/ckpt-914/eos_ckpt914_wd2538.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0037416573867739412-b0.9-0.9561106474772749-eps1e-08-wd0.1-bs448_wsd-4571-457-9143-11429-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1066/eos_ckpt1066_wd1135.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3198/eos_ckpt3198_wd492.78.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3731/eos_ckpt3731_wd385.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2132/eos_ckpt2132_wd1156.96.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4264/eos_ckpt4264_wd557.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-533/eos_ckpt533_wd2078.24.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2665/eos_ckpt2665_wd514.20.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4797/eos_ckpt4797_wd428.50.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1599/eos_ckpt1599_wd707.03.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1066/eos_ckpt1066_wd621.33.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3198/eos_ckpt3198_wd235.68.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-3731/eos_ckpt3731_wd449.93.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2132/eos_ckpt2132_wd664.18.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4264/eos_ckpt4264_wd385.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-533/eos_ckpt533_wd878.43.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-2665/eos_ckpt2665_wd299.95.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-4797/eos_ckpt4797_wd278.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/eos_al/ckpt-1599/eos_ckpt1599_wd728.45.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0034641016151377543-b0.9-0.9622606002309622-eps1e-08-wd0.1-bs384_wsd-5333-533-10667-13333-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd419.84.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd288.64.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd472.33.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4000/eos_ckpt4000_wd341.12.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-7200/eos_ckpt7200_wd367.36.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-5600/eos_ckpt5600_wd209.92.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd419.84.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd446.08.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd524.81.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1400/eos_ckpt1400_wd498.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd642.89.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd498.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1800/eos_ckpt1800_wd708.49.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-600/eos_ckpt600_wd656.01.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1000/eos_ckpt1000_wd498.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-200/eos_ckpt200_wd6717.51.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd603.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd1075.85.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3840/eos_ckpt3840_wd422.46.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1920/eos_ckpt1920_wd305.11.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5120/eos_ckpt5120_wd352.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-2560/eos_ckpt2560_wd469.40.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-1280/eos_ckpt1280_wd516.34.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-4480/eos_ckpt4480_wd305.11.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-3200/eos_ckpt3200_wd422.46.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-640/eos_ckpt640_wd680.63.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/eos_al/ckpt-5760/eos_ckpt5760_wd258.17.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0031622776601683794-b0.9-0.9684501110828384-eps1e-08-wd0.1-bs320_wsd-6400-640-12800-16000-0_260125-095909/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2000/eos_ckpt2000_wd445.31.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2800/eos_ckpt2800_wd352.54.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd1187.50.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd445.31.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd519.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd408.20.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3600/eos_ckpt3600_wd482.42.png filter=lfs diff=lfs 
merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd816.41.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd575.20.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd446.08.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd551.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd1259.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-4000/eos_ckpt4000_wd603.53.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-7200/eos_ckpt7200_wd682.25.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-5600/eos_ckpt5600_wd341.12.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd524.81.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd472.33.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd656.01.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1400/eos_ckpt1400_wd1115.21.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd708.49.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd970.89.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-600/eos_ckpt600_wd682.25.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1800/eos_ckpt1800_wd511.69.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1000/eos_ckpt1000_wd826.57.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-200/eos_ckpt200_wd1443.22.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd695.37.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd931.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_c4-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2000/eos_ckpt2000_wd296.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2800/eos_ckpt2800_wd222.66.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd797.85.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd630.86.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd389.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd426.76.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3600/eos_ckpt3600_wd463.87.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd630.86.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd389.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_arxiv-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/train.jsonl filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd630.86.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-9600/eos_ckpt9600_wd333.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd333.98.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-11200/eos_ckpt11200_wd259.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-8000/eos_ckpt8000_wd296.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-14400/eos_ckpt14400_wd593.75.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd519.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-12800/eos_ckpt12800_wd222.66.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd742.19.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_dclm-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/train.jsonl filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-4800/eos_ckpt4800_wd296.88.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-9600/eos_ckpt9600_wd333.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-6400/eos_ckpt6400_wd333.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-11200/eos_ckpt11200_wd259.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-8000/eos_ckpt8000_wd259.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-14400/eos_ckpt14400_wd482.42.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd445.31.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-12800/eos_ckpt12800_wd482.42.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd333.98.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_openwebtext-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2000/eos_ckpt2000_wd723.63.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2800/eos_ckpt2800_wd241.21.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-400/eos_ckpt400_wd1131.84.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-800/eos_ckpt800_wd408.20.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-2400/eos_ckpt2400_wd278.32.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3200/eos_ckpt3200_wd352.54.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-3600/eos_ckpt3600_wd315.43.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1200/eos_ckpt1200_wd482.42.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/eos_al/ckpt-1600/eos_ckpt1600_wd426.76.png filter=lfs diff=lfs merge=lfs -text eosBSZ-power-rule/nlp-ntp-pt_ecbz-gpt-xs-8k_0_wikipedia-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260125-001937/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-1602/eos_ckpt1602_wd515.09.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-2136/eos_ckpt2136_wd908.99.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-1869/eos_ckpt1869_wd681.74.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-801/eos_ckpt801_wd1060.49.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-267/eos_ckpt267_wd1363.49.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-534/eos_ckpt534_wd818.09.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-1068/eos_ckpt1068_wd984.74.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_al/ckpt-1335/eos_ckpt1335_wd545.39.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-1602/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-1869/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-2136/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-801/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-267/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-534/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-1068/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004898979485566356-b0.9-0.9259454627568515-eps1e-08-wd0.1-bs768_wsd-2667-267-5333-6667-0_260123-212139/eos_eig/ckpt-1335/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-1400/eos_ckpt1400_wd774.09.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-400/eos_ckpt400_wd957.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-800/eos_ckpt800_wd511.69.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-600/eos_ckpt600_wd984.01.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-1800/eos_ckpt1800_wd957.77.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-1000/eos_ckpt1000_wd1102.09.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-200/eos_ckpt200_wd1272.65.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-1200/eos_ckpt1200_wd813.45.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_al/ckpt-1600/eos_ckpt1600_wd551.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-1400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-1800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-1000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.005656854249492381-b0.9-0.9025-eps1e-08-wd0.1-bs1024_wsd-2000-200-4000-5000-0_260123-212139/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-133/eos_ckpt133_wd1349.78.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-798/eos_ckpt798_wd985.55.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-931/eos_ckpt931_wd642.75.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-1064/eos_ckpt1064_wd578.48.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-665/eos_ckpt665_wd921.28.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-532/eos_ckpt532_wd739.17.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-399/eos_ckpt399_wd803.44.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-266/eos_ckpt266_wd1489.04.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_al/ckpt-1197/eos_ckpt1197_wd1199.81.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-133/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-798/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-931/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-1064/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-665/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-532/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-399/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-266/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006928203230275509-b0.9-0.8573749999999999-eps1e-08-wd0.1-bs1536_wsd-1333-133-2667-3333-0_260123-212139/eos_eig/ckpt-1197/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-2000/eos_ckpt2000_wd575.20.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-2800/eos_ckpt2800_wd352.54.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-400/eos_ckpt400_wd1428.71.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-800/eos_ckpt800_wd1094.73.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-2400/eos_ckpt2400_wd482.42.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-3200/eos_ckpt3200_wd575.20.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-3600/eos_ckpt3600_wd445.31.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-1200/eos_ckpt1200_wd556.64.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_al/ckpt-1600/eos_ckpt1600_wd909.18.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-2000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-2800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-3600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-1200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.004-b0.9-0.95-eps1e-08-wd0.1-bs512_wsd-4000-400-8000-10000-0_260123-212139/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/train.jsonl filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-4800/eos_ckpt4800_wd482.42.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-9600/eos_ckpt9600_wd779.30.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-6400/eos_ckpt6400_wd2226.56.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-11200/eos_ckpt11200_wd259.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-8000/eos_ckpt8000_wd371.09.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-14400/eos_ckpt14400_wd519.53.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-3200/eos_ckpt3200_wd742.19.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-12800/eos_ckpt12800_wd445.31.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_al/ckpt-1600/eos_ckpt1600_wd964.84.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-9600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-11200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-8000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-14400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-12800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.002-b0.9-0.9872585449014338-eps1e-08-wd0.1-bs128_wsd-16000-1600-32000-40000-0_260123-212139/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-160/eos_ckpt160_wd152.56.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-800/eos_ckpt800_wd633.69.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-1280/eos_ckpt1280_wd751.04.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-960/eos_ckpt960_wd575.02.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-1120/eos_ckpt1120_wd692.37.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-480/eos_ckpt480_wd751.04.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-640/eos_ckpt640_wd1079.62.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-1440/eos_ckpt1440_wd422.46.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_al/ckpt-320/eos_ckpt320_wd739.31.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-160/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-1280/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-960/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-1120/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-480/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-640/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-1440/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.006324555320336759-b0.9-0.8796481896190089-eps1e-08-wd0.1-bs1280_wsd-1600-160-3200-4000-0_260123-212139/eos_eig/ckpt-320/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-300/eos_ckpt300_wd1363.77.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-700/eos_ckpt700_wd1224.61.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-500/eos_ckpt500_wd946.29.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-400/eos_ckpt400_wd1187.50.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-800/eos_ckpt800_wd816.41.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-600/eos_ckpt600_wd751.46.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-900/eos_ckpt900_wd742.19.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-100/eos_ckpt100_wd4239.75.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_al/ckpt-200/eos_ckpt200_wd1744.14.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-300/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-700/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-500/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-900/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-100/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.008-b0.9-0.8145062499999999-eps1e-08-wd0.1-bs2048_wsd-1000-100-2000-2500-0_260123-212139/eos_eig/ckpt-200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-4800/eos_ckpt4800_wd472.33.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-6400/eos_ckpt6400_wd419.84.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-800/eos_ckpt800_wd1810.58.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-4000/eos_ckpt4000_wd498.57.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-7200/eos_ckpt7200_wd367.36.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-5600/eos_ckpt5600_wd393.60.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-2400/eos_ckpt2400_wd524.81.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-3200/eos_ckpt3200_wd577.29.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_al/ckpt-1600/eos_ckpt1600_wd551.05.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/plots/nlp_training_info.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-4800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-6400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-800/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-4000/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-7200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-5600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-2400/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-3200/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text 
eosBSZ-sqrt-rule/nlp-ntp-pt_ecbz-gpt-xs-sqrt_0_fineweb-8k_gpt2-xs-attnOgate-qkNorm_adamw-lr0.0028284271247461905-b0.9-0.9746794344808963-eps1e-08-wd0.1-bs256_wsd-8000-800-16000-20000-0_260123-212139/eos_eig/ckpt-1600/eigenvector_analysis.png filter=lfs diff=lfs merge=lfs -text