diff --git a/README.md b/README.md index 7be5fc7f47d5db027d120b8024982df93db95b74..4f59b671d40c37ad376a46cf4c681e9f919fd42c 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,69 @@ ---- -license: mit ---- +--- +license: mit +library_name: pytorch +tags: + - test-time-training + - conformal-prediction + - reasoning + - early-stopping + - llm +datasets: + - wzekai99/ORCA +--- + +# ORCA TTT-Probes + +Trained Test-Time Training probes for *Online Reasoning Calibration: Test-Time Training Enables Generalizable Conformal LLM Reasoning* ([arXiv:2604.01170](https://arxiv.org/abs/2604.01170)). + +## Layout (17 probes) + +``` +qwen2.5-32b/supervised/{no_kq, qk_dh128, + qk_dh32, qk_dh64, qk_dh256, qk_dh512, + qk_dh128_ln, qk_dh128_ln_res, qk_dh128_share_kq, + qk_dh128_eta_learn, qk_dh128_mlp}/ +qwen2.5-32b/consistent/{no_kq, qk_dh128}/ +qwq-32b/supervised/{no_kq, qk_dh128}/ +llama-3.3-70b/supervised/{no_kq, qk_dh128}/ +``` + +Per probe directory: + +| File | Contents | +|-------------------|----------------------------------------------------------------| +| `probe.pt` | State dict: W0, b0, log_eta; QK variants also include theta_K, theta_Q | +| `config.json` | Training hyperparameters (d_hidden, base_lr, epochs, ...) | +| `lambdas.json` | LTT thresholds, keyed by delta | +| `metrics.json` | Step-level savings and error rate per delta | +| `ood_*.json` | Per-OOD-benchmark metrics (AIME24/25/26, MATH500, GPQA-Diamond), where available | + +## Use + +Probes are loaded by the `TTTProbe` class in https://github.com/wzekai99/ORCA. Quick example: + +```bash +hf download wzekai99/ORCA --local-dir probes +hf download wzekai99/ORCA --repo-type dataset --local-dir data +python code/test.py \ + --method ttt --no_kq \ + --dataset_path data/qwen2.5-32b/s1k.pkl \ + data/qwen2.5-32b/openr1_2k.pkl \ + data/qwen2.5-32b/deepmath_2k.pkl \ + --probe_path probes/qwen2.5-32b/supervised/no_kq/probe.pt \ + --label_mode supervised --delta 0.1 --epsilon 0.05 +``` + +## License + +MIT. 
+ +## Citation + +```bibtex +@article{zhou2026online, + title={Online Reasoning Calibration: Test-Time Training Enables Generalizable Conformal LLM Reasoning}, + author={Zhou, Cai and Wang, Zekai and Wu, Menghua and Zhu, Qianyu Julie and Shi, Flora C and Wang, Chenyu and Wilson, Ashia and Jaakkola, Tommi and Bates, Stephen}, + journal={arXiv preprint arXiv:2604.01170}, + year={2026} +} +``` diff --git a/llama-3.3-70b/supervised/no_kq/config.json b/llama-3.3-70b/supervised/no_kq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..21f426c828f559ae41568c96613d92fd57556a98 --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/config.json @@ -0,0 +1,43 @@ +{ + "config": "configs/llama70b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/llama70b/s1k/dataset.pkl", + "data_prepare/output/llama70b/openr1_2k/dataset.pkl", + "data_prepare/output/llama70b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/llama70b/aime24/dataset.pkl", + "data_prepare/output/llama70b/aime25/dataset.pkl", + "data_prepare/output/llama70b/aime26/dataset.pkl", + "data_prepare/output/llama70b/math500/dataset.pkl", + "data_prepare/output/llama70b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/llama70b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__no_kq__lr0.01__ep40", + "d_hidden": 64, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 20, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": true, + "grad_clip": 1.0, + "force_retrain": true, + "save_every": 10, + "d_phi": 8192, + "timestamp": "2026-03-30T01:32:49.432549", + "release_target": "llama-3.3-70b/supervised/no_kq", + "release_probe_source": "llama70b_5k/supervised/ttt__no_kq__lr0.01__ep40/checkpoints/probe_ep20.pt" +} \ No newline at end of 
file diff --git a/llama-3.3-70b/supervised/no_kq/lambdas.json b/llama-3.3-70b/supervised/no_kq/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..e179150aad2739907486b2d26696bfac709afc52 --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9382, + "0.025": 0.9159, + "0.05": 0.8886000000000001, + "0.1": 0.8489, + "0.15": 0.8142, + "0.2": 0.7734, + "0.25": 0.7363, + "0.3": 0.7017, + "0.35": 0.6558999999999999, + "0.4": 0.5794, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/no_kq/metrics.json b/llama-3.3-70b/supervised/no_kq/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..906a41a105d2d9fb9b0e7cac4c3fd38b35c643b0 --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9382, + "error_rate": 0.0086, + "savings": 0.052, + "accuracy": 0.9914 + }, + "0.025": { + "lambda": 0.9159, + "error_rate": 0.0235, + "savings": 0.1457, + "accuracy": 0.9765 + }, + "0.05": { + "lambda": 0.8886000000000001, + "error_rate": 0.046, + "savings": 0.2702, + "accuracy": 0.954 + }, + "0.1": { + "lambda": 0.8489, + "error_rate": 0.0898, + "savings": 0.4238, + "accuracy": 0.9102 + }, + "0.15": { + "lambda": 0.8142, + "error_rate": 0.1305, + "savings": 0.5281, + "accuracy": 0.8695 + }, + "0.2": { + "lambda": 0.7734, + "error_rate": 0.1861, + "savings": 0.6321, + "accuracy": 0.8139 + }, + "0.25": { + "lambda": 0.7363, + "error_rate": 0.2257, + "savings": 0.7091, + "accuracy": 0.7743 + }, + "0.3": { + "lambda": 0.7017, + "error_rate": 0.2717, + "savings": 0.7679, + "accuracy": 0.7283 + }, + "0.35": { + "lambda": 0.6558999999999999, + "error_rate": 0.323, + "savings": 0.834, + "accuracy": 0.677 + }, + "0.4": { + "lambda": 0.5794, + "error_rate": 0.3775, + "savings": 0.9036, + "accuracy": 0.6225 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4075, + 
"savings": 0.9497, + "accuracy": 0.5925 + } + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/no_kq/ood_aime24.json b/llama-3.3-70b/supervised/no_kq/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..2171776faa299cce2242b72ba2da1ca92733de7f --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9382, + "error_rate": 0.0, + "savings": 0.0024, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9159, + "error_rate": 0.0, + "savings": 0.0338, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.8886000000000001, + "error_rate": 0.0, + "savings": 0.0952, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8489, + "error_rate": 0.0435, + "savings": 0.2057, + "accuracy": 0.9565 + }, + "0.15": { + "lambda": 0.8142, + "error_rate": 0.087, + "savings": 0.3153, + "accuracy": 0.913 + }, + "0.2": { + "lambda": 0.7734, + "error_rate": 0.2174, + "savings": 0.3871, + "accuracy": 0.7826 + }, + "0.25": { + "lambda": 0.7363, + "error_rate": 0.2609, + "savings": 0.5131, + "accuracy": 0.7391 + }, + "0.3": { + "lambda": 0.7017, + "error_rate": 0.3043, + "savings": 0.5721, + "accuracy": 0.6957 + }, + "0.35": { + "lambda": 0.6558999999999999, + "error_rate": 0.3913, + "savings": 0.6936, + "accuracy": 0.6087 + }, + "0.4": { + "lambda": 0.5794, + "error_rate": 0.4783, + "savings": 0.7992, + "accuracy": 0.5217 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.5217, + "savings": 0.9626, + "accuracy": 0.4783 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/no_kq/ood_aime25.json b/llama-3.3-70b/supervised/no_kq/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..9a55ce9d96260eec28de36f3ac11f16e8e5205af --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9382, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9159, + 
"error_rate": 0.0, + "savings": 0.0118, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.8886000000000001, + "error_rate": 0.0, + "savings": 0.1162, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8489, + "error_rate": 0.0476, + "savings": 0.2534, + "accuracy": 0.9524 + }, + "0.15": { + "lambda": 0.8142, + "error_rate": 0.0952, + "savings": 0.3326, + "accuracy": 0.9048 + }, + "0.2": { + "lambda": 0.7734, + "error_rate": 0.2381, + "savings": 0.4854, + "accuracy": 0.7619 + }, + "0.25": { + "lambda": 0.7363, + "error_rate": 0.2381, + "savings": 0.5396, + "accuracy": 0.7619 + }, + "0.3": { + "lambda": 0.7017, + "error_rate": 0.3333, + "savings": 0.7042, + "accuracy": 0.6667 + }, + "0.35": { + "lambda": 0.6558999999999999, + "error_rate": 0.4286, + "savings": 0.7611, + "accuracy": 0.5714 + }, + "0.4": { + "lambda": 0.5794, + "error_rate": 0.6667, + "savings": 0.8989, + "accuracy": 0.3333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.7619, + "savings": 0.9683, + "accuracy": 0.2381 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/no_kq/ood_aime26.json b/llama-3.3-70b/supervised/no_kq/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..54767eed0fe53c9657551ac3902ee9076daa9fe1 --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9382, + "error_rate": 0.0, + "savings": 0.0131, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9159, + "error_rate": 0.0, + "savings": 0.0246, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.8886000000000001, + "error_rate": 0.0, + "savings": 0.0873, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8489, + "error_rate": 0.0385, + "savings": 0.2188, + "accuracy": 0.9615 + }, + "0.15": { + "lambda": 0.8142, + "error_rate": 0.1154, + "savings": 0.3183, + "accuracy": 0.8846 + }, + "0.2": { + "lambda": 0.7734, + "error_rate": 0.2692, + "savings": 0.5766, + "accuracy": 0.7308 + }, + "0.25": { + "lambda": 0.7363, + 
"error_rate": 0.3846, + "savings": 0.6703, + "accuracy": 0.6154 + }, + "0.3": { + "lambda": 0.7017, + "error_rate": 0.4231, + "savings": 0.734, + "accuracy": 0.5769 + }, + "0.35": { + "lambda": 0.6558999999999999, + "error_rate": 0.5385, + "savings": 0.8369, + "accuracy": 0.4615 + }, + "0.4": { + "lambda": 0.5794, + "error_rate": 0.6154, + "savings": 0.9442, + "accuracy": 0.3846 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.6154, + "savings": 0.9686, + "accuracy": 0.3846 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/no_kq/ood_gpqa_diamond.json b/llama-3.3-70b/supervised/no_kq/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..1e4e6c070d6fc6aa5fb37af847ddeab5d1aafcb9 --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9382, + "error_rate": 0.0377, + "savings": 0.097, + "accuracy": 0.9623 + }, + "0.025": { + "lambda": 0.9159, + "error_rate": 0.0849, + "savings": 0.2106, + "accuracy": 0.9151 + }, + "0.05": { + "lambda": 0.8886000000000001, + "error_rate": 0.1887, + "savings": 0.3912, + "accuracy": 0.8113 + }, + "0.1": { + "lambda": 0.8489, + "error_rate": 0.3491, + "savings": 0.6266, + "accuracy": 0.6509 + }, + "0.15": { + "lambda": 0.8142, + "error_rate": 0.3868, + "savings": 0.7771, + "accuracy": 0.6132 + }, + "0.2": { + "lambda": 0.7734, + "error_rate": 0.4434, + "savings": 0.8936, + "accuracy": 0.5566 + }, + "0.25": { + "lambda": 0.7363, + "error_rate": 0.4528, + "savings": 0.9361, + "accuracy": 0.5472 + }, + "0.3": { + "lambda": 0.7017, + "error_rate": 0.4811, + "savings": 0.9536, + "accuracy": 0.5189 + }, + "0.35": { + "lambda": 0.6558999999999999, + "error_rate": 0.4811, + "savings": 0.9657, + "accuracy": 0.5189 + }, + "0.4": { + "lambda": 0.5794, + "error_rate": 0.4811, + "savings": 0.9695, + "accuracy": 0.5189 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4811, + "savings": 0.9695, 
+ "accuracy": 0.5189 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/no_kq/ood_math500.json b/llama-3.3-70b/supervised/no_kq/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..04f85c1dcb401f236a26f553fa9eb720f57922c6 --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9382, + "error_rate": 0.002, + "savings": 0.1291, + "accuracy": 0.998 + }, + "0.025": { + "lambda": 0.9159, + "error_rate": 0.0041, + "savings": 0.2712, + "accuracy": 0.9959 + }, + "0.05": { + "lambda": 0.8886000000000001, + "error_rate": 0.0122, + "savings": 0.4343, + "accuracy": 0.9878 + }, + "0.1": { + "lambda": 0.8489, + "error_rate": 0.0265, + "savings": 0.599, + "accuracy": 0.9735 + }, + "0.15": { + "lambda": 0.8142, + "error_rate": 0.0407, + "savings": 0.6907, + "accuracy": 0.9593 + }, + "0.2": { + "lambda": 0.7734, + "error_rate": 0.0713, + "savings": 0.7782, + "accuracy": 0.9287 + }, + "0.25": { + "lambda": 0.7363, + "error_rate": 0.0774, + "savings": 0.8149, + "accuracy": 0.9226 + }, + "0.3": { + "lambda": 0.7017, + "error_rate": 0.0957, + "savings": 0.8389, + "accuracy": 0.9043 + }, + "0.35": { + "lambda": 0.6558999999999999, + "error_rate": 0.1079, + "savings": 0.8603, + "accuracy": 0.8921 + }, + "0.4": { + "lambda": 0.5794, + "error_rate": 0.1161, + "savings": 0.8721, + "accuracy": 0.8839 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.1181, + "savings": 0.8764, + "accuracy": 0.8819 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/no_kq/probe.pt b/llama-3.3-70b/supervised/no_kq/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..c59a18d0de701a9f42e586b375079792d8fef5e7 --- /dev/null +++ b/llama-3.3-70b/supervised/no_kq/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4fd23e7f353e515c4829282b8ff92f01ce1ea5b447da6219a2a42dea3b4af8f +size 34940 diff --git 
a/llama-3.3-70b/supervised/qk_dh128/config.json b/llama-3.3-70b/supervised/qk_dh128/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9a7655a4b8609de1c025875d34f9e13e664d71ec --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/config.json @@ -0,0 +1,43 @@ +{ + "config": "configs/llama70b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/llama70b/s1k/dataset.pkl", + "data_prepare/output/llama70b/openr1_2k/dataset.pkl", + "data_prepare/output/llama70b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/llama70b/aime24/dataset.pkl", + "data_prepare/output/llama70b/aime25/dataset.pkl", + "data_prepare/output/llama70b/aime26/dataset.pkl", + "data_prepare/output/llama70b/math500/dataset.pkl", + "data_prepare/output/llama70b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/llama70b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01__ep40", + "d_hidden": 128, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": true, + "save_every": 10, + "d_phi": 8192, + "timestamp": "2026-03-30T01:38:20.174996", + "release_target": "llama-3.3-70b/supervised/qk_dh128", + "release_probe_source": "llama70b_5k/supervised/ttt__dh128__lr0.01__final_ep10/probe.pt" +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/lambdas.json b/llama-3.3-70b/supervised/qk_dh128/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..44424e7fdfb3b977abe12841045455fdc75eeae5 --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9969, + "0.025": 0.9913, + "0.05": 0.9856, + "0.1": 0.971, + "0.15": 0.9573, + 
"0.2": 0.9441, + "0.25": 0.9275, + "0.3": 0.9108, + "0.35": 0.877, + "0.4": 0.8209, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/metrics.json b/llama-3.3-70b/supervised/qk_dh128/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..8031ed11466812fb30006fbaec7ee320b52267ab --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9969, + "error_rate": 0.0053, + "savings": 0.0223, + "accuracy": 0.9947 + }, + "0.025": { + "lambda": 0.9913, + "error_rate": 0.016, + "savings": 0.0884, + "accuracy": 0.984 + }, + "0.05": { + "lambda": 0.9856, + "error_rate": 0.0385, + "savings": 0.1767, + "accuracy": 0.9615 + }, + "0.1": { + "lambda": 0.971, + "error_rate": 0.0813, + "savings": 0.378, + "accuracy": 0.9187 + }, + "0.15": { + "lambda": 0.9573, + "error_rate": 0.139, + "savings": 0.5199, + "accuracy": 0.861 + }, + "0.2": { + "lambda": 0.9441, + "error_rate": 0.1754, + "savings": 0.6083, + "accuracy": 0.8246 + }, + "0.25": { + "lambda": 0.9275, + "error_rate": 0.2235, + "savings": 0.7029, + "accuracy": 0.7765 + }, + "0.3": { + "lambda": 0.9108, + "error_rate": 0.2556, + "savings": 0.7558, + "accuracy": 0.7444 + }, + "0.35": { + "lambda": 0.877, + "error_rate": 0.3123, + "savings": 0.8364, + "accuracy": 0.6877 + }, + "0.4": { + "lambda": 0.8209, + "error_rate": 0.3679, + "savings": 0.9008, + "accuracy": 0.6321 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4075, + "savings": 0.9497, + "accuracy": 0.5925 + } + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/ood_aime24.json b/llama-3.3-70b/supervised/qk_dh128/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..65dea0345a4e79214116bf745bf5b75296ec00f7 --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9969, + 
"error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9913, + "error_rate": 0.0, + "savings": 0.0245, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9856, + "error_rate": 0.0, + "savings": 0.0647, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.971, + "error_rate": 0.087, + "savings": 0.1996, + "accuracy": 0.913 + }, + "0.15": { + "lambda": 0.9573, + "error_rate": 0.1739, + "savings": 0.402, + "accuracy": 0.8261 + }, + "0.2": { + "lambda": 0.9441, + "error_rate": 0.1739, + "savings": 0.4575, + "accuracy": 0.8261 + }, + "0.25": { + "lambda": 0.9275, + "error_rate": 0.3478, + "savings": 0.5821, + "accuracy": 0.6522 + }, + "0.3": { + "lambda": 0.9108, + "error_rate": 0.3913, + "savings": 0.7312, + "accuracy": 0.6087 + }, + "0.35": { + "lambda": 0.877, + "error_rate": 0.4783, + "savings": 0.8874, + "accuracy": 0.5217 + }, + "0.4": { + "lambda": 0.8209, + "error_rate": 0.5217, + "savings": 0.927, + "accuracy": 0.4783 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.5217, + "savings": 0.9626, + "accuracy": 0.4783 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/ood_aime25.json b/llama-3.3-70b/supervised/qk_dh128/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..74cd9b0a021d770d9ec55842cb20f4f482900c68 --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9969, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9913, + "error_rate": 0.0, + "savings": 0.0292, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9856, + "error_rate": 0.0, + "savings": 0.0833, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.971, + "error_rate": 0.0952, + "savings": 0.3089, + "accuracy": 0.9048 + }, + "0.15": { + "lambda": 0.9573, + "error_rate": 0.1429, + "savings": 0.3788, + "accuracy": 0.8571 + }, + "0.2": { + "lambda": 0.9441, + "error_rate": 0.1429, + "savings": 0.424, + 
"accuracy": 0.8571 + }, + "0.25": { + "lambda": 0.9275, + "error_rate": 0.2857, + "savings": 0.5585, + "accuracy": 0.7143 + }, + "0.3": { + "lambda": 0.9108, + "error_rate": 0.381, + "savings": 0.6373, + "accuracy": 0.619 + }, + "0.35": { + "lambda": 0.877, + "error_rate": 0.5238, + "savings": 0.7687, + "accuracy": 0.4762 + }, + "0.4": { + "lambda": 0.8209, + "error_rate": 0.6667, + "savings": 0.9211, + "accuracy": 0.3333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.7619, + "savings": 0.9683, + "accuracy": 0.2381 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/ood_aime26.json b/llama-3.3-70b/supervised/qk_dh128/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..65f1bcb2cae1393e0bc3207652dbb23a32fd8bec --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9969, + "error_rate": 0.0, + "savings": 0.017, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9913, + "error_rate": 0.0, + "savings": 0.0263, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9856, + "error_rate": 0.0385, + "savings": 0.0995, + "accuracy": 0.9615 + }, + "0.1": { + "lambda": 0.971, + "error_rate": 0.1154, + "savings": 0.3059, + "accuracy": 0.8846 + }, + "0.15": { + "lambda": 0.9573, + "error_rate": 0.2692, + "savings": 0.5312, + "accuracy": 0.7308 + }, + "0.2": { + "lambda": 0.9441, + "error_rate": 0.3077, + "savings": 0.5872, + "accuracy": 0.6923 + }, + "0.25": { + "lambda": 0.9275, + "error_rate": 0.3462, + "savings": 0.6452, + "accuracy": 0.6538 + }, + "0.3": { + "lambda": 0.9108, + "error_rate": 0.4231, + "savings": 0.6927, + "accuracy": 0.5769 + }, + "0.35": { + "lambda": 0.877, + "error_rate": 0.5385, + "savings": 0.8578, + "accuracy": 0.4615 + }, + "0.4": { + "lambda": 0.8209, + "error_rate": 0.5769, + "savings": 0.904, + "accuracy": 0.4231 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.6154, + "savings": 0.9686, + 
"accuracy": 0.3846 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/ood_gpqa_diamond.json b/llama-3.3-70b/supervised/qk_dh128/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..e30b711d255e303dcb1e4d5adc5d27c6873570c9 --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9969, + "error_rate": 0.0849, + "savings": 0.1121, + "accuracy": 0.9151 + }, + "0.025": { + "lambda": 0.9913, + "error_rate": 0.1321, + "savings": 0.271, + "accuracy": 0.8679 + }, + "0.05": { + "lambda": 0.9856, + "error_rate": 0.1887, + "savings": 0.3944, + "accuracy": 0.8113 + }, + "0.1": { + "lambda": 0.971, + "error_rate": 0.2925, + "savings": 0.5771, + "accuracy": 0.7075 + }, + "0.15": { + "lambda": 0.9573, + "error_rate": 0.3774, + "savings": 0.7035, + "accuracy": 0.6226 + }, + "0.2": { + "lambda": 0.9441, + "error_rate": 0.3962, + "savings": 0.7595, + "accuracy": 0.6038 + }, + "0.25": { + "lambda": 0.9275, + "error_rate": 0.434, + "savings": 0.8436, + "accuracy": 0.566 + }, + "0.3": { + "lambda": 0.9108, + "error_rate": 0.434, + "savings": 0.8973, + "accuracy": 0.566 + }, + "0.35": { + "lambda": 0.877, + "error_rate": 0.4623, + "savings": 0.9408, + "accuracy": 0.5377 + }, + "0.4": { + "lambda": 0.8209, + "error_rate": 0.4811, + "savings": 0.9649, + "accuracy": 0.5189 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4811, + "savings": 0.9695, + "accuracy": 0.5189 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/ood_math500.json b/llama-3.3-70b/supervised/qk_dh128/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..76b1de9a8003a67d08310972e6fb937ff2e18fb4 --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9969, + "error_rate": 0.0, + "savings": 0.0922, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 
0.9913, + "error_rate": 0.0081, + "savings": 0.3619, + "accuracy": 0.9919 + }, + "0.05": { + "lambda": 0.9856, + "error_rate": 0.0224, + "savings": 0.5167, + "accuracy": 0.9776 + }, + "0.1": { + "lambda": 0.971, + "error_rate": 0.0387, + "savings": 0.6876, + "accuracy": 0.9613 + }, + "0.15": { + "lambda": 0.9573, + "error_rate": 0.0591, + "savings": 0.7632, + "accuracy": 0.9409 + }, + "0.2": { + "lambda": 0.9441, + "error_rate": 0.0815, + "savings": 0.8065, + "accuracy": 0.9185 + }, + "0.25": { + "lambda": 0.9275, + "error_rate": 0.0916, + "savings": 0.8372, + "accuracy": 0.9084 + }, + "0.3": { + "lambda": 0.9108, + "error_rate": 0.1059, + "savings": 0.8582, + "accuracy": 0.8941 + }, + "0.35": { + "lambda": 0.877, + "error_rate": 0.1141, + "savings": 0.8713, + "accuracy": 0.8859 + }, + "0.4": { + "lambda": 0.8209, + "error_rate": 0.1181, + "savings": 0.8756, + "accuracy": 0.8819 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.1181, + "savings": 0.8764, + "accuracy": 0.8819 + } +} \ No newline at end of file diff --git a/llama-3.3-70b/supervised/qk_dh128/probe.pt b/llama-3.3-70b/supervised/qk_dh128/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..ede0fa0e16642661fd5bb9538877e095794a02d4 --- /dev/null +++ b/llama-3.3-70b/supervised/qk_dh128/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ca9fb1c6798e4dabe12ac3e18522ca814000b719b5fa63e8624df5808c4268 +size 8391930 diff --git a/qwen2.5-32b/consistent/no_kq/config.json b/qwen2.5-32b/consistent/no_kq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..c187ea0a3c84a8c6160b92afadfafedcb8703d0e --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], 
+ "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "consistent", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__no_kq__lr0.01", + "d_hidden": 64, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 20, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": true, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-27T22:40:13.431109", + "release_target": "qwen2.5-32b/consistent/no_kq", + "release_probe_source": "qwen32b_5k/consistent/ttt__no_kq__lr0.01/checkpoints/probe_ep20.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/no_kq/lambdas.json b/qwen2.5-32b/consistent/no_kq/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..236c007fbfc96993374331904528e71b01505e0d --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9555, + "0.025": 0.9279, + "0.05": 0.9062, + "0.1": 0.8543000000000001, + "0.15": 0.8158, + "0.2": 0.7741, + "0.25": 0.7341, + "0.3": 0.6795, + "0.35": 0.6321, + "0.4": 0.5152, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/no_kq/metrics.json b/qwen2.5-32b/consistent/no_kq/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..a68717b10a6f040ed89f5bec3ce870b206e8158a --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9555, + "error_rate": 0.011, + "savings": 0.0213, + "accuracy": 0.989 + }, + "0.025": { + 
"lambda": 0.9279, + "error_rate": 0.024, + "savings": 0.124, + "accuracy": 0.976 + }, + "0.05": { + "lambda": 0.9062, + "error_rate": 0.045, + "savings": 0.2197, + "accuracy": 0.955 + }, + "0.1": { + "lambda": 0.8543000000000001, + "error_rate": 0.096, + "savings": 0.4073, + "accuracy": 0.904 + }, + "0.15": { + "lambda": 0.8158, + "error_rate": 0.141, + "savings": 0.5292, + "accuracy": 0.859 + }, + "0.2": { + "lambda": 0.7741, + "error_rate": 0.193, + "savings": 0.6441, + "accuracy": 0.807 + }, + "0.25": { + "lambda": 0.7341, + "error_rate": 0.234, + "savings": 0.7307, + "accuracy": 0.766 + }, + "0.3": { + "lambda": 0.6795, + "error_rate": 0.296, + "savings": 0.8146, + "accuracy": 0.704 + }, + "0.35": { + "lambda": 0.6321, + "error_rate": 0.331, + "savings": 0.8668, + "accuracy": 0.669 + }, + "0.4": { + "lambda": 0.5152, + "error_rate": 0.371, + "savings": 0.9334, + "accuracy": 0.629 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.382, + "savings": 0.9522, + "accuracy": 0.618 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/no_kq/ood_aime24.json b/qwen2.5-32b/consistent/no_kq/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..b429c38b0b1f35f1a5367806488d811fd35b1f24 --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9555, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9279, + "error_rate": 0.0333, + "savings": 0.0354, + "accuracy": 0.9667 + }, + "0.05": { + "lambda": 0.9062, + "error_rate": 0.0333, + "savings": 0.0462, + "accuracy": 0.9667 + }, + "0.1": { + "lambda": 0.8543000000000001, + "error_rate": 0.0333, + "savings": 0.1406, + "accuracy": 0.9667 + }, + "0.15": { + "lambda": 0.8158, + "error_rate": 0.0333, + "savings": 0.263, + "accuracy": 0.9667 + }, + "0.2": { + "lambda": 0.7741, + "error_rate": 0.1, + "savings": 0.4018, + "accuracy": 0.9 + }, + "0.25": { + "lambda": 0.7341, + 
"error_rate": 0.2667, + "savings": 0.5115, + "accuracy": 0.7333 + }, + "0.3": { + "lambda": 0.6795, + "error_rate": 0.3333, + "savings": 0.7286, + "accuracy": 0.6667 + }, + "0.35": { + "lambda": 0.6321, + "error_rate": 0.4333, + "savings": 0.8066, + "accuracy": 0.5667 + }, + "0.4": { + "lambda": 0.5152, + "error_rate": 0.4667, + "savings": 0.945, + "accuracy": 0.5333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4667, + "savings": 0.9702, + "accuracy": 0.5333 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/no_kq/ood_aime25.json b/qwen2.5-32b/consistent/no_kq/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..95b3ea5e711aa5f778f15e9bed72a30670fb56e2 --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9555, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9279, + "error_rate": 0.0, + "savings": 0.0151, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9062, + "error_rate": 0.0, + "savings": 0.0186, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8543000000000001, + "error_rate": 0.0667, + "savings": 0.1661, + "accuracy": 0.9333 + }, + "0.15": { + "lambda": 0.8158, + "error_rate": 0.0667, + "savings": 0.2264, + "accuracy": 0.9333 + }, + "0.2": { + "lambda": 0.7741, + "error_rate": 0.1667, + "savings": 0.3693, + "accuracy": 0.8333 + }, + "0.25": { + "lambda": 0.7341, + "error_rate": 0.3, + "savings": 0.5924, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.6795, + "error_rate": 0.3333, + "savings": 0.7102, + "accuracy": 0.6667 + }, + "0.35": { + "lambda": 0.6321, + "error_rate": 0.4333, + "savings": 0.8036, + "accuracy": 0.5667 + }, + "0.4": { + "lambda": 0.5152, + "error_rate": 0.5333, + "savings": 0.9255, + "accuracy": 0.4667 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.6, + "savings": 0.9647, + "accuracy": 0.4 + } +} \ No newline at end of file diff --git 
a/qwen2.5-32b/consistent/no_kq/ood_aime26.json b/qwen2.5-32b/consistent/no_kq/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..ebc760728451781e4564fe962a069ce2b68db39b --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9555, + "error_rate": 0.0, + "savings": 0.0144, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9279, + "error_rate": 0.0, + "savings": 0.0289, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9062, + "error_rate": 0.0, + "savings": 0.0498, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8543000000000001, + "error_rate": 0.0667, + "savings": 0.1544, + "accuracy": 0.9333 + }, + "0.15": { + "lambda": 0.8158, + "error_rate": 0.1, + "savings": 0.2449, + "accuracy": 0.9 + }, + "0.2": { + "lambda": 0.7741, + "error_rate": 0.1333, + "savings": 0.3388, + "accuracy": 0.8667 + }, + "0.25": { + "lambda": 0.7341, + "error_rate": 0.3, + "savings": 0.5093, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.6795, + "error_rate": 0.3333, + "savings": 0.6242, + "accuracy": 0.6667 + }, + "0.35": { + "lambda": 0.6321, + "error_rate": 0.3333, + "savings": 0.6997, + "accuracy": 0.6667 + }, + "0.4": { + "lambda": 0.5152, + "error_rate": 0.4667, + "savings": 0.8829, + "accuracy": 0.5333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.5333, + "savings": 0.9675, + "accuracy": 0.4667 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/no_kq/ood_gpqa_diamond.json b/qwen2.5-32b/consistent/no_kq/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..079b914e29c1e79aac847c16fe74f64a066033ac --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9555, + "error_rate": 0.0101, + "savings": 0.0457, + "accuracy": 0.9899 + }, + "0.025": { + "lambda": 0.9279, + "error_rate": 0.101, + "savings": 0.209, + "accuracy": 0.899 + }, + "0.05": { + "lambda": 0.9062, 
+ "error_rate": 0.1667, + "savings": 0.3483, + "accuracy": 0.8333 + }, + "0.1": { + "lambda": 0.8543000000000001, + "error_rate": 0.3182, + "savings": 0.5983, + "accuracy": 0.6818 + }, + "0.15": { + "lambda": 0.8158, + "error_rate": 0.399, + "savings": 0.734, + "accuracy": 0.601 + }, + "0.2": { + "lambda": 0.7741, + "error_rate": 0.4495, + "savings": 0.839, + "accuracy": 0.5505 + }, + "0.25": { + "lambda": 0.7341, + "error_rate": 0.4697, + "savings": 0.8911, + "accuracy": 0.5303 + }, + "0.3": { + "lambda": 0.6795, + "error_rate": 0.4949, + "savings": 0.9306, + "accuracy": 0.5051 + }, + "0.35": { + "lambda": 0.6321, + "error_rate": 0.5101, + "savings": 0.9449, + "accuracy": 0.4899 + }, + "0.4": { + "lambda": 0.5152, + "error_rate": 0.5101, + "savings": 0.9596, + "accuracy": 0.4899 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.5101, + "savings": 0.9614, + "accuracy": 0.4899 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/no_kq/ood_math500.json b/qwen2.5-32b/consistent/no_kq/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..12afb871259052994a472b6e7520e56daa538dc0 --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9555, + "error_rate": 0.0, + "savings": 0.0352, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9279, + "error_rate": 0.0, + "savings": 0.1602, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9062, + "error_rate": 0.0, + "savings": 0.2828, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8543000000000001, + "error_rate": 0.012, + "savings": 0.5554, + "accuracy": 0.988 + }, + "0.15": { + "lambda": 0.8158, + "error_rate": 0.026, + "savings": 0.6714, + "accuracy": 0.974 + }, + "0.2": { + "lambda": 0.7741, + "error_rate": 0.038, + "savings": 0.7488, + "accuracy": 0.962 + }, + "0.25": { + "lambda": 0.7341, + "error_rate": 0.052, + "savings": 0.7962, + "accuracy": 0.948 + }, + "0.3": { + "lambda": 0.6795, + "error_rate": 
0.072, + "savings": 0.8429, + "accuracy": 0.928 + }, + "0.35": { + "lambda": 0.6321, + "error_rate": 0.08, + "savings": 0.8647, + "accuracy": 0.92 + }, + "0.4": { + "lambda": 0.5152, + "error_rate": 0.094, + "savings": 0.8833, + "accuracy": 0.906 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.1, + "savings": 0.8907, + "accuracy": 0.9 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/no_kq/probe.pt b/qwen2.5-32b/consistent/no_kq/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..9299256ab77bb925e14e2fe4039a6eeb1c279908 --- /dev/null +++ b/qwen2.5-32b/consistent/no_kq/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b71239aef69766c054f887fd49c714b68638c5810173f4bda9abc0c99877f31 +size 22652 diff --git a/qwen2.5-32b/consistent/qk_dh128/config.json b/qwen2.5-32b/consistent/qk_dh128/config.json new file mode 100644 index 0000000000000000000000000000000000000000..66d5f26aab9c5aec7d3ffa4185568bf1c589d883 --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "consistent", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01", + "d_hidden": 128, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + 
"outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-28T01:01:45.669043", + "release_target": "qwen2.5-32b/consistent/qk_dh128", + "release_probe_source": "qwen32b_5k/consistent/ttt__dh128__lr0.01/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/lambdas.json b/qwen2.5-32b/consistent/qk_dh128/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..8d38db9db51d92cf27e015d72037cd34fa7e7330 --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9921, + "0.025": 0.9767, + "0.05": 0.9482, + "0.1": 0.8952, + "0.15": 0.8351, + "0.2": 0.7674, + "0.25": 0.6921999999999999, + "0.3": 0.5946, + "0.35": 0.4928, + "0.4": 0.32909999999999995, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/metrics.json b/qwen2.5-32b/consistent/qk_dh128/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..c1f2f748a668c5909abd8edbe2b13a1404306bba --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9921, + "error_rate": 0.009, + "savings": 0.0207, + "accuracy": 0.991 + }, + "0.025": { + "lambda": 0.9767, + "error_rate": 0.033, + "savings": 0.0935, + "accuracy": 0.967 + }, + "0.05": { + "lambda": 0.9482, + "error_rate": 0.064, + "savings": 0.2315, + "accuracy": 0.936 + }, + "0.1": { + "lambda": 0.8952, + "error_rate": 0.113, + "savings": 0.3971, + "accuracy": 0.887 + }, + "0.15": { + "lambda": 0.8351, + "error_rate": 0.15, + "savings": 0.5236, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.7674, + "error_rate": 0.187, + "savings": 0.6288, + "accuracy": 0.813 + }, + "0.25": { + "lambda": 0.6921999999999999, + "error_rate": 0.227, + "savings": 0.7114, + "accuracy": 0.773 + }, + "0.3": { + 
"lambda": 0.5946, + "error_rate": 0.28, + "savings": 0.8033, + "accuracy": 0.72 + }, + "0.35": { + "lambda": 0.4928, + "error_rate": 0.323, + "savings": 0.8698, + "accuracy": 0.677 + }, + "0.4": { + "lambda": 0.32909999999999995, + "error_rate": 0.364, + "savings": 0.9308, + "accuracy": 0.636 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.382, + "savings": 0.9522, + "accuracy": 0.618 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/ood_aime24.json b/qwen2.5-32b/consistent/qk_dh128/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..62a570a5de3085908bb5b84401355c62d9cab334 --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9921, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9767, + "error_rate": 0.0, + "savings": 0.0527, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9482, + "error_rate": 0.0333, + "savings": 0.0913, + "accuracy": 0.9667 + }, + "0.1": { + "lambda": 0.8952, + "error_rate": 0.0333, + "savings": 0.1847, + "accuracy": 0.9667 + }, + "0.15": { + "lambda": 0.8351, + "error_rate": 0.0333, + "savings": 0.303, + "accuracy": 0.9667 + }, + "0.2": { + "lambda": 0.7674, + "error_rate": 0.1667, + "savings": 0.3927, + "accuracy": 0.8333 + }, + "0.25": { + "lambda": 0.6921999999999999, + "error_rate": 0.3, + "savings": 0.5937, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.5946, + "error_rate": 0.3333, + "savings": 0.6923, + "accuracy": 0.6667 + }, + "0.35": { + "lambda": 0.4928, + "error_rate": 0.4, + "savings": 0.8047, + "accuracy": 0.6 + }, + "0.4": { + "lambda": 0.32909999999999995, + "error_rate": 0.4667, + "savings": 0.9325, + "accuracy": 0.5333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4667, + "savings": 0.9702, + "accuracy": 0.5333 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/ood_aime25.json 
b/qwen2.5-32b/consistent/qk_dh128/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..58c7ea417cbbbfe03c566efb87116ec8c5d7bde0 --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9921, + "error_rate": 0.0, + "savings": 0.0028, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9767, + "error_rate": 0.0, + "savings": 0.0353, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9482, + "error_rate": 0.0, + "savings": 0.0536, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8952, + "error_rate": 0.0, + "savings": 0.1389, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8351, + "error_rate": 0.0333, + "savings": 0.2236, + "accuracy": 0.9667 + }, + "0.2": { + "lambda": 0.7674, + "error_rate": 0.1333, + "savings": 0.3198, + "accuracy": 0.8667 + }, + "0.25": { + "lambda": 0.6921999999999999, + "error_rate": 0.1667, + "savings": 0.4304, + "accuracy": 0.8333 + }, + "0.3": { + "lambda": 0.5946, + "error_rate": 0.2, + "savings": 0.5998, + "accuracy": 0.8 + }, + "0.35": { + "lambda": 0.4928, + "error_rate": 0.3333, + "savings": 0.7807, + "accuracy": 0.6667 + }, + "0.4": { + "lambda": 0.32909999999999995, + "error_rate": 0.5667, + "savings": 0.9402, + "accuracy": 0.4333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.6, + "savings": 0.9647, + "accuracy": 0.4 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/ood_aime26.json b/qwen2.5-32b/consistent/qk_dh128/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..07b720e1e189adc6ae8909547a44d026d1faaf26 --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9921, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9767, + "error_rate": 0.0, + "savings": 0.0252, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9482, + "error_rate": 0.0, + "savings": 0.055, + "accuracy": 
1.0 + }, + "0.1": { + "lambda": 0.8952, + "error_rate": 0.0, + "savings": 0.0915, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8351, + "error_rate": 0.0333, + "savings": 0.2259, + "accuracy": 0.9667 + }, + "0.2": { + "lambda": 0.7674, + "error_rate": 0.1333, + "savings": 0.3766, + "accuracy": 0.8667 + }, + "0.25": { + "lambda": 0.6921999999999999, + "error_rate": 0.1667, + "savings": 0.4618, + "accuracy": 0.8333 + }, + "0.3": { + "lambda": 0.5946, + "error_rate": 0.2333, + "savings": 0.5934, + "accuracy": 0.7667 + }, + "0.35": { + "lambda": 0.4928, + "error_rate": 0.3333, + "savings": 0.7437, + "accuracy": 0.6667 + }, + "0.4": { + "lambda": 0.32909999999999995, + "error_rate": 0.4667, + "savings": 0.8902, + "accuracy": 0.5333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.5333, + "savings": 0.9675, + "accuracy": 0.4667 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/ood_gpqa_diamond.json b/qwen2.5-32b/consistent/qk_dh128/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..5b8b0ba3d2a68007e2109160e7df98e6b108d4ef --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9921, + "error_rate": 0.0202, + "savings": 0.0274, + "accuracy": 0.9798 + }, + "0.025": { + "lambda": 0.9767, + "error_rate": 0.0758, + "savings": 0.1833, + "accuracy": 0.9242 + }, + "0.05": { + "lambda": 0.9482, + "error_rate": 0.202, + "savings": 0.3994, + "accuracy": 0.798 + }, + "0.1": { + "lambda": 0.8952, + "error_rate": 0.3283, + "savings": 0.6526, + "accuracy": 0.6717 + }, + "0.15": { + "lambda": 0.8351, + "error_rate": 0.3889, + "savings": 0.7731, + "accuracy": 0.6111 + }, + "0.2": { + "lambda": 0.7674, + "error_rate": 0.4444, + "savings": 0.8559, + "accuracy": 0.5556 + }, + "0.25": { + "lambda": 0.6921999999999999, + "error_rate": 0.4697, + "savings": 0.8948, + "accuracy": 0.5303 + }, + "0.3": { + "lambda": 0.5946, + "error_rate": 
0.4949, + "savings": 0.9192, + "accuracy": 0.5051 + }, + "0.35": { + "lambda": 0.4928, + "error_rate": 0.5101, + "savings": 0.9511, + "accuracy": 0.4899 + }, + "0.4": { + "lambda": 0.32909999999999995, + "error_rate": 0.5101, + "savings": 0.9607, + "accuracy": 0.4899 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.5101, + "savings": 0.9614, + "accuracy": 0.4899 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/ood_math500.json b/qwen2.5-32b/consistent/qk_dh128/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..0cbba8fa0d06388775635e9013f899c7687f5a36 --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9921, + "error_rate": 0.0, + "savings": 0.0768, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9767, + "error_rate": 0.002, + "savings": 0.27, + "accuracy": 0.998 + }, + "0.05": { + "lambda": 0.9482, + "error_rate": 0.008, + "savings": 0.4644, + "accuracy": 0.992 + }, + "0.1": { + "lambda": 0.8952, + "error_rate": 0.016, + "savings": 0.6371, + "accuracy": 0.984 + }, + "0.15": { + "lambda": 0.8351, + "error_rate": 0.022, + "savings": 0.7205, + "accuracy": 0.978 + }, + "0.2": { + "lambda": 0.7674, + "error_rate": 0.04, + "savings": 0.783, + "accuracy": 0.96 + }, + "0.25": { + "lambda": 0.6921999999999999, + "error_rate": 0.058, + "savings": 0.823, + "accuracy": 0.942 + }, + "0.3": { + "lambda": 0.5946, + "error_rate": 0.072, + "savings": 0.8578, + "accuracy": 0.928 + }, + "0.35": { + "lambda": 0.4928, + "error_rate": 0.086, + "savings": 0.8758, + "accuracy": 0.914 + }, + "0.4": { + "lambda": 0.32909999999999995, + "error_rate": 0.098, + "savings": 0.8893, + "accuracy": 0.902 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.1, + "savings": 0.8907, + "accuracy": 0.9 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/consistent/qk_dh128/probe.pt b/qwen2.5-32b/consistent/qk_dh128/probe.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..89f6cdfbb8d31ce5305c1bffb89176837e748a43 --- /dev/null +++ b/qwen2.5-32b/consistent/qk_dh128/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d40434084b7f3190e0f96816cdecf4d243a3567f59100f78fc34f1ca07b6242 +size 5246202 diff --git a/qwen2.5-32b/supervised/no_kq/config.json b/qwen2.5-32b/supervised/no_kq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..92c75bbbe8da8491fbcce0e557c1309e8d7031db --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__no_kq__lr0.01", + "d_hidden": 64, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 20, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": true, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-27T19:49:22.309058", + "release_target": "qwen2.5-32b/supervised/no_kq", + "release_probe_source": "qwen32b_5k/supervised/ttt__no_kq__lr0.01/checkpoints/probe_ep20.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/no_kq/lambdas.json b/qwen2.5-32b/supervised/no_kq/lambdas.json new file mode 100644 
index 0000000000000000000000000000000000000000..f2f482824216b09f3ad397916a850d5ac0b395bd --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9489, + "0.025": 0.9215, + "0.05": 0.8896, + "0.1": 0.8326, + "0.15": 0.7989999999999999, + "0.2": 0.7598, + "0.25": 0.7142999999999999, + "0.3": 0.6740999999999999, + "0.35": 0.6171, + "0.4": 0.5069, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/no_kq/metrics.json b/qwen2.5-32b/supervised/no_kq/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..e2ee573d75481ec009f29a6a370d460596055e82 --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9489, + "error_rate": 0.01, + "savings": 0.0372, + "accuracy": 0.99 + }, + "0.025": { + "lambda": 0.9215, + "error_rate": 0.0266, + "savings": 0.1437, + "accuracy": 0.9734 + }, + "0.05": { + "lambda": 0.8896, + "error_rate": 0.0532, + "savings": 0.2817, + "accuracy": 0.9468 + }, + "0.1": { + "lambda": 0.8326, + "error_rate": 0.1098, + "savings": 0.4746, + "accuracy": 0.8902 + }, + "0.15": { + "lambda": 0.7989999999999999, + "error_rate": 0.1519, + "savings": 0.5749, + "accuracy": 0.8481 + }, + "0.2": { + "lambda": 0.7598, + "error_rate": 0.1918, + "savings": 0.6731, + "accuracy": 0.8082 + }, + "0.25": { + "lambda": 0.7142999999999999, + "error_rate": 0.2583, + "savings": 0.76, + "accuracy": 0.7417 + }, + "0.3": { + "lambda": 0.6740999999999999, + "error_rate": 0.2982, + "savings": 0.8183, + "accuracy": 0.7018 + }, + "0.35": { + "lambda": 0.6171, + "error_rate": 0.3514, + "savings": 0.8793, + "accuracy": 0.6486 + }, + "0.4": { + "lambda": 0.5069, + "error_rate": 0.388, + "savings": 0.9365, + "accuracy": 0.612 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git 
a/qwen2.5-32b/supervised/no_kq/ood_aime24.json b/qwen2.5-32b/supervised/no_kq/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..d73552558f6d4214522337636f286031dfb6efdf --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9489, + "error_rate": 0.0, + "savings": 0.007, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9215, + "error_rate": 0.0, + "savings": 0.0411, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.8896, + "error_rate": 0.0, + "savings": 0.0837, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8326, + "error_rate": 0.15, + "savings": 0.2932, + "accuracy": 0.85 + }, + "0.15": { + "lambda": 0.7989999999999999, + "error_rate": 0.2, + "savings": 0.4065, + "accuracy": 0.8 + }, + "0.2": { + "lambda": 0.7598, + "error_rate": 0.25, + "savings": 0.4869, + "accuracy": 0.75 + }, + "0.25": { + "lambda": 0.7142999999999999, + "error_rate": 0.25, + "savings": 0.5858, + "accuracy": 0.75 + }, + "0.3": { + "lambda": 0.6740999999999999, + "error_rate": 0.3, + "savings": 0.666, + "accuracy": 0.7 + }, + "0.35": { + "lambda": 0.6171, + "error_rate": 0.35, + "savings": 0.7817, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.5069, + "error_rate": 0.55, + "savings": 0.96, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/no_kq/ood_aime25.json b/qwen2.5-32b/supervised/no_kq/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..df5af9ba8b94f58b2c8a55baf9ec1c9df0203441 --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9489, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9215, + "error_rate": 0.0, + "savings": 0.0281, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.8896, + "error_rate": 0.0, + "savings": 
0.0455, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8326, + "error_rate": 0.0556, + "savings": 0.265, + "accuracy": 0.9444 + }, + "0.15": { + "lambda": 0.7989999999999999, + "error_rate": 0.0556, + "savings": 0.3621, + "accuracy": 0.9444 + }, + "0.2": { + "lambda": 0.7598, + "error_rate": 0.1111, + "savings": 0.5146, + "accuracy": 0.8889 + }, + "0.25": { + "lambda": 0.7142999999999999, + "error_rate": 0.1667, + "savings": 0.6929, + "accuracy": 0.8333 + }, + "0.3": { + "lambda": 0.6740999999999999, + "error_rate": 0.3333, + "savings": 0.7742, + "accuracy": 0.6667 + }, + "0.35": { + "lambda": 0.6171, + "error_rate": 0.3333, + "savings": 0.8174, + "accuracy": 0.6667 + }, + "0.4": { + "lambda": 0.5069, + "error_rate": 0.4444, + "savings": 0.9417, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/no_kq/ood_aime26.json b/qwen2.5-32b/supervised/no_kq/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..daddedcfdb236d37320b9b01b639ae2505cb5065 --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9489, + "error_rate": 0.0, + "savings": 0.0239, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9215, + "error_rate": 0.0, + "savings": 0.0305, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.8896, + "error_rate": 0.0, + "savings": 0.0744, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8326, + "error_rate": 0.05, + "savings": 0.1979, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.7989999999999999, + "error_rate": 0.15, + "savings": 0.3098, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.7598, + "error_rate": 0.3, + "savings": 0.5139, + "accuracy": 0.7 + }, + "0.25": { + "lambda": 0.7142999999999999, + "error_rate": 0.35, + "savings": 0.6549, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.6740999999999999, + "error_rate": 
0.35, + "savings": 0.7077, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6171, + "error_rate": 0.4, + "savings": 0.7691, + "accuracy": 0.6 + }, + "0.4": { + "lambda": 0.5069, + "error_rate": 0.45, + "savings": 0.9326, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/no_kq/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/no_kq/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..25171d0a4ae951efbbeb69eda4b8097c2028f6ba --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9489, + "error_rate": 0.04, + "savings": 0.1684, + "accuracy": 0.96 + }, + "0.025": { + "lambda": 0.9215, + "error_rate": 0.13, + "savings": 0.3363, + "accuracy": 0.87 + }, + "0.05": { + "lambda": 0.8896, + "error_rate": 0.21, + "savings": 0.5039, + "accuracy": 0.79 + }, + "0.1": { + "lambda": 0.8326, + "error_rate": 0.3, + "savings": 0.7154, + "accuracy": 0.7 + }, + "0.15": { + "lambda": 0.7989999999999999, + "error_rate": 0.34, + "savings": 0.8213, + "accuracy": 0.66 + }, + "0.2": { + "lambda": 0.7598, + "error_rate": 0.39, + "savings": 0.8965, + "accuracy": 0.61 + }, + "0.25": { + "lambda": 0.7142999999999999, + "error_rate": 0.41, + "savings": 0.9342, + "accuracy": 0.59 + }, + "0.3": { + "lambda": 0.6740999999999999, + "error_rate": 0.41, + "savings": 0.9494, + "accuracy": 0.59 + }, + "0.35": { + "lambda": 0.6171, + "error_rate": 0.41, + "savings": 0.9566, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.5069, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/no_kq/ood_math500.json b/qwen2.5-32b/supervised/no_kq/ood_math500.json new file mode 100644 
index 0000000000000000000000000000000000000000..0e6dced8df074d9759dd1f34c34a14b15dd65f44 --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9489, + "error_rate": 0.0, + "savings": 0.0623, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9215, + "error_rate": 0.0, + "savings": 0.2042, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.8896, + "error_rate": 0.0062, + "savings": 0.3908, + "accuracy": 0.9938 + }, + "0.1": { + "lambda": 0.8326, + "error_rate": 0.0227, + "savings": 0.637, + "accuracy": 0.9773 + }, + "0.15": { + "lambda": 0.7989999999999999, + "error_rate": 0.033, + "savings": 0.7208, + "accuracy": 0.967 + }, + "0.2": { + "lambda": 0.7598, + "error_rate": 0.0495, + "savings": 0.7815, + "accuracy": 0.9505 + }, + "0.25": { + "lambda": 0.7142999999999999, + "error_rate": 0.066, + "savings": 0.8267, + "accuracy": 0.934 + }, + "0.3": { + "lambda": 0.6740999999999999, + "error_rate": 0.068, + "savings": 0.8473, + "accuracy": 0.932 + }, + "0.35": { + "lambda": 0.6171, + "error_rate": 0.0866, + "savings": 0.8708, + "accuracy": 0.9134 + }, + "0.4": { + "lambda": 0.5069, + "error_rate": 0.0907, + "savings": 0.8823, + "accuracy": 0.9093 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/no_kq/probe.pt b/qwen2.5-32b/supervised/no_kq/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5c68be7313363cf7257609f411f2182440f5bc6 --- /dev/null +++ b/qwen2.5-32b/supervised/no_kq/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce9b16ed9382dc67d63db1ceecbbe64f512f3ce7d152313a7cc60385bb1a385 +size 22652 diff --git a/qwen2.5-32b/supervised/qk_dh128/config.json b/qwen2.5-32b/supervised/qk_dh128/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a8ff02717f6d0e70e5b62b636edc72cbbe74788 --- 
/dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01", + "d_hidden": 128, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-28T00:26:53.748545", + "release_target": "qwen2.5-32b/supervised/qk_dh128", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh128__lr0.01/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/lambdas.json b/qwen2.5-32b/supervised/qk_dh128/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..12a57e30d83f73d29209338ed5271ab76dea50fa --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9929, + "0.025": 0.987, + "0.05": 0.9749, + "0.1": 0.9419, + "0.15": 0.9018, + "0.2": 0.8491, + "0.25": 0.7923, + "0.3": 0.7335, + "0.35": 0.6254, + "0.4": 0.39059999999999995, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/metrics.json 
b/qwen2.5-32b/supervised/qk_dh128/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..fa3b71d742557c4264a694d70a7ef2d3a6653f60 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9929, + "error_rate": 0.01, + "savings": 0.0466, + "accuracy": 0.99 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0211, + "savings": 0.1107, + "accuracy": 0.9789 + }, + "0.05": { + "lambda": 0.9749, + "error_rate": 0.0455, + "savings": 0.2332, + "accuracy": 0.9545 + }, + "0.1": { + "lambda": 0.9419, + "error_rate": 0.1031, + "savings": 0.4141, + "accuracy": 0.8969 + }, + "0.15": { + "lambda": 0.9018, + "error_rate": 0.1497, + "savings": 0.5596, + "accuracy": 0.8503 + }, + "0.2": { + "lambda": 0.8491, + "error_rate": 0.204, + "savings": 0.674, + "accuracy": 0.796 + }, + "0.25": { + "lambda": 0.7923, + "error_rate": 0.2506, + "savings": 0.7552, + "accuracy": 0.7494 + }, + "0.3": { + "lambda": 0.7335, + "error_rate": 0.2905, + "savings": 0.8134, + "accuracy": 0.7095 + }, + "0.35": { + "lambda": 0.6254, + "error_rate": 0.3437, + "savings": 0.8837, + "accuracy": 0.6563 + }, + "0.4": { + "lambda": 0.39059999999999995, + "error_rate": 0.3902, + "savings": 0.9407, + "accuracy": 0.6098 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh128/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..4d8907c7cc41ee8ef820b0dfe7d656cd7845abdb --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9929, + "error_rate": 0.0, + "savings": 0.0527, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.05, + "savings": 0.1005, + "accuracy": 0.95 + }, + "0.05": { + "lambda": 0.9749, + "error_rate": 
0.05, + "savings": 0.1472, + "accuracy": 0.95 + }, + "0.1": { + "lambda": 0.9419, + "error_rate": 0.1, + "savings": 0.2949, + "accuracy": 0.9 + }, + "0.15": { + "lambda": 0.9018, + "error_rate": 0.15, + "savings": 0.4545, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8491, + "error_rate": 0.2, + "savings": 0.5534, + "accuracy": 0.8 + }, + "0.25": { + "lambda": 0.7923, + "error_rate": 0.25, + "savings": 0.6954, + "accuracy": 0.75 + }, + "0.3": { + "lambda": 0.7335, + "error_rate": 0.35, + "savings": 0.7598, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6254, + "error_rate": 0.45, + "savings": 0.8599, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.39059999999999995, + "error_rate": 0.55, + "savings": 0.9566, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh128/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..055d3ac10c8d5a7ae96f86e715d1a5308dbed989 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9929, + "error_rate": 0.0, + "savings": 0.0077, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0, + "savings": 0.0446, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9749, + "error_rate": 0.0, + "savings": 0.0895, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9419, + "error_rate": 0.0, + "savings": 0.2581, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.9018, + "error_rate": 0.0, + "savings": 0.3786, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.8491, + "error_rate": 0.0556, + "savings": 0.5364, + "accuracy": 0.9444 + }, + "0.25": { + "lambda": 0.7923, + "error_rate": 0.1111, + "savings": 0.612, + "accuracy": 0.8889 + }, + "0.3": { + "lambda": 0.7335, + "error_rate": 0.1667, + "savings": 0.7287, + "accuracy": 0.8333 + }, + 
"0.35": { + "lambda": 0.6254, + "error_rate": 0.3889, + "savings": 0.8502, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.39059999999999995, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh128/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..4dcff8903945d566d988d8453fb9bbbbab17b46d --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9929, + "error_rate": 0.0, + "savings": 0.0046, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0, + "savings": 0.0373, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9749, + "error_rate": 0.0, + "savings": 0.061, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9419, + "error_rate": 0.05, + "savings": 0.1336, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.9018, + "error_rate": 0.2, + "savings": 0.4449, + "accuracy": 0.8 + }, + "0.2": { + "lambda": 0.8491, + "error_rate": 0.35, + "savings": 0.5436, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.7923, + "error_rate": 0.35, + "savings": 0.6172, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7335, + "error_rate": 0.35, + "savings": 0.7085, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6254, + "error_rate": 0.35, + "savings": 0.8164, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.39059999999999995, + "error_rate": 0.45, + "savings": 0.9152, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh128/ood_gpqa_diamond.json new file mode 100644 index 
0000000000000000000000000000000000000000..1c22497efbae8931b8bdc2838225f4743e179640 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9929, + "error_rate": 0.02, + "savings": 0.1469, + "accuracy": 0.98 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.07, + "savings": 0.2262, + "accuracy": 0.93 + }, + "0.05": { + "lambda": 0.9749, + "error_rate": 0.13, + "savings": 0.4141, + "accuracy": 0.87 + }, + "0.1": { + "lambda": 0.9419, + "error_rate": 0.21, + "savings": 0.6653, + "accuracy": 0.79 + }, + "0.15": { + "lambda": 0.9018, + "error_rate": 0.32, + "savings": 0.7824, + "accuracy": 0.68 + }, + "0.2": { + "lambda": 0.8491, + "error_rate": 0.37, + "savings": 0.8738, + "accuracy": 0.63 + }, + "0.25": { + "lambda": 0.7923, + "error_rate": 0.4, + "savings": 0.92, + "accuracy": 0.6 + }, + "0.3": { + "lambda": 0.7335, + "error_rate": 0.4, + "savings": 0.9379, + "accuracy": 0.6 + }, + "0.35": { + "lambda": 0.6254, + "error_rate": 0.41, + "savings": 0.9503, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.39059999999999995, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/ood_math500.json b/qwen2.5-32b/supervised/qk_dh128/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..ed2a895b08a5eb1973cd69df590bbb72121c033b --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9929, + "error_rate": 0.0021, + "savings": 0.221, + "accuracy": 0.9979 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0041, + "savings": 0.3514, + "accuracy": 0.9959 + }, + "0.05": { + "lambda": 0.9749, + "error_rate": 0.0124, + "savings": 0.5025, + "accuracy": 0.9876 + }, + "0.1": { + "lambda": 0.9419, + "error_rate": 0.0206, + 
"savings": 0.67, + "accuracy": 0.9794 + }, + "0.15": { + "lambda": 0.9018, + "error_rate": 0.033, + "savings": 0.759, + "accuracy": 0.967 + }, + "0.2": { + "lambda": 0.8491, + "error_rate": 0.0433, + "savings": 0.8072, + "accuracy": 0.9567 + }, + "0.25": { + "lambda": 0.7923, + "error_rate": 0.0701, + "savings": 0.8468, + "accuracy": 0.9299 + }, + "0.3": { + "lambda": 0.7335, + "error_rate": 0.0763, + "savings": 0.8611, + "accuracy": 0.9237 + }, + "0.35": { + "lambda": 0.6254, + "error_rate": 0.0907, + "savings": 0.8827, + "accuracy": 0.9093 + }, + "0.4": { + "lambda": 0.39059999999999995, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128/probe.pt b/qwen2.5-32b/supervised/qk_dh128/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b68155933835003142887c5ae449ea1bdcc2180 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a627981f31496677d011bbbfda9fde98daa563ace5022de644edebeda71c5e29 +size 5246202 diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/config.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d406b417d1c2ab6c17adf733ef395198eba5309c --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + 
"data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01__eta_learn", + "d_hidden": 128, + "use_ln": false, + "use_residual": false, + "learnable_eta": true, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-28T00:58:19.727054", + "release_target": "qwen2.5-32b/supervised/qk_dh128_eta_learn", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh128__lr0.01__eta_learn/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/lambdas.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..5c378ca00a6fb649bf282670cdf2837022003d34 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9925, + "0.025": 0.987, + "0.05": 0.973, + "0.1": 0.9414, + "0.15": 0.9117999999999999, + "0.2": 0.8633, + "0.25": 0.8122, + "0.3": 0.7483, + "0.35": 0.6309, + "0.4": 0.4034, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/metrics.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..28f138269a7e9afdb1a246f2e027d6aa72a746ee --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9925, + "error_rate": 0.0089, + "savings": 0.0493, + "accuracy": 0.9911 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0222, + "savings": 0.1172, 
+ "accuracy": 0.9778 + }, + "0.05": { + "lambda": 0.973, + "error_rate": 0.0499, + "savings": 0.2488, + "accuracy": 0.9501 + }, + "0.1": { + "lambda": 0.9414, + "error_rate": 0.1086, + "savings": 0.4213, + "accuracy": 0.8914 + }, + "0.15": { + "lambda": 0.9117999999999999, + "error_rate": 0.1375, + "savings": 0.5415, + "accuracy": 0.8625 + }, + "0.2": { + "lambda": 0.8633, + "error_rate": 0.1907, + "savings": 0.6656, + "accuracy": 0.8093 + }, + "0.25": { + "lambda": 0.8122, + "error_rate": 0.2406, + "savings": 0.7462, + "accuracy": 0.7594 + }, + "0.3": { + "lambda": 0.7483, + "error_rate": 0.2894, + "savings": 0.814, + "accuracy": 0.7106 + }, + "0.35": { + "lambda": 0.6309, + "error_rate": 0.3492, + "savings": 0.8879, + "accuracy": 0.6508 + }, + "0.4": { + "lambda": 0.4034, + "error_rate": 0.3869, + "savings": 0.9391, + "accuracy": 0.6131 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..222499950a34a0fd6541c9483e62335a6ba78f79 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9925, + "error_rate": 0.0, + "savings": 0.0402, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0, + "savings": 0.1031, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.973, + "error_rate": 0.0, + "savings": 0.1266, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9414, + "error_rate": 0.15, + "savings": 0.3637, + "accuracy": 0.85 + }, + "0.15": { + "lambda": 0.9117999999999999, + "error_rate": 0.15, + "savings": 0.4632, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8633, + "error_rate": 0.2, + "savings": 0.5678, + "accuracy": 0.8 + }, + "0.25": { + "lambda": 0.8122, + "error_rate": 0.35, + 
"savings": 0.6529, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7483, + "error_rate": 0.35, + "savings": 0.7639, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6309, + "error_rate": 0.45, + "savings": 0.8687, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.4034, + "error_rate": 0.55, + "savings": 0.955, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..4cda075d66173b9c06e6c6fde70328ea50c09e12 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9925, + "error_rate": 0.0, + "savings": 0.0121, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0, + "savings": 0.0414, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.973, + "error_rate": 0.0, + "savings": 0.062, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9414, + "error_rate": 0.0, + "savings": 0.2595, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.9117999999999999, + "error_rate": 0.0, + "savings": 0.3642, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.8633, + "error_rate": 0.1111, + "savings": 0.5012, + "accuracy": 0.8889 + }, + "0.25": { + "lambda": 0.8122, + "error_rate": 0.1111, + "savings": 0.5721, + "accuracy": 0.8889 + }, + "0.3": { + "lambda": 0.7483, + "error_rate": 0.2222, + "savings": 0.7155, + "accuracy": 0.7778 + }, + "0.35": { + "lambda": 0.6309, + "error_rate": 0.3333, + "savings": 0.8815, + "accuracy": 0.6667 + }, + "0.4": { + "lambda": 0.4034, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git 
a/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..64e52bcd538a628af3aaa55aacd8b41b699e7965 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9925, + "error_rate": 0.0, + "savings": 0.0048, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0, + "savings": 0.0403, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.973, + "error_rate": 0.0, + "savings": 0.0766, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9414, + "error_rate": 0.0, + "savings": 0.1038, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.9117999999999999, + "error_rate": 0.3, + "savings": 0.373, + "accuracy": 0.7 + }, + "0.2": { + "lambda": 0.8633, + "error_rate": 0.35, + "savings": 0.5338, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8122, + "error_rate": 0.35, + "savings": 0.5832, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7483, + "error_rate": 0.35, + "savings": 0.7532, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6309, + "error_rate": 0.35, + "savings": 0.8357, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.4034, + "error_rate": 0.45, + "savings": 0.9227, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..9b84f7ae54a7f3fe5e047840012b15e8179b25b1 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9925, + "error_rate": 0.06, + "savings": 0.1511, + "accuracy": 0.94 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.09, + "savings": 0.2501, + 
"accuracy": 0.91 + }, + "0.05": { + "lambda": 0.973, + "error_rate": 0.14, + "savings": 0.4311, + "accuracy": 0.86 + }, + "0.1": { + "lambda": 0.9414, + "error_rate": 0.22, + "savings": 0.6563, + "accuracy": 0.78 + }, + "0.15": { + "lambda": 0.9117999999999999, + "error_rate": 0.27, + "savings": 0.7512, + "accuracy": 0.73 + }, + "0.2": { + "lambda": 0.8633, + "error_rate": 0.35, + "savings": 0.8339, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8122, + "error_rate": 0.38, + "savings": 0.8978, + "accuracy": 0.62 + }, + "0.3": { + "lambda": 0.7483, + "error_rate": 0.41, + "savings": 0.9344, + "accuracy": 0.59 + }, + "0.35": { + "lambda": 0.6309, + "error_rate": 0.41, + "savings": 0.9539, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.4034, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_math500.json b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..442e9773a17701675818608e04d91c4782472c95 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9925, + "error_rate": 0.0021, + "savings": 0.2478, + "accuracy": 0.9979 + }, + "0.025": { + "lambda": 0.987, + "error_rate": 0.0041, + "savings": 0.3646, + "accuracy": 0.9959 + }, + "0.05": { + "lambda": 0.973, + "error_rate": 0.0103, + "savings": 0.5334, + "accuracy": 0.9897 + }, + "0.1": { + "lambda": 0.9414, + "error_rate": 0.0206, + "savings": 0.6785, + "accuracy": 0.9794 + }, + "0.15": { + "lambda": 0.9117999999999999, + "error_rate": 0.0309, + "savings": 0.7491, + "accuracy": 0.9691 + }, + "0.2": { + "lambda": 0.8633, + "error_rate": 0.0474, + "savings": 0.8017, + "accuracy": 0.9526 + }, + "0.25": { + "lambda": 0.8122, + "error_rate": 0.068, + 
"savings": 0.8414, + "accuracy": 0.932 + }, + "0.3": { + "lambda": 0.7483, + "error_rate": 0.0784, + "savings": 0.8621, + "accuracy": 0.9216 + }, + "0.35": { + "lambda": 0.6309, + "error_rate": 0.0907, + "savings": 0.881, + "accuracy": 0.9093 + }, + "0.4": { + "lambda": 0.4034, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_eta_learn/probe.pt b/qwen2.5-32b/supervised/qk_dh128_eta_learn/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..66fac4ca9f3502fa70a0053c0c69ae166a7945b4 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_eta_learn/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222dc2195b8a23b289141f2c1f576e4074324c465063c9074f7bc91599245c15 +size 5266937 diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/config.json b/qwen2.5-32b/supervised/qk_dh128_ln/config.json new file mode 100644 index 0000000000000000000000000000000000000000..59a1851bd3f988837a5cdd19a09327e607805eca --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01__ln", + "d_hidden": 128, + "use_ln": true, + 
"use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-28T03:12:16.792585", + "release_target": "qwen2.5-32b/supervised/qk_dh128_ln", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh128__lr0.01__ln/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/lambdas.json b/qwen2.5-32b/supervised/qk_dh128_ln/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..92d4a086b98080232746485884ad1be812804569 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9879, + "0.025": 0.9778, + "0.05": 0.9634, + "0.1": 0.9285, + "0.15": 0.891, + "0.2": 0.8473999999999999, + "0.25": 0.8033, + "0.3": 0.7463, + "0.35": 0.6433, + "0.4": 0.40359999999999996, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/metrics.json b/qwen2.5-32b/supervised/qk_dh128_ln/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..da6b8f9ba1fd1a1a582cad36e737f5e3c3a9fb3f --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9879, + "error_rate": 0.0144, + "savings": 0.082, + "accuracy": 0.9856 + }, + "0.025": { + "lambda": 0.9778, + "error_rate": 0.0244, + "savings": 0.1761, + "accuracy": 0.9756 + }, + "0.05": { + "lambda": 0.9634, + "error_rate": 0.0521, + "savings": 0.2812, + "accuracy": 0.9479 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0953, + "savings": 0.4506, + "accuracy": 0.9047 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.1475, + "savings": 0.5803, + "accuracy": 0.8525 + }, + "0.2": { + "lambda": 
0.8473999999999999, + "error_rate": 0.2018, + "savings": 0.6864, + "accuracy": 0.7982 + }, + "0.25": { + "lambda": 0.8033, + "error_rate": 0.2417, + "savings": 0.7492, + "accuracy": 0.7583 + }, + "0.3": { + "lambda": 0.7463, + "error_rate": 0.2916, + "savings": 0.8199, + "accuracy": 0.7084 + }, + "0.35": { + "lambda": 0.6433, + "error_rate": 0.3437, + "savings": 0.8866, + "accuracy": 0.6563 + }, + "0.4": { + "lambda": 0.40359999999999996, + "error_rate": 0.3925, + "savings": 0.9429, + "accuracy": 0.6075 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..28398063ada960667bbcac0798f963783c38c189 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9879, + "error_rate": 0.0, + "savings": 0.0156, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9778, + "error_rate": 0.0, + "savings": 0.0461, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9634, + "error_rate": 0.0, + "savings": 0.0687, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0, + "savings": 0.2652, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.15, + "savings": 0.4255, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8473999999999999, + "error_rate": 0.25, + "savings": 0.5955, + "accuracy": 0.75 + }, + "0.25": { + "lambda": 0.8033, + "error_rate": 0.35, + "savings": 0.6902, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7463, + "error_rate": 0.35, + "savings": 0.718, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6433, + "error_rate": 0.45, + "savings": 0.8833, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.40359999999999996, + "error_rate": 0.55, + "savings": 0.9598, + "accuracy": 0.45 + }, + "0.5": { + 
"lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..66d79d0a3ecbde0b8649cafcd6feff48a3f588d6 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9879, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9778, + "error_rate": 0.0, + "savings": 0.0147, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9634, + "error_rate": 0.0, + "savings": 0.0591, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0, + "savings": 0.2557, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.0, + "savings": 0.355, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.8473999999999999, + "error_rate": 0.0556, + "savings": 0.478, + "accuracy": 0.9444 + }, + "0.25": { + "lambda": 0.8033, + "error_rate": 0.0556, + "savings": 0.5771, + "accuracy": 0.9444 + }, + "0.3": { + "lambda": 0.7463, + "error_rate": 0.1111, + "savings": 0.6821, + "accuracy": 0.8889 + }, + "0.35": { + "lambda": 0.6433, + "error_rate": 0.3889, + "savings": 0.8938, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.40359999999999996, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..5ae503dc9f60acc731d8095fc7feea830f975652 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9879, + "error_rate": 0.0, + "savings": 
0.0011, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9778, + "error_rate": 0.0, + "savings": 0.0061, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9634, + "error_rate": 0.0, + "savings": 0.0589, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.05, + "savings": 0.1438, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.25, + "savings": 0.4637, + "accuracy": 0.75 + }, + "0.2": { + "lambda": 0.8473999999999999, + "error_rate": 0.35, + "savings": 0.5429, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8033, + "error_rate": 0.35, + "savings": 0.6194, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7463, + "error_rate": 0.35, + "savings": 0.7112, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6433, + "error_rate": 0.35, + "savings": 0.83, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.40359999999999996, + "error_rate": 0.45, + "savings": 0.9177, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh128_ln/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..cbff9a2c76393d3076488a1dd5124676b6b72858 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9879, + "error_rate": 0.04, + "savings": 0.1991, + "accuracy": 0.96 + }, + "0.025": { + "lambda": 0.9778, + "error_rate": 0.11, + "savings": 0.3702, + "accuracy": 0.89 + }, + "0.05": { + "lambda": 0.9634, + "error_rate": 0.19, + "savings": 0.5222, + "accuracy": 0.81 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.25, + "savings": 0.7261, + "accuracy": 0.75 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.31, + "savings": 0.8063, + "accuracy": 0.69 + }, + "0.2": { + "lambda": 0.8473999999999999, + "error_rate": 0.38, + "savings": 0.8858, + 
"accuracy": 0.62 + }, + "0.25": { + "lambda": 0.8033, + "error_rate": 0.4, + "savings": 0.9215, + "accuracy": 0.6 + }, + "0.3": { + "lambda": 0.7463, + "error_rate": 0.41, + "savings": 0.9413, + "accuracy": 0.59 + }, + "0.35": { + "lambda": 0.6433, + "error_rate": 0.41, + "savings": 0.95, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.40359999999999996, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/ood_math500.json b/qwen2.5-32b/supervised/qk_dh128_ln/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..5a21c6625818d51ae844936314657e720ea44545 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9879, + "error_rate": 0.0021, + "savings": 0.3155, + "accuracy": 0.9979 + }, + "0.025": { + "lambda": 0.9778, + "error_rate": 0.0062, + "savings": 0.4566, + "accuracy": 0.9938 + }, + "0.05": { + "lambda": 0.9634, + "error_rate": 0.0124, + "savings": 0.5708, + "accuracy": 0.9876 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0206, + "savings": 0.6966, + "accuracy": 0.9794 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.0351, + "savings": 0.7691, + "accuracy": 0.9649 + }, + "0.2": { + "lambda": 0.8473999999999999, + "error_rate": 0.0495, + "savings": 0.8127, + "accuracy": 0.9505 + }, + "0.25": { + "lambda": 0.8033, + "error_rate": 0.0722, + "savings": 0.8413, + "accuracy": 0.9278 + }, + "0.3": { + "lambda": 0.7463, + "error_rate": 0.0804, + "savings": 0.8615, + "accuracy": 0.9196 + }, + "0.35": { + "lambda": 0.6433, + "error_rate": 0.0866, + "savings": 0.8763, + "accuracy": 0.9134 + }, + "0.4": { + "lambda": 0.40359999999999996, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + 
"error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln/probe.pt b/qwen2.5-32b/supervised/qk_dh128_ln/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..a48ac5bdd9e65bfc94a66c7974ba71422b490446 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97a40c03431898c8d7b266557e2f83aa4b3e1d60e227e0b672f74c26dcc07c31 +size 5247736 diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/config.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f1a1061752605e1bbc9eab08f825c97ff337b493 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01__ln__res", + "d_hidden": 128, + "use_ln": true, + "use_residual": true, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-28T03:19:32.678675", + "release_target": 
"qwen2.5-32b/supervised/qk_dh128_ln_res", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh128__lr0.01__ln__res/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/lambdas.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..4f19f3fbe956ff490b025b6cfc743cb938ce1328 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9878, + "0.025": 0.9777, + "0.05": 0.9633, + "0.1": 0.9285, + "0.15": 0.891, + "0.2": 0.8478, + "0.25": 0.8031, + "0.3": 0.7461, + "0.35": 0.6437999999999999, + "0.4": 0.404, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/metrics.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..9150c638a53d98049875119f7b00eafcf7e1033a --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9878, + "error_rate": 0.0144, + "savings": 0.0821, + "accuracy": 0.9856 + }, + "0.025": { + "lambda": 0.9777, + "error_rate": 0.0255, + "savings": 0.1765, + "accuracy": 0.9745 + }, + "0.05": { + "lambda": 0.9633, + "error_rate": 0.0521, + "savings": 0.2813, + "accuracy": 0.9479 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0942, + "savings": 0.4498, + "accuracy": 0.9058 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.1463, + "savings": 0.5795, + "accuracy": 0.8537 + }, + "0.2": { + "lambda": 0.8478, + "error_rate": 0.2007, + "savings": 0.6854, + "accuracy": 0.7993 + }, + "0.25": { + "lambda": 0.8031, + "error_rate": 0.2428, + "savings": 0.7496, + "accuracy": 0.7572 + }, + "0.3": { + "lambda": 0.7461, + "error_rate": 0.2916, + "savings": 0.82, + "accuracy": 0.7084 + }, + "0.35": { + "lambda": 0.6437999999999999, + "error_rate": 0.3437, + "savings": 0.8861, + 
"accuracy": 0.6563 + }, + "0.4": { + "lambda": 0.404, + "error_rate": 0.3925, + "savings": 0.9429, + "accuracy": 0.6075 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..c052bfbb49622c02dc1959c4b777740119d1381f --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9878, + "error_rate": 0.0, + "savings": 0.0156, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9777, + "error_rate": 0.0, + "savings": 0.0461, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9633, + "error_rate": 0.0, + "savings": 0.0687, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0, + "savings": 0.2652, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.15, + "savings": 0.4255, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8478, + "error_rate": 0.25, + "savings": 0.5955, + "accuracy": 0.75 + }, + "0.25": { + "lambda": 0.8031, + "error_rate": 0.35, + "savings": 0.6902, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7461, + "error_rate": 0.35, + "savings": 0.718, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6437999999999999, + "error_rate": 0.4, + "savings": 0.8608, + "accuracy": 0.6 + }, + "0.4": { + "lambda": 0.404, + "error_rate": 0.55, + "savings": 0.9598, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..7013781d63a919e890b88ab9fa8141cdd478d1ec --- /dev/null +++ 
b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9878, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9777, + "error_rate": 0.0, + "savings": 0.0147, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9633, + "error_rate": 0.0, + "savings": 0.0591, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0, + "savings": 0.2557, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.0, + "savings": 0.355, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.8478, + "error_rate": 0.0556, + "savings": 0.478, + "accuracy": 0.9444 + }, + "0.25": { + "lambda": 0.8031, + "error_rate": 0.0556, + "savings": 0.5778, + "accuracy": 0.9444 + }, + "0.3": { + "lambda": 0.7461, + "error_rate": 0.1111, + "savings": 0.6821, + "accuracy": 0.8889 + }, + "0.35": { + "lambda": 0.6437999999999999, + "error_rate": 0.3889, + "savings": 0.8938, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.404, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..5f97421027855409a0db76ca6edaa940ccf36f8f --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9878, + "error_rate": 0.0, + "savings": 0.0011, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9777, + "error_rate": 0.0, + "savings": 0.0061, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9633, + "error_rate": 0.0, + "savings": 0.06, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.05, + "savings": 0.1438, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.25, + 
"savings": 0.4637, + "accuracy": 0.75 + }, + "0.2": { + "lambda": 0.8478, + "error_rate": 0.35, + "savings": 0.5429, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8031, + "error_rate": 0.35, + "savings": 0.6194, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7461, + "error_rate": 0.35, + "savings": 0.7112, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6437999999999999, + "error_rate": 0.35, + "savings": 0.83, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.404, + "error_rate": 0.45, + "savings": 0.9177, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..09c122d2b7d2c316b2dbbca9d9a66cc326d2e435 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9878, + "error_rate": 0.04, + "savings": 0.1995, + "accuracy": 0.96 + }, + "0.025": { + "lambda": 0.9777, + "error_rate": 0.11, + "savings": 0.373, + "accuracy": 0.89 + }, + "0.05": { + "lambda": 0.9633, + "error_rate": 0.19, + "savings": 0.5228, + "accuracy": 0.81 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.25, + "savings": 0.7258, + "accuracy": 0.75 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.31, + "savings": 0.8063, + "accuracy": 0.69 + }, + "0.2": { + "lambda": 0.8478, + "error_rate": 0.38, + "savings": 0.8851, + "accuracy": 0.62 + }, + "0.25": { + "lambda": 0.8031, + "error_rate": 0.4, + "savings": 0.9215, + "accuracy": 0.6 + }, + "0.3": { + "lambda": 0.7461, + "error_rate": 0.41, + "savings": 0.9413, + "accuracy": 0.59 + }, + "0.35": { + "lambda": 0.6437999999999999, + "error_rate": 0.41, + "savings": 0.95, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.404, + "error_rate": 0.41, + "savings": 
0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_math500.json b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..a99ee1a4a7b8d216689d8ef66ef67462a48cf03f --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9878, + "error_rate": 0.0021, + "savings": 0.3161, + "accuracy": 0.9979 + }, + "0.025": { + "lambda": 0.9777, + "error_rate": 0.0062, + "savings": 0.4569, + "accuracy": 0.9938 + }, + "0.05": { + "lambda": 0.9633, + "error_rate": 0.0124, + "savings": 0.5708, + "accuracy": 0.9876 + }, + "0.1": { + "lambda": 0.9285, + "error_rate": 0.0206, + "savings": 0.6965, + "accuracy": 0.9794 + }, + "0.15": { + "lambda": 0.891, + "error_rate": 0.0351, + "savings": 0.7691, + "accuracy": 0.9649 + }, + "0.2": { + "lambda": 0.8478, + "error_rate": 0.0474, + "savings": 0.8109, + "accuracy": 0.9526 + }, + "0.25": { + "lambda": 0.8031, + "error_rate": 0.0722, + "savings": 0.8413, + "accuracy": 0.9278 + }, + "0.3": { + "lambda": 0.7461, + "error_rate": 0.0804, + "savings": 0.8615, + "accuracy": 0.9196 + }, + "0.35": { + "lambda": 0.6437999999999999, + "error_rate": 0.0866, + "savings": 0.8759, + "accuracy": 0.9134 + }, + "0.4": { + "lambda": 0.404, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_ln_res/probe.pt b/qwen2.5-32b/supervised/qk_dh128_ln_res/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..80ca8568bd5e9ada58ac105a5afc752dc286aadd --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_ln_res/probe.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:3a453c5fe5786f0b6d0c5ed25e781947e976fb218bc597ea5769a8e41f7628f4 +size 5247736 diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/config.json b/qwen2.5-32b/supervised/qk_dh128_mlp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..3d993e5db237f10a14e286dfa3dfaa9447417b9d --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_mlp/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01__mlp", + "d_hidden": 128, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": true, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-27T23:33:03.161628", + "release_target": "qwen2.5-32b/supervised/qk_dh128_mlp", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh128__lr0.01__mlp/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/lambdas.json b/qwen2.5-32b/supervised/qk_dh128_mlp/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..0da94f7e3f7974384bd3708792e65ea99f56f742 --- /dev/null +++ 
b/qwen2.5-32b/supervised/qk_dh128_mlp/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9977, + "0.025": 0.9908, + "0.05": 0.9781, + "0.1": 0.9504, + "0.15": 0.9077, + "0.2": 0.8631, + "0.25": 0.8051, + "0.3": 0.7336, + "0.35": 0.6086, + "0.4": 0.33420000000000005, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/metrics.json b/qwen2.5-32b/supervised/qk_dh128_mlp/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..76f19c66f5e3c55ce1433704eb5e087c87258743 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_mlp/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9977, + "error_rate": 0.0022, + "savings": 0.0455, + "accuracy": 0.9978 + }, + "0.025": { + "lambda": 0.9908, + "error_rate": 0.0244, + "savings": 0.1612, + "accuracy": 0.9756 + }, + "0.05": { + "lambda": 0.9781, + "error_rate": 0.0488, + "savings": 0.2802, + "accuracy": 0.9512 + }, + "0.1": { + "lambda": 0.9504, + "error_rate": 0.0931, + "savings": 0.4405, + "accuracy": 0.9069 + }, + "0.15": { + "lambda": 0.9077, + "error_rate": 0.1563, + "savings": 0.5844, + "accuracy": 0.8437 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.2018, + "savings": 0.6763, + "accuracy": 0.7982 + }, + "0.25": { + "lambda": 0.8051, + "error_rate": 0.2539, + "savings": 0.7588, + "accuracy": 0.7461 + }, + "0.3": { + "lambda": 0.7336, + "error_rate": 0.3038, + "savings": 0.8208, + "accuracy": 0.6962 + }, + "0.35": { + "lambda": 0.6086, + "error_rate": 0.3514, + "savings": 0.8849, + "accuracy": 0.6486 + }, + "0.4": { + "lambda": 0.33420000000000005, + "error_rate": 0.3891, + "savings": 0.9407, + "accuracy": 0.6109 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime24.json new file mode 100644 index 
0000000000000000000000000000000000000000..6d152af1ec8663f37390c3ec519931389db83bb6 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9977, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9908, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9781, + "error_rate": 0.0, + "savings": 0.1112, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9504, + "error_rate": 0.05, + "savings": 0.2576, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.9077, + "error_rate": 0.15, + "savings": 0.3685, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.15, + "savings": 0.4916, + "accuracy": 0.85 + }, + "0.25": { + "lambda": 0.8051, + "error_rate": 0.3, + "savings": 0.6709, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.7336, + "error_rate": 0.35, + "savings": 0.7452, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6086, + "error_rate": 0.45, + "savings": 0.8481, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.33420000000000005, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..9d37778ee8db71652865f38f700c8f2d5855ab89 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9977, + "error_rate": 0.0, + "savings": 0.05, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9908, + "error_rate": 0.0, + "savings": 0.1, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9781, + "error_rate": 0.0, + "savings": 0.1, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9504, + "error_rate": 0.0, + "savings": 0.2306, + "accuracy": 1.0 
+ }, + "0.15": { + "lambda": 0.9077, + "error_rate": 0.0556, + "savings": 0.4336, + "accuracy": 0.9444 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.1111, + "savings": 0.513, + "accuracy": 0.8889 + }, + "0.25": { + "lambda": 0.8051, + "error_rate": 0.1667, + "savings": 0.581, + "accuracy": 0.8333 + }, + "0.3": { + "lambda": 0.7336, + "error_rate": 0.2222, + "savings": 0.7171, + "accuracy": 0.7778 + }, + "0.35": { + "lambda": 0.6086, + "error_rate": 0.3889, + "savings": 0.8892, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.33420000000000005, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..e95fb2eb74f309091ccbfd9dd3f80d466538d7c8 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9977, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9908, + "error_rate": 0.0, + "savings": 0.0464, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9781, + "error_rate": 0.15, + "savings": 0.1951, + "accuracy": 0.85 + }, + "0.1": { + "lambda": 0.9504, + "error_rate": 0.15, + "savings": 0.3248, + "accuracy": 0.85 + }, + "0.15": { + "lambda": 0.9077, + "error_rate": 0.2, + "savings": 0.4197, + "accuracy": 0.8 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.35, + "savings": 0.575, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8051, + "error_rate": 0.35, + "savings": 0.6104, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7336, + "error_rate": 0.35, + "savings": 0.6607, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6086, + "error_rate": 0.35, + "savings": 0.8294, + "accuracy": 0.65 + }, + "0.4": { + 
"lambda": 0.33420000000000005, + "error_rate": 0.45, + "savings": 0.9274, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..da59dc4d7e8202e125e55f4806897d7c2d750672 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9977, + "error_rate": 0.01, + "savings": 0.0173, + "accuracy": 0.99 + }, + "0.025": { + "lambda": 0.9908, + "error_rate": 0.03, + "savings": 0.1422, + "accuracy": 0.97 + }, + "0.05": { + "lambda": 0.9781, + "error_rate": 0.12, + "savings": 0.3682, + "accuracy": 0.88 + }, + "0.1": { + "lambda": 0.9504, + "error_rate": 0.21, + "savings": 0.6333, + "accuracy": 0.79 + }, + "0.15": { + "lambda": 0.9077, + "error_rate": 0.29, + "savings": 0.7675, + "accuracy": 0.71 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.37, + "savings": 0.8539, + "accuracy": 0.63 + }, + "0.25": { + "lambda": 0.8051, + "error_rate": 0.38, + "savings": 0.8905, + "accuracy": 0.62 + }, + "0.3": { + "lambda": 0.7336, + "error_rate": 0.4, + "savings": 0.9286, + "accuracy": 0.6 + }, + "0.35": { + "lambda": 0.6086, + "error_rate": 0.41, + "savings": 0.9495, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.33420000000000005, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/ood_math500.json b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..223b49fa3e2a20ec93cb9d9e8a75973a6d33ce1d --- /dev/null +++ 
b/qwen2.5-32b/supervised/qk_dh128_mlp/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9977, + "error_rate": 0.0062, + "savings": 0.4401, + "accuracy": 0.9938 + }, + "0.025": { + "lambda": 0.9908, + "error_rate": 0.0082, + "savings": 0.5505, + "accuracy": 0.9918 + }, + "0.05": { + "lambda": 0.9781, + "error_rate": 0.0144, + "savings": 0.6223, + "accuracy": 0.9856 + }, + "0.1": { + "lambda": 0.9504, + "error_rate": 0.0289, + "savings": 0.7165, + "accuracy": 0.9711 + }, + "0.15": { + "lambda": 0.9077, + "error_rate": 0.0371, + "savings": 0.7792, + "accuracy": 0.9629 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.0557, + "savings": 0.8197, + "accuracy": 0.9443 + }, + "0.25": { + "lambda": 0.8051, + "error_rate": 0.0639, + "savings": 0.8403, + "accuracy": 0.9361 + }, + "0.3": { + "lambda": 0.7336, + "error_rate": 0.0804, + "savings": 0.8597, + "accuracy": 0.9196 + }, + "0.35": { + "lambda": 0.6086, + "error_rate": 0.0866, + "savings": 0.8745, + "accuracy": 0.9134 + }, + "0.4": { + "lambda": 0.33420000000000005, + "error_rate": 0.0928, + "savings": 0.8866, + "accuracy": 0.9072 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_mlp/probe.pt b/qwen2.5-32b/supervised/qk_dh128_mlp/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..a30982cc978bdc891f0373d46b4de887e60914b2 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_mlp/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929f02637512f5a973557740b813972c68ed1327553494a618ce42704c3918bc +size 5512440 diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/config.json b/qwen2.5-32b/supervised/qk_dh128_share_kq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a47a7ef822dcda0be6ac410f6166300a6c6057b --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/config.json @@ 
-0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01__share_kq", + "d_hidden": 128, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": true, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-28T00:31:01.520056", + "release_target": "qwen2.5-32b/supervised/qk_dh128_share_kq", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh128__lr0.01__share_kq/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/lambdas.json b/qwen2.5-32b/supervised/qk_dh128_share_kq/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..f5eba5c9a0c9a7348e331ad67611335280c64679 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9881, + "0.025": 0.9772, + "0.05": 0.964, + "0.1": 0.9283, + "0.15": 0.8914, + "0.2": 0.8469, + "0.25": 0.8032, + "0.3": 0.7479, + "0.35": 0.6461, + "0.4": 0.4014, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/metrics.json 
b/qwen2.5-32b/supervised/qk_dh128_share_kq/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..1f9ccf10eba0dd6a8c4fe9f48fcecb06b2cbeaa0 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9881, + "error_rate": 0.0144, + "savings": 0.0801, + "accuracy": 0.9856 + }, + "0.025": { + "lambda": 0.9772, + "error_rate": 0.0255, + "savings": 0.1791, + "accuracy": 0.9745 + }, + "0.05": { + "lambda": 0.964, + "error_rate": 0.0499, + "savings": 0.2765, + "accuracy": 0.9501 + }, + "0.1": { + "lambda": 0.9283, + "error_rate": 0.0942, + "savings": 0.4494, + "accuracy": 0.9058 + }, + "0.15": { + "lambda": 0.8914, + "error_rate": 0.1452, + "savings": 0.5779, + "accuracy": 0.8548 + }, + "0.2": { + "lambda": 0.8469, + "error_rate": 0.2029, + "savings": 0.6873, + "accuracy": 0.7971 + }, + "0.25": { + "lambda": 0.8032, + "error_rate": 0.2439, + "savings": 0.7504, + "accuracy": 0.7561 + }, + "0.3": { + "lambda": 0.7479, + "error_rate": 0.2905, + "savings": 0.8179, + "accuracy": 0.7095 + }, + "0.35": { + "lambda": 0.6461, + "error_rate": 0.3426, + "savings": 0.8851, + "accuracy": 0.6574 + }, + "0.4": { + "lambda": 0.4014, + "error_rate": 0.3925, + "savings": 0.9429, + "accuracy": 0.6075 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..068dbc1a5644b4cd290751a72d2945ef34bc9694 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9881, + "error_rate": 0.0, + "savings": 0.0149, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9772, + "error_rate": 0.0, + "savings": 0.0454, + "accuracy": 1.0 + }, + "0.05": { 
+ "lambda": 0.964, + "error_rate": 0.0, + "savings": 0.0524, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9283, + "error_rate": 0.0, + "savings": 0.2636, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8914, + "error_rate": 0.15, + "savings": 0.4239, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8469, + "error_rate": 0.25, + "savings": 0.5955, + "accuracy": 0.75 + }, + "0.25": { + "lambda": 0.8032, + "error_rate": 0.35, + "savings": 0.6853, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7479, + "error_rate": 0.35, + "savings": 0.718, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6461, + "error_rate": 0.4, + "savings": 0.8615, + "accuracy": 0.6 + }, + "0.4": { + "lambda": 0.4014, + "error_rate": 0.55, + "savings": 0.9598, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..d3b3f8cb6b02c536d376684074d0dfbc90e7dd38 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9881, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9772, + "error_rate": 0.0, + "savings": 0.0147, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.964, + "error_rate": 0.0, + "savings": 0.0574, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9283, + "error_rate": 0.0, + "savings": 0.2557, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8914, + "error_rate": 0.0, + "savings": 0.355, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.8469, + "error_rate": 0.0556, + "savings": 0.4774, + "accuracy": 0.9444 + }, + "0.25": { + "lambda": 0.8032, + "error_rate": 0.0556, + "savings": 0.5771, + "accuracy": 0.9444 + }, + "0.3": { + "lambda": 0.7479, + "error_rate": 0.1111, + "savings": 
0.683, + "accuracy": 0.8889 + }, + "0.35": { + "lambda": 0.6461, + "error_rate": 0.3889, + "savings": 0.8929, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.4014, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..ba3116bd07750a7bfe12d9acd9fe73ba0cbe3a80 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9881, + "error_rate": 0.0, + "savings": 0.0011, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9772, + "error_rate": 0.0, + "savings": 0.005, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.964, + "error_rate": 0.0, + "savings": 0.0562, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9283, + "error_rate": 0.05, + "savings": 0.1438, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.8914, + "error_rate": 0.25, + "savings": 0.4626, + "accuracy": 0.75 + }, + "0.2": { + "lambda": 0.8469, + "error_rate": 0.35, + "savings": 0.5427, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8032, + "error_rate": 0.35, + "savings": 0.6184, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7479, + "error_rate": 0.35, + "savings": 0.7047, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6461, + "error_rate": 0.35, + "savings": 0.83, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.4014, + "error_rate": 0.45, + "savings": 0.9177, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_gpqa_diamond.json new file mode 
100644 index 0000000000000000000000000000000000000000..c144de303d2a685deed6ac48cdd93b01e9a6bd01 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9881, + "error_rate": 0.04, + "savings": 0.1979, + "accuracy": 0.96 + }, + "0.025": { + "lambda": 0.9772, + "error_rate": 0.11, + "savings": 0.3781, + "accuracy": 0.89 + }, + "0.05": { + "lambda": 0.964, + "error_rate": 0.19, + "savings": 0.5196, + "accuracy": 0.81 + }, + "0.1": { + "lambda": 0.9283, + "error_rate": 0.25, + "savings": 0.7239, + "accuracy": 0.75 + }, + "0.15": { + "lambda": 0.8914, + "error_rate": 0.3, + "savings": 0.8029, + "accuracy": 0.7 + }, + "0.2": { + "lambda": 0.8469, + "error_rate": 0.38, + "savings": 0.8863, + "accuracy": 0.62 + }, + "0.25": { + "lambda": 0.8032, + "error_rate": 0.4, + "savings": 0.9215, + "accuracy": 0.6 + }, + "0.3": { + "lambda": 0.7479, + "error_rate": 0.41, + "savings": 0.9412, + "accuracy": 0.59 + }, + "0.35": { + "lambda": 0.6461, + "error_rate": 0.41, + "savings": 0.95, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.4014, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_math500.json b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..b9352e44bf55c70c4ef8a9633dc0f60a4fca1c59 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9881, + "error_rate": 0.0021, + "savings": 0.3108, + "accuracy": 0.9979 + }, + "0.025": { + "lambda": 0.9772, + "error_rate": 0.0062, + "savings": 0.4632, + "accuracy": 0.9938 + }, + "0.05": { + "lambda": 0.964, + "error_rate": 0.0124, + "savings": 0.5678, + "accuracy": 0.9876 + }, + "0.1": { + "lambda": 
0.9283, + "error_rate": 0.0206, + "savings": 0.6979, + "accuracy": 0.9794 + }, + "0.15": { + "lambda": 0.8914, + "error_rate": 0.0351, + "savings": 0.7682, + "accuracy": 0.9649 + }, + "0.2": { + "lambda": 0.8469, + "error_rate": 0.0495, + "savings": 0.8129, + "accuracy": 0.9505 + }, + "0.25": { + "lambda": 0.8032, + "error_rate": 0.0722, + "savings": 0.843, + "accuracy": 0.9278 + }, + "0.3": { + "lambda": 0.7479, + "error_rate": 0.0804, + "savings": 0.8606, + "accuracy": 0.9196 + }, + "0.35": { + "lambda": 0.6461, + "error_rate": 0.0866, + "savings": 0.8761, + "accuracy": 0.9134 + }, + "0.4": { + "lambda": 0.4014, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh128_share_kq/probe.pt b/qwen2.5-32b/supervised/qk_dh128_share_kq/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab0ca2bbf909642cfd814a67a105f219afd1d48a --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh128_share_kq/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38d4459a0e10f2d01ca02f41a45ddfb7fa9dfc221fa6c11a05f0b5e16937be3c +size 2624571 diff --git a/qwen2.5-32b/supervised/qk_dh256/config.json b/qwen2.5-32b/supervised/qk_dh256/config.json new file mode 100644 index 0000000000000000000000000000000000000000..bdd754a6f092dfb4440a9f5f98b340b6871bd81b --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", 
+ "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh256__lr0.01", + "d_hidden": 256, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-27T22:02:24.432809", + "release_target": "qwen2.5-32b/supervised/qk_dh256", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh256__lr0.01/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/lambdas.json b/qwen2.5-32b/supervised/qk_dh256/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..5e49a39d94aa41b4c87f0bd30b148c93c6d310a6 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9927, + "0.025": 0.9878, + "0.05": 0.9744, + "0.1": 0.9445, + "0.15": 0.9021, + "0.2": 0.8631, + "0.25": 0.8036, + "0.3": 0.7436, + "0.35": 0.6396999999999999, + "0.4": 0.40459999999999996, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/metrics.json b/qwen2.5-32b/supervised/qk_dh256/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..48ec433673698369aa22ede33ee8d2b60f58dbab --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9927, + "error_rate": 0.0078, + "savings": 0.0443, + "accuracy": 0.9922 + }, + "0.025": { + "lambda": 0.9878, + "error_rate": 0.0177, + "savings": 0.0972, + "accuracy": 0.9823 + }, + "0.05": { + "lambda": 0.9744, + "error_rate": 
0.0554, + "savings": 0.2347, + "accuracy": 0.9446 + }, + "0.1": { + "lambda": 0.9445, + "error_rate": 0.1042, + "savings": 0.4075, + "accuracy": 0.8958 + }, + "0.15": { + "lambda": 0.9021, + "error_rate": 0.153, + "savings": 0.5673, + "accuracy": 0.847 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.1896, + "savings": 0.6568, + "accuracy": 0.8104 + }, + "0.25": { + "lambda": 0.8036, + "error_rate": 0.2417, + "savings": 0.7476, + "accuracy": 0.7583 + }, + "0.3": { + "lambda": 0.7436, + "error_rate": 0.2938, + "savings": 0.8107, + "accuracy": 0.7062 + }, + "0.35": { + "lambda": 0.6396999999999999, + "error_rate": 0.3404, + "savings": 0.8849, + "accuracy": 0.6596 + }, + "0.4": { + "lambda": 0.40459999999999996, + "error_rate": 0.3902, + "savings": 0.9417, + "accuracy": 0.6098 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh256/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..b1aaadb5189742d2a5784aefde0b00db4fe1c3f5 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9927, + "error_rate": 0.0, + "savings": 0.0477, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9878, + "error_rate": 0.0, + "savings": 0.0854, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9744, + "error_rate": 0.05, + "savings": 0.1526, + "accuracy": 0.95 + }, + "0.1": { + "lambda": 0.9445, + "error_rate": 0.05, + "savings": 0.2374, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.9021, + "error_rate": 0.1, + "savings": 0.4578, + "accuracy": 0.9 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.2, + "savings": 0.5416, + "accuracy": 0.8 + }, + "0.25": { + "lambda": 0.8036, + "error_rate": 0.3, + "savings": 0.7236, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.7436, + "error_rate": 0.35, + 
"savings": 0.7575, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6396999999999999, + "error_rate": 0.45, + "savings": 0.8718, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.40459999999999996, + "error_rate": 0.55, + "savings": 0.9566, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh256/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..804f2c8aeb04b2a8bf674c2e002331956b6943ad --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9927, + "error_rate": 0.0, + "savings": 0.0077, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9878, + "error_rate": 0.0, + "savings": 0.0277, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9744, + "error_rate": 0.0, + "savings": 0.0638, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9445, + "error_rate": 0.0, + "savings": 0.2117, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.9021, + "error_rate": 0.0556, + "savings": 0.368, + "accuracy": 0.9444 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.0556, + "savings": 0.4905, + "accuracy": 0.9444 + }, + "0.25": { + "lambda": 0.8036, + "error_rate": 0.1111, + "savings": 0.6063, + "accuracy": 0.8889 + }, + "0.3": { + "lambda": 0.7436, + "error_rate": 0.1667, + "savings": 0.7273, + "accuracy": 0.8333 + }, + "0.35": { + "lambda": 0.6396999999999999, + "error_rate": 0.3889, + "savings": 0.8506, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.40459999999999996, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/ood_aime26.json 
b/qwen2.5-32b/supervised/qk_dh256/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..1bab9dfd5c4fbeff636e90fbf15181357e5a1ebd --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9927, + "error_rate": 0.0, + "savings": 0.0021, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9878, + "error_rate": 0.0, + "savings": 0.0194, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9744, + "error_rate": 0.0, + "savings": 0.0497, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9445, + "error_rate": 0.0, + "savings": 0.1095, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.9021, + "error_rate": 0.3, + "savings": 0.4424, + "accuracy": 0.7 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.35, + "savings": 0.5006, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8036, + "error_rate": 0.35, + "savings": 0.5915, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7436, + "error_rate": 0.35, + "savings": 0.6999, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6396999999999999, + "error_rate": 0.35, + "savings": 0.8013, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.40459999999999996, + "error_rate": 0.5, + "savings": 0.9262, + "accuracy": 0.5 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh256/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..ce779263aacadafc6ce549e169969329f0dff8dd --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9927, + "error_rate": 0.04, + "savings": 0.1487, + "accuracy": 0.96 + }, + "0.025": { + "lambda": 0.9878, + "error_rate": 0.09, + "savings": 0.2102, + "accuracy": 0.91 + }, + "0.05": { + "lambda": 0.9744, + "error_rate": 0.14, + "savings": 0.4339, + 
"accuracy": 0.86 + }, + "0.1": { + "lambda": 0.9445, + "error_rate": 0.24, + "savings": 0.6371, + "accuracy": 0.76 + }, + "0.15": { + "lambda": 0.9021, + "error_rate": 0.32, + "savings": 0.7778, + "accuracy": 0.68 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.36, + "savings": 0.8424, + "accuracy": 0.64 + }, + "0.25": { + "lambda": 0.8036, + "error_rate": 0.4, + "savings": 0.9165, + "accuracy": 0.6 + }, + "0.3": { + "lambda": 0.7436, + "error_rate": 0.41, + "savings": 0.9377, + "accuracy": 0.59 + }, + "0.35": { + "lambda": 0.6396999999999999, + "error_rate": 0.41, + "savings": 0.9496, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.40459999999999996, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/ood_math500.json b/qwen2.5-32b/supervised/qk_dh256/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..635ae8698be215fd8124e85eaa7cb8ce81402b74 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9927, + "error_rate": 0.0021, + "savings": 0.2167, + "accuracy": 0.9979 + }, + "0.025": { + "lambda": 0.9878, + "error_rate": 0.0041, + "savings": 0.328, + "accuracy": 0.9959 + }, + "0.05": { + "lambda": 0.9744, + "error_rate": 0.0103, + "savings": 0.5114, + "accuracy": 0.9897 + }, + "0.1": { + "lambda": 0.9445, + "error_rate": 0.0206, + "savings": 0.6659, + "accuracy": 0.9794 + }, + "0.15": { + "lambda": 0.9021, + "error_rate": 0.0309, + "savings": 0.7587, + "accuracy": 0.9691 + }, + "0.2": { + "lambda": 0.8631, + "error_rate": 0.0412, + "savings": 0.8024, + "accuracy": 0.9588 + }, + "0.25": { + "lambda": 0.8036, + "error_rate": 0.0701, + "savings": 0.8447, + "accuracy": 0.9299 + }, + "0.3": { + "lambda": 0.7436, + "error_rate": 0.0804, + "savings": 0.8643, + "accuracy": 
0.9196 + }, + "0.35": { + "lambda": 0.6396999999999999, + "error_rate": 0.0907, + "savings": 0.8806, + "accuracy": 0.9093 + }, + "0.4": { + "lambda": 0.40459999999999996, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh256/probe.pt b/qwen2.5-32b/supervised/qk_dh256/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd57659da99a8710e49d932f3f02b1525e09af6b --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh256/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc5533884e9db2dc0439ad38fe4f2815ef20a0d2213ac64eb5a317c23a8303f4 +size 10489594 diff --git a/qwen2.5-32b/supervised/qk_dh32/config.json b/qwen2.5-32b/supervised/qk_dh32/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ae381e9d4118f97f27e0cc12a7b5d8fbbb805225 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/config.json @@ -0,0 +1,43 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh32__lr0.01", + "d_hidden": 32, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + 
"outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": true, + "save_every": 10, + "d_phi": 5120, + "timestamp": "2026-03-29T08:58:38.296728", + "release_target": "qwen2.5-32b/supervised/qk_dh32", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh32__lr0.01/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/lambdas.json b/qwen2.5-32b/supervised/qk_dh32/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..d930a0437f23e28be8ba9a25db303779f993b049 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9891, + "0.025": 0.9809, + "0.05": 0.9704, + "0.1": 0.9317, + "0.15": 0.8916, + "0.2": 0.8528, + "0.25": 0.7907, + "0.3": 0.72, + "0.35": 0.6114999999999999, + "0.4": 0.3852, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/metrics.json b/qwen2.5-32b/supervised/qk_dh32/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..3149261552957aeb580022accd2576bd98fcb222 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9891, + "error_rate": 0.0144, + "savings": 0.0752, + "accuracy": 0.9856 + }, + "0.025": { + "lambda": 0.9809, + "error_rate": 0.0322, + "savings": 0.1672, + "accuracy": 0.9678 + }, + "0.05": { + "lambda": 0.9704, + "error_rate": 0.0532, + "savings": 0.2503, + "accuracy": 0.9468 + }, + "0.1": { + "lambda": 0.9317, + "error_rate": 0.1053, + "savings": 0.44, + "accuracy": 0.8947 + }, + "0.15": { + "lambda": 0.8916, + "error_rate": 0.1475, + "savings": 0.576, + "accuracy": 0.8525 + }, + "0.2": { + "lambda": 0.8528, + "error_rate": 0.1907, + "savings": 0.6654, + "accuracy": 0.8093 + }, + "0.25": { + "lambda": 0.7907, + "error_rate": 0.245, + "savings": 0.7491, + "accuracy": 0.755 + }, + "0.3": { + "lambda": 
0.72, + "error_rate": 0.3038, + "savings": 0.8248, + "accuracy": 0.6962 + }, + "0.35": { + "lambda": 0.6114999999999999, + "error_rate": 0.3459, + "savings": 0.884, + "accuracy": 0.6541 + }, + "0.4": { + "lambda": 0.3852, + "error_rate": 0.3891, + "savings": 0.941, + "accuracy": 0.6109 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh32/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..ee86fcd40eb5b40376f52b91aa544999ed6a4f60 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9891, + "error_rate": 0.05, + "savings": 0.071, + "accuracy": 0.95 + }, + "0.025": { + "lambda": 0.9809, + "error_rate": 0.05, + "savings": 0.1275, + "accuracy": 0.95 + }, + "0.05": { + "lambda": 0.9704, + "error_rate": 0.05, + "savings": 0.1532, + "accuracy": 0.95 + }, + "0.1": { + "lambda": 0.9317, + "error_rate": 0.1, + "savings": 0.3524, + "accuracy": 0.9 + }, + "0.15": { + "lambda": 0.8916, + "error_rate": 0.15, + "savings": 0.4417, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8528, + "error_rate": 0.25, + "savings": 0.5187, + "accuracy": 0.75 + }, + "0.25": { + "lambda": 0.7907, + "error_rate": 0.3, + "savings": 0.6398, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.72, + "error_rate": 0.35, + "savings": 0.7473, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6114999999999999, + "error_rate": 0.45, + "savings": 0.8549, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.3852, + "error_rate": 0.55, + "savings": 0.9536, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh32/ood_aime25.json new 
file mode 100644 index 0000000000000000000000000000000000000000..4220cc96c03ebf028ae8f1d4ff8e514f6399fa56 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9891, + "error_rate": 0.0, + "savings": 0.0462, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9809, + "error_rate": 0.0, + "savings": 0.0732, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9704, + "error_rate": 0.0, + "savings": 0.1247, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9317, + "error_rate": 0.0, + "savings": 0.2966, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8916, + "error_rate": 0.1111, + "savings": 0.4133, + "accuracy": 0.8889 + }, + "0.2": { + "lambda": 0.8528, + "error_rate": 0.1667, + "savings": 0.5198, + "accuracy": 0.8333 + }, + "0.25": { + "lambda": 0.7907, + "error_rate": 0.1667, + "savings": 0.622, + "accuracy": 0.8333 + }, + "0.3": { + "lambda": 0.72, + "error_rate": 0.1667, + "savings": 0.7415, + "accuracy": 0.8333 + }, + "0.35": { + "lambda": 0.6114999999999999, + "error_rate": 0.3889, + "savings": 0.8914, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.3852, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh32/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..34625fbb8a1db6dc2372b5a0524e5491282f71da --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9891, + "error_rate": 0.0, + "savings": 0.0274, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9809, + "error_rate": 0.0, + "savings": 0.0442, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9704, + "error_rate": 0.0, + "savings": 0.0839, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9317, + "error_rate": 
0.1, + "savings": 0.2044, + "accuracy": 0.9 + }, + "0.15": { + "lambda": 0.8916, + "error_rate": 0.25, + "savings": 0.5066, + "accuracy": 0.75 + }, + "0.2": { + "lambda": 0.8528, + "error_rate": 0.3, + "savings": 0.5395, + "accuracy": 0.7 + }, + "0.25": { + "lambda": 0.7907, + "error_rate": 0.35, + "savings": 0.6489, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.72, + "error_rate": 0.35, + "savings": 0.7262, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6114999999999999, + "error_rate": 0.35, + "savings": 0.8312, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.3852, + "error_rate": 0.45, + "savings": 0.926, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh32/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..323cbc176a86e6a0a123b2550aff9452dc9037a9 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9891, + "error_rate": 0.04, + "savings": 0.1444, + "accuracy": 0.96 + }, + "0.025": { + "lambda": 0.9809, + "error_rate": 0.07, + "savings": 0.2621, + "accuracy": 0.93 + }, + "0.05": { + "lambda": 0.9704, + "error_rate": 0.12, + "savings": 0.3661, + "accuracy": 0.88 + }, + "0.1": { + "lambda": 0.9317, + "error_rate": 0.22, + "savings": 0.6449, + "accuracy": 0.78 + }, + "0.15": { + "lambda": 0.8916, + "error_rate": 0.27, + "savings": 0.7514, + "accuracy": 0.73 + }, + "0.2": { + "lambda": 0.8528, + "error_rate": 0.31, + "savings": 0.8062, + "accuracy": 0.69 + }, + "0.25": { + "lambda": 0.7907, + "error_rate": 0.38, + "savings": 0.8898, + "accuracy": 0.62 + }, + "0.3": { + "lambda": 0.72, + "error_rate": 0.39, + "savings": 0.9251, + "accuracy": 0.61 + }, + "0.35": { + "lambda": 0.6114999999999999, + "error_rate": 0.41, + "savings": 0.9447, + 
"accuracy": 0.59 + }, + "0.4": { + "lambda": 0.3852, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/ood_math500.json b/qwen2.5-32b/supervised/qk_dh32/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..883ebae38dce37c59a6ac9d1f52286c1f01f1195 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh32/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9891, + "error_rate": 0.0041, + "savings": 0.2898, + "accuracy": 0.9959 + }, + "0.025": { + "lambda": 0.9809, + "error_rate": 0.0062, + "savings": 0.4232, + "accuracy": 0.9938 + }, + "0.05": { + "lambda": 0.9704, + "error_rate": 0.0124, + "savings": 0.5286, + "accuracy": 0.9876 + }, + "0.1": { + "lambda": 0.9317, + "error_rate": 0.0227, + "savings": 0.6783, + "accuracy": 0.9773 + }, + "0.15": { + "lambda": 0.8916, + "error_rate": 0.0351, + "savings": 0.7531, + "accuracy": 0.9649 + }, + "0.2": { + "lambda": 0.8528, + "error_rate": 0.0412, + "savings": 0.7916, + "accuracy": 0.9588 + }, + "0.25": { + "lambda": 0.7907, + "error_rate": 0.0639, + "savings": 0.84, + "accuracy": 0.9361 + }, + "0.3": { + "lambda": 0.72, + "error_rate": 0.0784, + "savings": 0.863, + "accuracy": 0.9216 + }, + "0.35": { + "lambda": 0.6114999999999999, + "error_rate": 0.0887, + "savings": 0.8782, + "accuracy": 0.9113 + }, + "0.4": { + "lambda": 0.3852, + "error_rate": 0.0948, + "savings": 0.8884, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh32/probe.pt b/qwen2.5-32b/supervised/qk_dh32/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaba5f8a6659f5f1b4c679c5def86388e005b7d1 --- /dev/null +++ 
b/qwen2.5-32b/supervised/qk_dh32/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b0c8c2eb30b638da15d3ecd614f640533ead55fc722aa18e352b90fb3b4de5d +size 1313658 diff --git a/qwen2.5-32b/supervised/qk_dh512/config.json b/qwen2.5-32b/supervised/qk_dh512/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a060a8b47d979a2b46866467962236167c740b3b --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/config.json @@ -0,0 +1,43 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh512__lr0.01", + "d_hidden": 512, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": true, + "save_every": 10, + "d_phi": 5120, + "timestamp": "2026-03-29T09:10:31.331037", + "release_target": "qwen2.5-32b/supervised/qk_dh512", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh512__lr0.01/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh512/lambdas.json b/qwen2.5-32b/supervised/qk_dh512/lambdas.json new file mode 100644 index 
0000000000000000000000000000000000000000..5f05c77322b543b3076484a09c730a0f40e78a8b --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9952, + "0.025": 0.9894, + "0.05": 0.9738, + "0.1": 0.9506, + "0.15": 0.9036, + "0.2": 0.8639, + "0.25": 0.8142, + "0.3": 0.7515000000000001, + "0.35": 0.642, + "0.4": 0.40549999999999997, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh512/metrics.json b/qwen2.5-32b/supervised/qk_dh512/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..bc2cfcdb1c226fc619c4f3e6b5cef832e0f9b6e9 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9952, + "error_rate": 0.0078, + "savings": 0.0334, + "accuracy": 0.9922 + }, + "0.025": { + "lambda": 0.9894, + "error_rate": 0.0166, + "savings": 0.0919, + "accuracy": 0.9834 + }, + "0.05": { + "lambda": 0.9738, + "error_rate": 0.061, + "savings": 0.2547, + "accuracy": 0.939 + }, + "0.1": { + "lambda": 0.9506, + "error_rate": 0.0987, + "savings": 0.3975, + "accuracy": 0.9013 + }, + "0.15": { + "lambda": 0.9036, + "error_rate": 0.153, + "savings": 0.5723, + "accuracy": 0.847 + }, + "0.2": { + "lambda": 0.8639, + "error_rate": 0.1907, + "savings": 0.6626, + "accuracy": 0.8093 + }, + "0.25": { + "lambda": 0.8142, + "error_rate": 0.2395, + "savings": 0.7403, + "accuracy": 0.7605 + }, + "0.3": { + "lambda": 0.7515000000000001, + "error_rate": 0.296, + "savings": 0.8116, + "accuracy": 0.704 + }, + "0.35": { + "lambda": 0.642, + "error_rate": 0.3503, + "savings": 0.8844, + "accuracy": 0.6497 + }, + "0.4": { + "lambda": 0.40549999999999997, + "error_rate": 0.3891, + "savings": 0.9408, + "accuracy": 0.6109 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git 
a/qwen2.5-32b/supervised/qk_dh512/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh512/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..fa3bbc5809dda03f11583ba4ab64f865d80f7877 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9952, + "error_rate": 0.0, + "savings": 0.0332, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9894, + "error_rate": 0.0, + "savings": 0.1148, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9738, + "error_rate": 0.05, + "savings": 0.1583, + "accuracy": 0.95 + }, + "0.1": { + "lambda": 0.9506, + "error_rate": 0.1, + "savings": 0.2735, + "accuracy": 0.9 + }, + "0.15": { + "lambda": 0.9036, + "error_rate": 0.15, + "savings": 0.4803, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8639, + "error_rate": 0.2, + "savings": 0.5354, + "accuracy": 0.8 + }, + "0.25": { + "lambda": 0.8142, + "error_rate": 0.3, + "savings": 0.694, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.7515000000000001, + "error_rate": 0.35, + "savings": 0.7494, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.642, + "error_rate": 0.45, + "savings": 0.8796, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.40549999999999997, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh512/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh512/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..f5ab9c226f808efe79f96c8871d3831db0340ed7 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9952, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9894, + "error_rate": 0.0, + "savings": 0.0313, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9738, + "error_rate": 0.0, + 
"savings": 0.1029, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9506, + "error_rate": 0.0556, + "savings": 0.2082, + "accuracy": 0.9444 + }, + "0.15": { + "lambda": 0.9036, + "error_rate": 0.0556, + "savings": 0.39, + "accuracy": 0.9444 + }, + "0.2": { + "lambda": 0.8639, + "error_rate": 0.0556, + "savings": 0.4938, + "accuracy": 0.9444 + }, + "0.25": { + "lambda": 0.8142, + "error_rate": 0.2222, + "savings": 0.5945, + "accuracy": 0.7778 + }, + "0.3": { + "lambda": 0.7515000000000001, + "error_rate": 0.2222, + "savings": 0.7025, + "accuracy": 0.7778 + }, + "0.35": { + "lambda": 0.642, + "error_rate": 0.3889, + "savings": 0.8746, + "accuracy": 0.6111 + }, + "0.4": { + "lambda": 0.40549999999999997, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh512/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh512/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..d7fd89a1be08aa0ec9d1374a892fb8f928f0cf1b --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9952, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9894, + "error_rate": 0.0, + "savings": 0.0351, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9738, + "error_rate": 0.0, + "savings": 0.0692, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9506, + "error_rate": 0.0, + "savings": 0.1105, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.9036, + "error_rate": 0.2, + "savings": 0.3991, + "accuracy": 0.8 + }, + "0.2": { + "lambda": 0.8639, + "error_rate": 0.35, + "savings": 0.5442, + "accuracy": 0.65 + }, + "0.25": { + "lambda": 0.8142, + "error_rate": 0.35, + "savings": 0.6059, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7515000000000001, + "error_rate": 0.35, + "savings": 
0.7046, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.642, + "error_rate": 0.35, + "savings": 0.8206, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.40549999999999997, + "error_rate": 0.5, + "savings": 0.9374, + "accuracy": 0.5 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh512/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh512/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..9a959d7e12aa607947ffeb66b7898a54c937c686 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9952, + "error_rate": 0.06, + "savings": 0.1352, + "accuracy": 0.94 + }, + "0.025": { + "lambda": 0.9894, + "error_rate": 0.11, + "savings": 0.2625, + "accuracy": 0.89 + }, + "0.05": { + "lambda": 0.9738, + "error_rate": 0.16, + "savings": 0.4756, + "accuracy": 0.84 + }, + "0.1": { + "lambda": 0.9506, + "error_rate": 0.26, + "savings": 0.6246, + "accuracy": 0.74 + }, + "0.15": { + "lambda": 0.9036, + "error_rate": 0.3, + "savings": 0.7836, + "accuracy": 0.7 + }, + "0.2": { + "lambda": 0.8639, + "error_rate": 0.36, + "savings": 0.8468, + "accuracy": 0.64 + }, + "0.25": { + "lambda": 0.8142, + "error_rate": 0.39, + "savings": 0.8959, + "accuracy": 0.61 + }, + "0.3": { + "lambda": 0.7515000000000001, + "error_rate": 0.4, + "savings": 0.9307, + "accuracy": 0.6 + }, + "0.35": { + "lambda": 0.642, + "error_rate": 0.41, + "savings": 0.951, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.40549999999999997, + "error_rate": 0.41, + "savings": 0.9554, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh512/ood_math500.json b/qwen2.5-32b/supervised/qk_dh512/ood_math500.json new file mode 100644 index 
0000000000000000000000000000000000000000..855c379b06a4bf439154257046d0915bf849b4f6 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9952, + "error_rate": 0.0, + "savings": 0.1339, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9894, + "error_rate": 0.0021, + "savings": 0.2839, + "accuracy": 0.9979 + }, + "0.05": { + "lambda": 0.9738, + "error_rate": 0.0103, + "savings": 0.5115, + "accuracy": 0.9897 + }, + "0.1": { + "lambda": 0.9506, + "error_rate": 0.0206, + "savings": 0.6513, + "accuracy": 0.9794 + }, + "0.15": { + "lambda": 0.9036, + "error_rate": 0.033, + "savings": 0.7579, + "accuracy": 0.967 + }, + "0.2": { + "lambda": 0.8639, + "error_rate": 0.0392, + "savings": 0.7992, + "accuracy": 0.9608 + }, + "0.25": { + "lambda": 0.8142, + "error_rate": 0.066, + "savings": 0.8401, + "accuracy": 0.934 + }, + "0.3": { + "lambda": 0.7515000000000001, + "error_rate": 0.0784, + "savings": 0.8626, + "accuracy": 0.9216 + }, + "0.35": { + "lambda": 0.642, + "error_rate": 0.0866, + "savings": 0.8767, + "accuracy": 0.9134 + }, + "0.4": { + "lambda": 0.40549999999999997, + "error_rate": 0.0948, + "savings": 0.8884, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh512/probe.pt b/qwen2.5-32b/supervised/qk_dh512/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..897f95dc0622a3488ca03e7adb1033544684da37 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh512/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:badf60b7ed2419a479b6fa820831ec3b4b6608f7fb669961c86f96b2c1e741b2 +size 20976378 diff --git a/qwen2.5-32b/supervised/qk_dh64/config.json b/qwen2.5-32b/supervised/qk_dh64/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e4f836b3c49c588c43b7316e53cacd6458a93fc7 --- 
/dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/config.json @@ -0,0 +1,42 @@ +{ + "config": "configs/qwen32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwen32b/s1k/dataset.pkl", + "data_prepare/output/qwen32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwen32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwen32b/aime24/dataset.pkl", + "data_prepare/output/qwen32b/aime25/dataset.pkl", + "data_prepare/output/qwen32b/aime26/dataset.pkl", + "data_prepare/output/qwen32b/math500/dataset.pkl", + "data_prepare/output/qwen32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwen32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh64__lr0.01", + "d_hidden": 64, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": false, + "d_phi": 5120, + "timestamp": "2026-03-28T00:27:53.205868", + "release_target": "qwen2.5-32b/supervised/qk_dh64", + "release_probe_source": "qwen32b_5k/supervised/ttt__dh64__lr0.01/checkpoints/probe_ep10.pt" +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/lambdas.json b/qwen2.5-32b/supervised/qk_dh64/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..61161fecf57a8adf2694548cf6506284f60fe19f --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9911, + "0.025": 0.9802, + "0.05": 0.9719, + "0.1": 0.9365, + "0.15": 0.9011, + "0.2": 0.8604, + "0.25": 0.8046, + "0.3": 0.7379, + "0.35": 0.6463, + "0.4": 0.4132, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/metrics.json b/qwen2.5-32b/supervised/qk_dh64/metrics.json new 
file mode 100644 index 0000000000000000000000000000000000000000..eb59e2faa0780ff735ff3f1e90734b77f2e506f6 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9911, + "error_rate": 0.0133, + "savings": 0.0602, + "accuracy": 0.9867 + }, + "0.025": { + "lambda": 0.9802, + "error_rate": 0.0344, + "savings": 0.1866, + "accuracy": 0.9656 + }, + "0.05": { + "lambda": 0.9719, + "error_rate": 0.051, + "savings": 0.2531, + "accuracy": 0.949 + }, + "0.1": { + "lambda": 0.9365, + "error_rate": 0.1064, + "savings": 0.4393, + "accuracy": 0.8936 + }, + "0.15": { + "lambda": 0.9011, + "error_rate": 0.1452, + "savings": 0.5667, + "accuracy": 0.8548 + }, + "0.2": { + "lambda": 0.8604, + "error_rate": 0.1996, + "savings": 0.6662, + "accuracy": 0.8004 + }, + "0.25": { + "lambda": 0.8046, + "error_rate": 0.2472, + "savings": 0.749, + "accuracy": 0.7528 + }, + "0.3": { + "lambda": 0.7379, + "error_rate": 0.296, + "savings": 0.8211, + "accuracy": 0.704 + }, + "0.35": { + "lambda": 0.6463, + "error_rate": 0.3348, + "savings": 0.8763, + "accuracy": 0.6652 + }, + "0.4": { + "lambda": 0.4132, + "error_rate": 0.3891, + "savings": 0.9399, + "accuracy": 0.6109 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3947, + "savings": 0.9502, + "accuracy": 0.6053 + } + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/ood_aime24.json b/qwen2.5-32b/supervised/qk_dh64/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..31478bb800dcb9563ee105df2c954b1a80ea2c8e --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9911, + "error_rate": 0.0, + "savings": 0.0402, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9802, + "error_rate": 0.0, + "savings": 0.0956, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9719, + "error_rate": 0.0, + "savings": 0.1145, + "accuracy": 1.0 + }, + "0.1": { + 
"lambda": 0.9365, + "error_rate": 0.15, + "savings": 0.375, + "accuracy": 0.85 + }, + "0.15": { + "lambda": 0.9011, + "error_rate": 0.15, + "savings": 0.4494, + "accuracy": 0.85 + }, + "0.2": { + "lambda": 0.8604, + "error_rate": 0.2, + "savings": 0.5521, + "accuracy": 0.8 + }, + "0.25": { + "lambda": 0.8046, + "error_rate": 0.3, + "savings": 0.6523, + "accuracy": 0.7 + }, + "0.3": { + "lambda": 0.7379, + "error_rate": 0.35, + "savings": 0.7525, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6463, + "error_rate": 0.45, + "savings": 0.8669, + "accuracy": 0.55 + }, + "0.4": { + "lambda": 0.4132, + "error_rate": 0.55, + "savings": 0.9562, + "accuracy": 0.45 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9683, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/ood_aime25.json b/qwen2.5-32b/supervised/qk_dh64/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..ccaf085bfc75757cca2149404f1160a74c1856d3 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9911, + "error_rate": 0.0, + "savings": 0.0138, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9802, + "error_rate": 0.0, + "savings": 0.0671, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9719, + "error_rate": 0.0, + "savings": 0.0787, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9365, + "error_rate": 0.0, + "savings": 0.2971, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.9011, + "error_rate": 0.0556, + "savings": 0.3891, + "accuracy": 0.9444 + }, + "0.2": { + "lambda": 0.8604, + "error_rate": 0.1111, + "savings": 0.5372, + "accuracy": 0.8889 + }, + "0.25": { + "lambda": 0.8046, + "error_rate": 0.1667, + "savings": 0.6302, + "accuracy": 0.8333 + }, + "0.3": { + "lambda": 0.7379, + "error_rate": 0.2222, + "savings": 0.7348, + "accuracy": 0.7778 + }, + "0.35": { + "lambda": 0.6463, + "error_rate": 0.3333, + "savings": 0.8833, + 
"accuracy": 0.6667 + }, + "0.4": { + "lambda": 0.4132, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4444, + "savings": 0.9529, + "accuracy": 0.5556 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/ood_aime26.json b/qwen2.5-32b/supervised/qk_dh64/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..4b7423199697e7045bb801e823f69da3fe9a0c70 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9911, + "error_rate": 0.0, + "savings": 0.0178, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9802, + "error_rate": 0.0, + "savings": 0.0426, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9719, + "error_rate": 0.0, + "savings": 0.0659, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.9365, + "error_rate": 0.05, + "savings": 0.1686, + "accuracy": 0.95 + }, + "0.15": { + "lambda": 0.9011, + "error_rate": 0.25, + "savings": 0.4795, + "accuracy": 0.75 + }, + "0.2": { + "lambda": 0.8604, + "error_rate": 0.3, + "savings": 0.5215, + "accuracy": 0.7 + }, + "0.25": { + "lambda": 0.8046, + "error_rate": 0.35, + "savings": 0.6349, + "accuracy": 0.65 + }, + "0.3": { + "lambda": 0.7379, + "error_rate": 0.35, + "savings": 0.74, + "accuracy": 0.65 + }, + "0.35": { + "lambda": 0.6463, + "error_rate": 0.35, + "savings": 0.8346, + "accuracy": 0.65 + }, + "0.4": { + "lambda": 0.4132, + "error_rate": 0.45, + "savings": 0.918, + "accuracy": 0.55 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.55, + "savings": 0.9591, + "accuracy": 0.45 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/ood_gpqa_diamond.json b/qwen2.5-32b/supervised/qk_dh64/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..2b4505b64412c198f403e6b54500eda8467e9ba2 --- /dev/null +++ 
b/qwen2.5-32b/supervised/qk_dh64/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9911, + "error_rate": 0.03, + "savings": 0.163, + "accuracy": 0.97 + }, + "0.025": { + "lambda": 0.9802, + "error_rate": 0.09, + "savings": 0.32, + "accuracy": 0.91 + }, + "0.05": { + "lambda": 0.9719, + "error_rate": 0.13, + "savings": 0.4109, + "accuracy": 0.87 + }, + "0.1": { + "lambda": 0.9365, + "error_rate": 0.22, + "savings": 0.6544, + "accuracy": 0.78 + }, + "0.15": { + "lambda": 0.9011, + "error_rate": 0.28, + "savings": 0.7527, + "accuracy": 0.72 + }, + "0.2": { + "lambda": 0.8604, + "error_rate": 0.36, + "savings": 0.8219, + "accuracy": 0.64 + }, + "0.25": { + "lambda": 0.8046, + "error_rate": 0.39, + "savings": 0.8941, + "accuracy": 0.61 + }, + "0.3": { + "lambda": 0.7379, + "error_rate": 0.41, + "savings": 0.9309, + "accuracy": 0.59 + }, + "0.35": { + "lambda": 0.6463, + "error_rate": 0.41, + "savings": 0.9438, + "accuracy": 0.59 + }, + "0.4": { + "lambda": 0.4132, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.41, + "savings": 0.9567, + "accuracy": 0.59 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/ood_math500.json b/qwen2.5-32b/supervised/qk_dh64/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..1bd58c49b21b2343732feea14a9823db91895c40 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9911, + "error_rate": 0.0062, + "savings": 0.2749, + "accuracy": 0.9938 + }, + "0.025": { + "lambda": 0.9802, + "error_rate": 0.0103, + "savings": 0.4578, + "accuracy": 0.9897 + }, + "0.05": { + "lambda": 0.9719, + "error_rate": 0.0103, + "savings": 0.5372, + "accuracy": 0.9897 + }, + "0.1": { + "lambda": 0.9365, + "error_rate": 0.0227, + "savings": 0.6862, + "accuracy": 0.9773 + }, + "0.15": { + "lambda": 0.9011, + "error_rate": 0.0371, + "savings": 0.7633, 
+ "accuracy": 0.9629 + }, + "0.2": { + "lambda": 0.8604, + "error_rate": 0.0454, + "savings": 0.8031, + "accuracy": 0.9546 + }, + "0.25": { + "lambda": 0.8046, + "error_rate": 0.0722, + "savings": 0.8455, + "accuracy": 0.9278 + }, + "0.3": { + "lambda": 0.7379, + "error_rate": 0.0804, + "savings": 0.865, + "accuracy": 0.9196 + }, + "0.35": { + "lambda": 0.6463, + "error_rate": 0.0887, + "savings": 0.878, + "accuracy": 0.9113 + }, + "0.4": { + "lambda": 0.4132, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0948, + "savings": 0.8885, + "accuracy": 0.9052 + } +} \ No newline at end of file diff --git a/qwen2.5-32b/supervised/qk_dh64/probe.pt b/qwen2.5-32b/supervised/qk_dh64/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..14ef8fe01e6a4945a7b40a33f4b1d3be655dfcb5 --- /dev/null +++ b/qwen2.5-32b/supervised/qk_dh64/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecdd9ac3f30b77c3beeda1ce34e8fb48f65661927ab1a1d4b551b02b8db0b957 +size 2624506 diff --git a/qwq-32b/supervised/no_kq/config.json b/qwq-32b/supervised/no_kq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cf90aa6c441e36f36b4b54a992557ecd52d4786d --- /dev/null +++ b/qwq-32b/supervised/no_kq/config.json @@ -0,0 +1,43 @@ +{ + "config": "configs/qwq32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwq32b/s1k/dataset.pkl", + "data_prepare/output/qwq32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwq32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwq32b/aime24/dataset.pkl", + "data_prepare/output/qwq32b/aime25/dataset.pkl", + "data_prepare/output/qwq32b/aime26/dataset.pkl", + "data_prepare/output/qwq32b/math500/dataset.pkl", + "data_prepare/output/qwq32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwq32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + 
"smooth_window": 10, + "run_name": "ttt__no_kq__lr0.01__ep40", + "d_hidden": 64, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 20, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": true, + "grad_clip": 1.0, + "force_retrain": true, + "save_every": 10, + "d_phi": 5120, + "timestamp": "2026-03-30T01:32:07.492149", + "release_target": "qwq-32b/supervised/no_kq", + "release_probe_source": "qwq32b_5k/supervised/ttt__no_kq__lr0.01__ep40/checkpoints/probe_ep20.pt" +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/lambdas.json b/qwq-32b/supervised/no_kq/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..39431de9fafb401cc080169c9eb150cdd5facc9f --- /dev/null +++ b/qwq-32b/supervised/no_kq/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.953, + "0.025": 0.9299999999999999, + "0.05": 0.9108, + "0.1": 0.8741, + "0.15": 0.8449, + "0.2": 0.8130999999999999, + "0.25": 0.7863, + "0.3": 0.7474000000000001, + "0.35": 0.7103999999999999, + "0.4": 0.6212, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/metrics.json b/qwq-32b/supervised/no_kq/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..05d4b0eb3bb23336841c5a1fdf57708a39cd88f8 --- /dev/null +++ b/qwq-32b/supervised/no_kq/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.953, + "error_rate": 0.0011, + "savings": 0.0353, + "accuracy": 0.9989 + }, + "0.025": { + "lambda": 0.9299999999999999, + "error_rate": 0.0181, + "savings": 0.1317, + "accuracy": 0.9819 + }, + "0.05": { + "lambda": 0.9108, + "error_rate": 0.0373, + "savings": 0.2283, + "accuracy": 0.9627 + }, + "0.1": { + "lambda": 0.8741, + "error_rate": 0.0814, + "savings": 0.3942, + "accuracy": 0.9186 + }, + "0.15": { + "lambda": 0.8449, + "error_rate": 
0.1288, + "savings": 0.5153, + "accuracy": 0.8712 + }, + "0.2": { + "lambda": 0.8130999999999999, + "error_rate": 0.1898, + "savings": 0.6421, + "accuracy": 0.8102 + }, + "0.25": { + "lambda": 0.7863, + "error_rate": 0.2305, + "savings": 0.7118, + "accuracy": 0.7695 + }, + "0.3": { + "lambda": 0.7474000000000001, + "error_rate": 0.2915, + "savings": 0.7955, + "accuracy": 0.7085 + }, + "0.35": { + "lambda": 0.7103999999999999, + "error_rate": 0.3277, + "savings": 0.8525, + "accuracy": 0.6723 + }, + "0.4": { + "lambda": 0.6212, + "error_rate": 0.3876, + "savings": 0.9196, + "accuracy": 0.6124 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4, + "savings": 0.95, + "accuracy": 0.6 + } + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/ood_aime24.json b/qwq-32b/supervised/no_kq/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..60b5c850ff3fab23c094eed83a3a00222022ce3b --- /dev/null +++ b/qwq-32b/supervised/no_kq/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.953, + "error_rate": 0.0, + "savings": 0.0286, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9299999999999999, + "error_rate": 0.0, + "savings": 0.0621, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9108, + "error_rate": 0.0, + "savings": 0.192, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8741, + "error_rate": 0.0476, + "savings": 0.2819, + "accuracy": 0.9524 + }, + "0.15": { + "lambda": 0.8449, + "error_rate": 0.2381, + "savings": 0.4328, + "accuracy": 0.7619 + }, + "0.2": { + "lambda": 0.8130999999999999, + "error_rate": 0.4762, + "savings": 0.6232, + "accuracy": 0.5238 + }, + "0.25": { + "lambda": 0.7863, + "error_rate": 0.5238, + "savings": 0.7303, + "accuracy": 0.4762 + }, + "0.3": { + "lambda": 0.7474000000000001, + "error_rate": 0.5714, + "savings": 0.8128, + "accuracy": 0.4286 + }, + "0.35": { + "lambda": 0.7103999999999999, + "error_rate": 0.619, + "savings": 0.8644, + "accuracy": 0.381 + }, + "0.4": { + 
"lambda": 0.6212, + "error_rate": 0.6667, + "savings": 0.9137, + "accuracy": 0.3333 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.6667, + "savings": 0.94, + "accuracy": 0.3333 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/ood_aime25.json b/qwq-32b/supervised/no_kq/ood_aime25.json new file mode 100644 index 0000000000000000000000000000000000000000..c36191c709d1a5f48cc18f6b83f67e4c297d9f0a --- /dev/null +++ b/qwq-32b/supervised/no_kq/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.953, + "error_rate": 0.0, + "savings": 0.0495, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9299999999999999, + "error_rate": 0.0769, + "savings": 0.0883, + "accuracy": 0.9231 + }, + "0.05": { + "lambda": 0.9108, + "error_rate": 0.0769, + "savings": 0.105, + "accuracy": 0.9231 + }, + "0.1": { + "lambda": 0.8741, + "error_rate": 0.1538, + "savings": 0.2441, + "accuracy": 0.8462 + }, + "0.15": { + "lambda": 0.8449, + "error_rate": 0.2308, + "savings": 0.3813, + "accuracy": 0.7692 + }, + "0.2": { + "lambda": 0.8130999999999999, + "error_rate": 0.2308, + "savings": 0.5208, + "accuracy": 0.7692 + }, + "0.25": { + "lambda": 0.7863, + "error_rate": 0.3077, + "savings": 0.6331, + "accuracy": 0.6923 + }, + "0.3": { + "lambda": 0.7474000000000001, + "error_rate": 0.3077, + "savings": 0.7149, + "accuracy": 0.6923 + }, + "0.35": { + "lambda": 0.7103999999999999, + "error_rate": 0.3846, + "savings": 0.7818, + "accuracy": 0.6154 + }, + "0.4": { + "lambda": 0.6212, + "error_rate": 0.3846, + "savings": 0.8755, + "accuracy": 0.6154 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3846, + "savings": 0.8939, + "accuracy": 0.6154 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/ood_aime26.json b/qwq-32b/supervised/no_kq/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..44740dc95340305db1c1cbbf2e2e3a66e1744ea0 --- /dev/null +++ 
b/qwq-32b/supervised/no_kq/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.953, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9299999999999999, + "error_rate": 0.0, + "savings": 0.002, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9108, + "error_rate": 0.0, + "savings": 0.0324, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8741, + "error_rate": 0.0, + "savings": 0.1812, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8449, + "error_rate": 0.0769, + "savings": 0.2976, + "accuracy": 0.9231 + }, + "0.2": { + "lambda": 0.8130999999999999, + "error_rate": 0.1538, + "savings": 0.443, + "accuracy": 0.8462 + }, + "0.25": { + "lambda": 0.7863, + "error_rate": 0.1538, + "savings": 0.4821, + "accuracy": 0.8462 + }, + "0.3": { + "lambda": 0.7474000000000001, + "error_rate": 0.3077, + "savings": 0.745, + "accuracy": 0.6923 + }, + "0.35": { + "lambda": 0.7103999999999999, + "error_rate": 0.3846, + "savings": 0.8101, + "accuracy": 0.6154 + }, + "0.4": { + "lambda": 0.6212, + "error_rate": 0.3846, + "savings": 0.8691, + "accuracy": 0.6154 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3846, + "savings": 0.9176, + "accuracy": 0.6154 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/ood_gpqa_diamond.json b/qwq-32b/supervised/no_kq/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..1f5ba0588f80ef7d71f96e5040cbd6d1895b81fa --- /dev/null +++ b/qwq-32b/supervised/no_kq/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.953, + "error_rate": 0.0299, + "savings": 0.1442, + "accuracy": 0.9701 + }, + "0.025": { + "lambda": 0.9299999999999999, + "error_rate": 0.0746, + "savings": 0.3129, + "accuracy": 0.9254 + }, + "0.05": { + "lambda": 0.9108, + "error_rate": 0.1194, + "savings": 0.3916, + "accuracy": 0.8806 + }, + "0.1": { + "lambda": 0.8741, + "error_rate": 0.209, + "savings": 0.537, + "accuracy": 0.791 + }, + "0.15": { + 
"lambda": 0.8449, + "error_rate": 0.2985, + "savings": 0.6881, + "accuracy": 0.7015 + }, + "0.2": { + "lambda": 0.8130999999999999, + "error_rate": 0.3582, + "savings": 0.7867, + "accuracy": 0.6418 + }, + "0.25": { + "lambda": 0.7863, + "error_rate": 0.403, + "savings": 0.8375, + "accuracy": 0.597 + }, + "0.3": { + "lambda": 0.7474000000000001, + "error_rate": 0.403, + "savings": 0.9092, + "accuracy": 0.597 + }, + "0.35": { + "lambda": 0.7103999999999999, + "error_rate": 0.403, + "savings": 0.9317, + "accuracy": 0.597 + }, + "0.4": { + "lambda": 0.6212, + "error_rate": 0.403, + "savings": 0.9383, + "accuracy": 0.597 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.403, + "savings": 0.9383, + "accuracy": 0.597 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/ood_math500.json b/qwq-32b/supervised/no_kq/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..d5f39168298a887e43789386c548e286e9208f27 --- /dev/null +++ b/qwq-32b/supervised/no_kq/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.953, + "error_rate": 0.0064, + "savings": 0.1377, + "accuracy": 0.9936 + }, + "0.025": { + "lambda": 0.9299999999999999, + "error_rate": 0.0085, + "savings": 0.3068, + "accuracy": 0.9915 + }, + "0.05": { + "lambda": 0.9108, + "error_rate": 0.0212, + "savings": 0.4335, + "accuracy": 0.9788 + }, + "0.1": { + "lambda": 0.8741, + "error_rate": 0.036, + "savings": 0.5885, + "accuracy": 0.964 + }, + "0.15": { + "lambda": 0.8449, + "error_rate": 0.0445, + "savings": 0.6701, + "accuracy": 0.9555 + }, + "0.2": { + "lambda": 0.8130999999999999, + "error_rate": 0.0551, + "savings": 0.725, + "accuracy": 0.9449 + }, + "0.25": { + "lambda": 0.7863, + "error_rate": 0.0614, + "savings": 0.75, + "accuracy": 0.9386 + }, + "0.3": { + "lambda": 0.7474000000000001, + "error_rate": 0.0657, + "savings": 0.7732, + "accuracy": 0.9343 + }, + "0.35": { + "lambda": 0.7103999999999999, + "error_rate": 0.0657, + "savings": 
0.7844, + "accuracy": 0.9343 + }, + "0.4": { + "lambda": 0.6212, + "error_rate": 0.0678, + "savings": 0.7949, + "accuracy": 0.9322 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0678, + "savings": 0.8024, + "accuracy": 0.9322 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/no_kq/probe.pt b/qwq-32b/supervised/no_kq/probe.pt new file mode 100644 index 0000000000000000000000000000000000000000..61eb5139d32c81812eb68cd5d1b216c6469aa8f5 --- /dev/null +++ b/qwq-32b/supervised/no_kq/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a280e5aa3887725b533a79f9021af598c42ce80cfbcf6fbf67ef381502fe883f +size 22652 diff --git a/qwq-32b/supervised/qk_dh128/config.json b/qwq-32b/supervised/qk_dh128/config.json new file mode 100644 index 0000000000000000000000000000000000000000..409fb83b2a3a7c1950a304b40fb1dedfead47bdd --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/config.json @@ -0,0 +1,43 @@ +{ + "config": "configs/qwq32b_5k.yaml", + "method": "ttt", + "dataset_path": [ + "data_prepare/output/qwq32b/s1k/dataset.pkl", + "data_prepare/output/qwq32b/openr1_2k/dataset.pkl", + "data_prepare/output/qwq32b/deepmath_2k/dataset.pkl" + ], + "ood_paths": [ + "data_prepare/output/qwq32b/aime24/dataset.pkl", + "data_prepare/output/qwq32b/aime25/dataset.pkl", + "data_prepare/output/qwq32b/aime26/dataset.pkl", + "data_prepare/output/qwq32b/math500/dataset.pkl", + "data_prepare/output/qwq32b/gpqa_diamond/dataset.pkl" + ], + "output_dir": "results/qwq32b_5k", + "label_mode": "supervised", + "batch_size": 10, + "seed": 42, + "smooth_window": 10, + "run_name": "ttt__dh128__lr0.01__ep40", + "d_hidden": 128, + "use_ln": false, + "use_residual": false, + "learnable_eta": false, + "base_lr": 0.01, + "share_kq": false, + "use_mlp": false, + "use_pca": false, + "pca_dim": 256, + "epochs": 10, + "outer_lr": 0.001, + "no_meta_train": false, + "no_online_update": false, + "no_kq": false, + "grad_clip": 1.0, + "force_retrain": true, + 
"save_every": 10, + "d_phi": 5120, + "timestamp": "2026-03-30T01:37:02.166812", + "release_target": "qwq-32b/supervised/qk_dh128", + "release_probe_source": "qwq32b_5k/supervised/ttt__dh128__lr0.01__final_ep10/probe.pt" +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/lambdas.json b/qwq-32b/supervised/qk_dh128/lambdas.json new file mode 100644 index 0000000000000000000000000000000000000000..ce91cbc39f66d144060a432f854b78895f2bd0a7 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/lambdas.json @@ -0,0 +1,13 @@ +{ + "0.01": 0.9788, + "0.025": 0.9618, + "0.05": 0.9314, + "0.1": 0.8748, + "0.15": 0.8036, + "0.2": 0.7502, + "0.25": 0.6767000000000001, + "0.3": 0.5772999999999999, + "0.35": 0.4841, + "0.4": 0.34419999999999995, + "0.5": 9.999999999998899e-05 +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/metrics.json b/qwq-32b/supervised/qk_dh128/metrics.json new file mode 100644 index 0000000000000000000000000000000000000000..fd97ea7d25cd6c8370a6afe654a4dc7222c11f90 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/metrics.json @@ -0,0 +1,70 @@ +{ + "eps_results": { + "0.01": { + "lambda": 0.9788, + "error_rate": 0.009, + "savings": 0.0463, + "accuracy": 0.991 + }, + "0.025": { + "lambda": 0.9618, + "error_rate": 0.0226, + "savings": 0.1179, + "accuracy": 0.9774 + }, + "0.05": { + "lambda": 0.9314, + "error_rate": 0.0373, + "savings": 0.2312, + "accuracy": 0.9627 + }, + "0.1": { + "lambda": 0.8748, + "error_rate": 0.0757, + "savings": 0.3761, + "accuracy": 0.9243 + }, + "0.15": { + "lambda": 0.8036, + "error_rate": 0.1299, + "savings": 0.5322, + "accuracy": 0.8701 + }, + "0.2": { + "lambda": 0.7502, + "error_rate": 0.1706, + "savings": 0.6251, + "accuracy": 0.8294 + }, + "0.25": { + "lambda": 0.6767000000000001, + "error_rate": 0.2282, + "savings": 0.715, + "accuracy": 0.7718 + }, + "0.3": { + "lambda": 0.5772999999999999, + "error_rate": 0.2915, + "savings": 0.8069, + "accuracy": 0.7085 + }, + "0.35": { + "lambda": 0.4841, + 
"error_rate": 0.3412, + "savings": 0.8707, + "accuracy": 0.6588 + }, + "0.4": { + "lambda": 0.34419999999999995, + "error_rate": 0.3808, + "savings": 0.9194, + "accuracy": 0.6192 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.4, + "savings": 0.95, + "accuracy": 0.6 + } + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/ood_aime24.json b/qwq-32b/supervised/qk_dh128/ood_aime24.json new file mode 100644 index 0000000000000000000000000000000000000000..0fb65ec1e99be3466dfff4d636f06d83e2f76148 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/ood_aime24.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9788, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9618, + "error_rate": 0.0, + "savings": 0.0079, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9314, + "error_rate": 0.0, + "savings": 0.0323, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8748, + "error_rate": 0.0, + "savings": 0.1132, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8036, + "error_rate": 0.0, + "savings": 0.1593, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.7502, + "error_rate": 0.0952, + "savings": 0.2419, + "accuracy": 0.9048 + }, + "0.25": { + "lambda": 0.6767000000000001, + "error_rate": 0.2381, + "savings": 0.3786, + "accuracy": 0.7619 + }, + "0.3": { + "lambda": 0.5772999999999999, + "error_rate": 0.381, + "savings": 0.5919, + "accuracy": 0.619 + }, + "0.35": { + "lambda": 0.4841, + "error_rate": 0.4286, + "savings": 0.6854, + "accuracy": 0.5714 + }, + "0.4": { + "lambda": 0.34419999999999995, + "error_rate": 0.5238, + "savings": 0.7731, + "accuracy": 0.4762 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.6667, + "savings": 0.94, + "accuracy": 0.3333 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/ood_aime25.json b/qwq-32b/supervised/qk_dh128/ood_aime25.json new file mode 100644 index 
0000000000000000000000000000000000000000..3a5b0b7bbb10c182660eb07cc048ba1c432a01a5 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/ood_aime25.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9788, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9618, + "error_rate": 0.0, + "savings": 0.0035, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9314, + "error_rate": 0.0, + "savings": 0.0322, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8748, + "error_rate": 0.0, + "savings": 0.1367, + "accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8036, + "error_rate": 0.0, + "savings": 0.2891, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.7502, + "error_rate": 0.1538, + "savings": 0.43, + "accuracy": 0.8462 + }, + "0.25": { + "lambda": 0.6767000000000001, + "error_rate": 0.2308, + "savings": 0.5285, + "accuracy": 0.7692 + }, + "0.3": { + "lambda": 0.5772999999999999, + "error_rate": 0.2308, + "savings": 0.6006, + "accuracy": 0.7692 + }, + "0.35": { + "lambda": 0.4841, + "error_rate": 0.2308, + "savings": 0.6645, + "accuracy": 0.7692 + }, + "0.4": { + "lambda": 0.34419999999999995, + "error_rate": 0.3077, + "savings": 0.7325, + "accuracy": 0.6923 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3846, + "savings": 0.8939, + "accuracy": 0.6154 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/ood_aime26.json b/qwq-32b/supervised/qk_dh128/ood_aime26.json new file mode 100644 index 0000000000000000000000000000000000000000..03af4004bd387001bf35d28123e0c0341145ce42 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/ood_aime26.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9788, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9618, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9314, + "error_rate": 0.0, + "savings": 0.0, + "accuracy": 1.0 + }, + "0.1": { + "lambda": 0.8748, + "error_rate": 0.0, + "savings": 0.0, + 
"accuracy": 1.0 + }, + "0.15": { + "lambda": 0.8036, + "error_rate": 0.0, + "savings": 0.1073, + "accuracy": 1.0 + }, + "0.2": { + "lambda": 0.7502, + "error_rate": 0.0, + "savings": 0.252, + "accuracy": 1.0 + }, + "0.25": { + "lambda": 0.6767000000000001, + "error_rate": 0.0, + "savings": 0.3013, + "accuracy": 1.0 + }, + "0.3": { + "lambda": 0.5772999999999999, + "error_rate": 0.0769, + "savings": 0.5174, + "accuracy": 0.9231 + }, + "0.35": { + "lambda": 0.4841, + "error_rate": 0.1538, + "savings": 0.5886, + "accuracy": 0.8462 + }, + "0.4": { + "lambda": 0.34419999999999995, + "error_rate": 0.3846, + "savings": 0.7962, + "accuracy": 0.6154 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.3846, + "savings": 0.9176, + "accuracy": 0.6154 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/ood_gpqa_diamond.json b/qwq-32b/supervised/qk_dh128/ood_gpqa_diamond.json new file mode 100644 index 0000000000000000000000000000000000000000..153e5c3f0a37074f116bd31b78cdc70c466f42d9 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/ood_gpqa_diamond.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9788, + "error_rate": 0.0, + "savings": 0.0292, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9618, + "error_rate": 0.0149, + "savings": 0.0721, + "accuracy": 0.9851 + }, + "0.05": { + "lambda": 0.9314, + "error_rate": 0.0597, + "savings": 0.1517, + "accuracy": 0.9403 + }, + "0.1": { + "lambda": 0.8748, + "error_rate": 0.1045, + "savings": 0.3707, + "accuracy": 0.8955 + }, + "0.15": { + "lambda": 0.8036, + "error_rate": 0.1493, + "savings": 0.5227, + "accuracy": 0.8507 + }, + "0.2": { + "lambda": 0.7502, + "error_rate": 0.194, + "savings": 0.5886, + "accuracy": 0.806 + }, + "0.25": { + "lambda": 0.6767000000000001, + "error_rate": 0.209, + "savings": 0.637, + "accuracy": 0.791 + }, + "0.3": { + "lambda": 0.5772999999999999, + "error_rate": 0.2687, + "savings": 0.7375, + "accuracy": 0.7313 + }, + "0.35": { + "lambda": 0.4841, + "error_rate": 
0.2836, + "savings": 0.7829, + "accuracy": 0.7164 + }, + "0.4": { + "lambda": 0.34419999999999995, + "error_rate": 0.3582, + "savings": 0.8575, + "accuracy": 0.6418 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.403, + "savings": 0.9383, + "accuracy": 0.597 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/ood_math500.json b/qwq-32b/supervised/qk_dh128/ood_math500.json new file mode 100644 index 0000000000000000000000000000000000000000..80e70366e7237e7b59a5d9437ce7bd769a55cf62 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/ood_math500.json @@ -0,0 +1,68 @@ +{ + "0.01": { + "lambda": 0.9788, + "error_rate": 0.0, + "savings": 0.2002, + "accuracy": 1.0 + }, + "0.025": { + "lambda": 0.9618, + "error_rate": 0.0, + "savings": 0.298, + "accuracy": 1.0 + }, + "0.05": { + "lambda": 0.9314, + "error_rate": 0.0064, + "savings": 0.4219, + "accuracy": 0.9936 + }, + "0.1": { + "lambda": 0.8748, + "error_rate": 0.0148, + "savings": 0.5504, + "accuracy": 0.9852 + }, + "0.15": { + "lambda": 0.8036, + "error_rate": 0.0233, + "savings": 0.6463, + "accuracy": 0.9767 + }, + "0.2": { + "lambda": 0.7502, + "error_rate": 0.036, + "savings": 0.6856, + "accuracy": 0.964 + }, + "0.25": { + "lambda": 0.6767000000000001, + "error_rate": 0.0487, + "savings": 0.7267, + "accuracy": 0.9513 + }, + "0.3": { + "lambda": 0.5772999999999999, + "error_rate": 0.0551, + "savings": 0.7591, + "accuracy": 0.9449 + }, + "0.35": { + "lambda": 0.4841, + "error_rate": 0.0657, + "savings": 0.782, + "accuracy": 0.9343 + }, + "0.4": { + "lambda": 0.34419999999999995, + "error_rate": 0.0657, + "savings": 0.7944, + "accuracy": 0.9343 + }, + "0.5": { + "lambda": 9.999999999998899e-05, + "error_rate": 0.0678, + "savings": 0.8024, + "accuracy": 0.9322 + } +} \ No newline at end of file diff --git a/qwq-32b/supervised/qk_dh128/probe.pt b/qwq-32b/supervised/qk_dh128/probe.pt new file mode 100644 index 
0000000000000000000000000000000000000000..70556e584ac62f7389a369e30995fb5098b74160 --- /dev/null +++ b/qwq-32b/supervised/qk_dh128/probe.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:252a9049ecb136339701f6b50fc914feaeca482300850760c865fcb73c6af0e1 +size 5246202