Spaces:
Running
Running
| { | |
| "experiment": "EXP-G4", | |
| "formula": "gamma = 1 - T_eval*sqrt(2)/theta", | |
| "date": "2026-04-18", | |
| "results": [ | |
| { | |
| "prefix": "EleutherAI--pythia-70m", | |
| "name": "pythia-70m", | |
| "pe_type": "RoPE", | |
| "notes": "", | |
| "theta": 10000, | |
| "d_head": 64, | |
| "T_train": 2048, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 0.7476017873166874, | |
| "gamma_new_pred": 0.717157287525381, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": 4.245163553501351, | |
| "err_old_pct": -25.239493568239496, | |
| "R2": 0.9893049417040555 | |
| }, | |
| { | |
| "prefix": "meta-llama--Meta-Llama-3-8B", | |
| "name": "Meta-Llama-3-8B", | |
| "pe_type": "RoPE", | |
| "notes": "", | |
| "theta": 500000, | |
| "d_head": 128, | |
| "T_train": 8192, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 1.0454762537473639, | |
| "gamma_new_pred": 0.9943431457505076, | |
| "gamma_old_pred": 0.7018781400200674, | |
| "err_new_pct": 5.1424006104313325, | |
| "err_old_pct": 48.95409817400391, | |
| "R2": 0.996718622313285 | |
| }, | |
| { | |
| "prefix": "Qwen--Qwen2.5-7B", | |
| "name": "Qwen2.5-7B", | |
| "pe_type": "RoPE", | |
| "notes": "", | |
| "theta": 1000000, | |
| "d_head": 128, | |
| "T_train": 8192, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 0.9966953735480816, | |
| "gamma_new_pred": 0.9971715728752538, | |
| "gamma_old_pred": 0.6666637444455867, | |
| "err_new_pct": -0.047755004266634545, | |
| "err_old_pct": 49.5049613620367, | |
| "R2": 0.9963935840252253 | |
| }, | |
| { | |
| "prefix": "meta-llama--Llama-2-7b-hf", | |
| "name": "Llama-2-7b-hf", | |
| "pe_type": "RoPE", | |
| "notes": "artifact", | |
| "theta": 10000, | |
| "d_head": 128, | |
| "T_train": 4096, | |
| "T_eval_max": 2000.0, | |
| "n_points": 9, | |
| "gamma_obs": 0.2870574377368437, | |
| "gamma_new_pred": 0.717157287525381, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": -59.97287586278841, | |
| "err_old_pct": -71.29413039896974, | |
| "R2": 0.881768027724978 | |
| }, | |
| { | |
| "prefix": "mistralai--Mistral-7B-v0.1", | |
| "name": "Mistral-7B-v0.1", | |
| "pe_type": "RoPE", | |
| "notes": "only_2pts", | |
| "theta": 10000, | |
| "d_head": 128, | |
| "T_train": 8192, | |
| "T_eval_max": 50.0, | |
| "n_points": 2, | |
| "gamma_obs": 1.213076772373504, | |
| "gamma_new_pred": 0.9929289321881345, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": 22.17156062722697, | |
| "err_old_pct": 21.30820897145856, | |
| "R2": 1.0 | |
| }, | |
| { | |
| "prefix": "mistralai--Mistral-Nemo-Instruct-2407", | |
| "name": "Mistral-Nemo-Instruct-2407", | |
| "pe_type": "RoPE", | |
| "notes": "partial_7pts", | |
| "theta": 1000000, | |
| "d_head": 128, | |
| "T_train": 131072, | |
| "T_eval_max": 500.0, | |
| "n_points": 7, | |
| "gamma_obs": 0.5407084190220748, | |
| "gamma_new_pred": 0.9992928932188134, | |
| "gamma_old_pred": 0.6666637444455867, | |
| "err_new_pct": -45.8908971842676, | |
| "err_old_pct": -18.893381629483898, | |
| "R2": 0.9671649502959694 | |
| }, | |
| { | |
| "prefix": "google--gemma-2-9b-it", | |
| "name": "gemma-2-9b-it", | |
| "pe_type": "RoPE", | |
| "notes": "partial", | |
| "theta": 10000, | |
| "d_head": 256, | |
| "T_train": 8192, | |
| "T_eval_max": 1000.0, | |
| "n_points": 8, | |
| "gamma_obs": 0.6586407289285032, | |
| "gamma_new_pred": 0.8585786437626906, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": -23.287082236050793, | |
| "err_old_pct": -34.13563840181085, | |
| "R2": 0.97730793078849 | |
| }, | |
| { | |
| "prefix": "tiiuae--falcon-7b", | |
| "name": "falcon-7b", | |
| "pe_type": "ALiBi", | |
| "notes": "no_rope", | |
| "theta": 10000, | |
| "d_head": 64, | |
| "T_train": 2048, | |
| "T_eval_max": 1000.0, | |
| "n_points": 8, | |
| "gamma_obs": 0.8928207115404576, | |
| "gamma_new_pred": 0.8585786437626906, | |
| "gamma_old_pred": 0.99999561666838, | |
| "err_new_pct": 3.9882272901294638, | |
| "err_old_pct": -10.717537491313212, | |
| "R2": 0.9927613215692025 | |
| } | |
| ] | |
| } |