{ "results": [ { "label": "Qwen2.5-0.5B", "hf_id": "Qwen/Qwen2.5-0.5B", "n_kv": 2, "d_head": 64, "n_params_M": 490, "theta": 1000000, "T_train": 32768, "R_c": 17.07756720897567, "random": { "gamma": 0.9062338770446412, "r2": 0.9909270565050566, "means": { "10": 0.00933715307641597, "20": 0.008325269828949655, "30": 0.0052897306567146665, "50": 0.0035934632803712573, "100": 0.0014672519408521199, "200": 0.0010016655212356932, "500": 0.0004682919560443787, "1000": 0.00020250596815631503 } }, "text": { "gamma": 0.8614455324492792, "r2": 0.9644449017304708, "means": { "10": 0.008442668694825399, "20": 0.00615490938226382, "30": 0.004094105288386345, "50": 0.002889665500039146, "100": 0.0011341503723746255, "200": 0.0006860575541144325, "500": 0.000253526685493333, "1000": 0.0002539449149654025 } }, "delta_gamma": 0.04478834459536196, "verdict": "neutral" }, { "label": "Qwen2.5-1.5B", "hf_id": "Qwen/Qwen2.5-1.5B", "n_kv": 2, "d_head": 128, "n_params_M": 1540, "theta": 1000000, "T_train": 32768, "R_c": 3.085057430752342, "random": { "gamma": 0.980402153144676, "r2": 0.9594453213557013, "means": { "10": 0.01075944620832533, "20": 0.006380008729985905, "30": 0.003982916441592542, "50": 0.0022768721139679354, "100": 0.0010736816094486287, "200": 0.0008009741012054153, "500": 0.00015360812346140544, "1000": 0.000157832403977712 } }, "text": { "gamma": 1.0634506863606663, "r2": 0.9769510380449103, "means": { "10": 0.010448309971608, "20": 0.006884462779123603, "30": 0.0044515742664843535, "50": 0.002194346263345364, "100": 0.0008933738613805027, "200": 0.0004605254583191453, "500": 0.00014063805924003966, "1000": 0.00012315947113307655 } }, "delta_gamma": -0.08304853321599026, "verdict": "neutral" }, { "label": "SmolLM2-1.7B", "hf_id": "HuggingFaceTB/SmolLM2-1.7B", "n_kv": 32, "d_head": 64, "n_params_M": 1700, "theta": 130000, "T_train": 8192, "R_c": 0.47905044355233856, "random": { "gamma": 0.8662591994464454, "r2": 0.9970100032742922, "means": { "10": 0.00644491306040436, "20": 0.0052320567176987725, "30": 0.0030720023019239306, "50": 0.0020971989150469503, "100": 0.0012476958287879825, "200": 0.0006285492129003009, "500": 0.00025838499888777735, "1000": 0.00016099023167043925 } }, "text": { "gamma": 0.8459469449319719, "r2": 0.9628408330570999, "means": { "10": 0.005857278377128144, "20": 0.0038359998057906825, "30": 0.0027658051640416185, "50": 0.0016047325575103363, "100": 0.0008706180502971013, "200": 0.0006290887234111627, "500": 0.0003532431973144412, "1000": 0.00010335376486182212 } }, "delta_gamma": 0.020312254514473493, "verdict": "neutral" }, { "label": "Phi-3.5-mini", "hf_id": "microsoft/Phi-3.5-mini-instruct", "n_kv": 32, "d_head": 96, "n_params_M": 3820, "theta": 10000, "T_train": 131072, "R_c": 0.48685674820547287, "error": "measure: type object 'DynamicCache' has no attribute 'from_legacy_cache'" }, { "label": "Qwen2.5-3B", "hf_id": "Qwen/Qwen2.5-3B", "n_kv": 2, "d_head": 128, "n_params_M": 3090, "theta": 1000000, "T_train": 32768, "R_c": 2.0342189993534796, "random": { "gamma": 0.7464802030156852, "r2": 0.9879267676037468, "means": { "10": 0.009178792749428087, "20": 0.006702715104652776, "30": 0.004231071588065889, "50": 0.003230642717745569, "100": 0.0015271495696571138, "200": 0.0009070214380820593, "500": 0.00047202539319793384, "1000": 0.000346068793700801 } }, "text": { "gamma": 0.799209027274833, "r2": 0.927322958064882, "means": { "10": 0.008374775254891978, "20": 0.004851305995964342, "30": 0.0030537243146035405, "50": 0.0022043935333689053, "100": 0.0010392124433484342, "200": 0.0005058742376665274, "500": 0.00019562873782383072, "1000": 0.0002686874713334772 } }, "delta_gamma": -0.05272882425914782, "verdict": "neutral" }, { "label": "Llama-3.2-3B", "hf_id": "meta-llama/Llama-3.2-3B", "n_kv": 8, "d_head": 128, "n_params_M": 3210, "theta": 500000, "T_train": 131072, "R_c": 1.331335711921419, "random": { "gamma": 0.678947152194417, "r2": 0.9423162919570118, "means": { "10": 0.0053288231985200015, "20": 0.003921971014212994, "30": 0.0029104793018528393, "50": 0.002017381762464841, "100": 0.0014156586463962282, "200": 0.0010305073467038928, "500": 0.00029545511518205917, "1000": 0.00033367779638086045 } }, "text": { "gamma": 0.5942645260566438, "r2": 0.9633619728314922, "means": { "10": 0.006716178983804726, "20": 0.004247029634813468, "30": 0.0026757050908747175, "50": 0.0018435514008715039, "100": 0.0012482281100182305, "200": 0.0007410081138923054, "500": 0.00036949885388215385, "1000": 0.00039038256520316714 } }, "delta_gamma": 0.08468262613777322, "verdict": "neutral" }, { "label": "Yi-6B", "hf_id": "01-ai/Yi-6B", "n_kv": 4, "d_head": 128, "n_params_M": 6060, "theta": 5000000, "T_train": 4096, "R_c": 1.2751049391099323, "random": { "gamma": 0.9664360975501075, "r2": 0.9795433573586503, "means": { "10": 0.007488797352125403, "20": 0.006183944555632479, "30": 0.0037867348757026774, "50": 0.0034175124804460212, "100": 0.0016160887095566067, "200": 0.0010107121610053581, "500": 0.00031756698555682303, "1000": 0.00014130359336604669 } }, "text": { "gamma": 0.841961271334462, "r2": 0.9822390595591267, "means": { "10": 0.006165254368618207, "20": 0.004001568540134031, "30": 0.0020899126197446096, "50": 0.001873674674262702, "100": 0.0009678956000712446, "200": 0.0006449051997040556, "500": 0.00022300777077084887, "1000": 0.00012313241315040636 } }, "delta_gamma": 0.12447482621564554, "verdict": "pre-IH" }, { "label": "Qwen2.5-7B", "hf_id": "Qwen/Qwen2.5-7B", "n_kv": 4, "d_head": 128, "n_params_M": 7610, "theta": 1000000, "T_train": 131072, "R_c": 1.1765191580975, "error": "measure: CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 14.56 GiB of which 33.81 MiB is free. Including non-PyTorch memory, this process has 14.53 GiB memory in use. Of the allocated memory 14.37 GiB is allocated by PyTorch, and 27.00 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" } ], "elapsed_min": 62.35337897141775, "n_completed": 6, "panel_size": 12, "incomplete_reason": "disk space limit" }