Spaces:
Running
Running
| { | |
| "results": [ | |
| { | |
| "label": "Qwen2.5-0.5B", | |
| "hf_id": "Qwen/Qwen2.5-0.5B", | |
| "n_kv": 2, | |
| "d_head": 64, | |
| "n_params_M": 490, | |
| "theta": 1000000, | |
| "T_train": 32768, | |
| "R_c": 17.07756720897567, | |
| "random": { | |
| "gamma": 0.9062338770446412, | |
| "r2": 0.9909270565050566, | |
| "means": { | |
| "10": 0.00933715307641597, | |
| "20": 0.008325269828949655, | |
| "30": 0.0052897306567146665, | |
| "50": 0.0035934632803712573, | |
| "100": 0.0014672519408521199, | |
| "200": 0.0010016655212356932, | |
| "500": 0.0004682919560443787, | |
| "1000": 0.00020250596815631503 | |
| } | |
| }, | |
| "text": { | |
| "gamma": 0.8614455324492792, | |
| "r2": 0.9644449017304708, | |
| "means": { | |
| "10": 0.008442668694825399, | |
| "20": 0.00615490938226382, | |
| "30": 0.004094105288386345, | |
| "50": 0.002889665500039146, | |
| "100": 0.0011341503723746255, | |
| "200": 0.0006860575541144325, | |
| "500": 0.000253526685493333, | |
| "1000": 0.0002539449149654025 | |
| } | |
| }, | |
| "delta_gamma": 0.04478834459536196, | |
| "verdict": "neutral" | |
| }, | |
| { | |
| "label": "Qwen2.5-1.5B", | |
| "hf_id": "Qwen/Qwen2.5-1.5B", | |
| "n_kv": 2, | |
| "d_head": 128, | |
| "n_params_M": 1540, | |
| "theta": 1000000, | |
| "T_train": 32768, | |
| "R_c": 3.085057430752342, | |
| "random": { | |
| "gamma": 0.980402153144676, | |
| "r2": 0.9594453213557013, | |
| "means": { | |
| "10": 0.01075944620832533, | |
| "20": 0.006380008729985905, | |
| "30": 0.003982916441592542, | |
| "50": 0.0022768721139679354, | |
| "100": 0.0010736816094486287, | |
| "200": 0.0008009741012054153, | |
| "500": 0.00015360812346140544, | |
| "1000": 0.000157832403977712 | |
| } | |
| }, | |
| "text": { | |
| "gamma": 1.0634506863606663, | |
| "r2": 0.9769510380449103, | |
| "means": { | |
| "10": 0.010448309971608, | |
| "20": 0.006884462779123603, | |
| "30": 0.0044515742664843535, | |
| "50": 0.002194346263345364, | |
| "100": 0.0008933738613805027, | |
| "200": 0.0004605254583191453, | |
| "500": 0.00014063805924003966, | |
| "1000": 0.00012315947113307655 | |
| } | |
| }, | |
| "delta_gamma": -0.08304853321599026, | |
| "verdict": "neutral" | |
| }, | |
| { | |
| "label": "SmolLM2-1.7B", | |
| "hf_id": "HuggingFaceTB/SmolLM2-1.7B", | |
| "n_kv": 32, | |
| "d_head": 64, | |
| "n_params_M": 1700, | |
| "theta": 130000, | |
| "T_train": 8192, | |
| "R_c": 0.47905044355233856, | |
| "random": { | |
| "gamma": 0.8662591994464454, | |
| "r2": 0.9970100032742922, | |
| "means": { | |
| "10": 0.00644491306040436, | |
| "20": 0.0052320567176987725, | |
| "30": 0.0030720023019239306, | |
| "50": 0.0020971989150469503, | |
| "100": 0.0012476958287879825, | |
| "200": 0.0006285492129003009, | |
| "500": 0.00025838499888777735, | |
| "1000": 0.00016099023167043925 | |
| } | |
| }, | |
| "text": { | |
| "gamma": 0.8459469449319719, | |
| "r2": 0.9628408330570999, | |
| "means": { | |
| "10": 0.005857278377128144, | |
| "20": 0.0038359998057906825, | |
| "30": 0.0027658051640416185, | |
| "50": 0.0016047325575103363, | |
| "100": 0.0008706180502971013, | |
| "200": 0.0006290887234111627, | |
| "500": 0.0003532431973144412, | |
| "1000": 0.00010335376486182212 | |
| } | |
| }, | |
| "delta_gamma": 0.020312254514473493, | |
| "verdict": "neutral" | |
| }, | |
| { | |
| "label": "Phi-3.5-mini", | |
| "hf_id": "microsoft/Phi-3.5-mini-instruct", | |
| "n_kv": 32, | |
| "d_head": 96, | |
| "n_params_M": 3820, | |
| "theta": 10000, | |
| "T_train": 131072, | |
| "R_c": 0.48685674820547287, | |
| "error": "measure: type object 'DynamicCache' has no attribute 'from_legacy_cache'" | |
| }, | |
| { | |
| "label": "Qwen2.5-3B", | |
| "hf_id": "Qwen/Qwen2.5-3B", | |
| "n_kv": 2, | |
| "d_head": 128, | |
| "n_params_M": 3090, | |
| "theta": 1000000, | |
| "T_train": 32768, | |
| "R_c": 2.0342189993534796, | |
| "random": { | |
| "gamma": 0.7464802030156852, | |
| "r2": 0.9879267676037468, | |
| "means": { | |
| "10": 0.009178792749428087, | |
| "20": 0.006702715104652776, | |
| "30": 0.004231071588065889, | |
| "50": 0.003230642717745569, | |
| "100": 0.0015271495696571138, | |
| "200": 0.0009070214380820593, | |
| "500": 0.00047202539319793384, | |
| "1000": 0.000346068793700801 | |
| } | |
| }, | |
| "text": { | |
| "gamma": 0.799209027274833, | |
| "r2": 0.927322958064882, | |
| "means": { | |
| "10": 0.008374775254891978, | |
| "20": 0.004851305995964342, | |
| "30": 0.0030537243146035405, | |
| "50": 0.0022043935333689053, | |
| "100": 0.0010392124433484342, | |
| "200": 0.0005058742376665274, | |
| "500": 0.00019562873782383072, | |
| "1000": 0.0002686874713334772 | |
| } | |
| }, | |
| "delta_gamma": -0.05272882425914782, | |
| "verdict": "neutral" | |
| }, | |
| { | |
| "label": "Llama-3.2-3B", | |
| "hf_id": "meta-llama/Llama-3.2-3B", | |
| "n_kv": 8, | |
| "d_head": 128, | |
| "n_params_M": 3210, | |
| "theta": 500000, | |
| "T_train": 131072, | |
| "R_c": 1.331335711921419, | |
| "random": { | |
| "gamma": 0.678947152194417, | |
| "r2": 0.9423162919570118, | |
| "means": { | |
| "10": 0.0053288231985200015, | |
| "20": 0.003921971014212994, | |
| "30": 0.0029104793018528393, | |
| "50": 0.002017381762464841, | |
| "100": 0.0014156586463962282, | |
| "200": 0.0010305073467038928, | |
| "500": 0.00029545511518205917, | |
| "1000": 0.00033367779638086045 | |
| } | |
| }, | |
| "text": { | |
| "gamma": 0.5942645260566438, | |
| "r2": 0.9633619728314922, | |
| "means": { | |
| "10": 0.006716178983804726, | |
| "20": 0.004247029634813468, | |
| "30": 0.0026757050908747175, | |
| "50": 0.0018435514008715039, | |
| "100": 0.0012482281100182305, | |
| "200": 0.0007410081138923054, | |
| "500": 0.00036949885388215385, | |
| "1000": 0.00039038256520316714 | |
| } | |
| }, | |
| "delta_gamma": 0.08468262613777322, | |
| "verdict": "neutral" | |
| }, | |
| { | |
| "label": "Yi-6B", | |
| "hf_id": "01-ai/Yi-6B", | |
| "n_kv": 4, | |
| "d_head": 128, | |
| "n_params_M": 6060, | |
| "theta": 5000000, | |
| "T_train": 4096, | |
| "R_c": 1.2751049391099323, | |
| "random": { | |
| "gamma": 0.9664360975501075, | |
| "r2": 0.9795433573586503, | |
| "means": { | |
| "10": 0.007488797352125403, | |
| "20": 0.006183944555632479, | |
| "30": 0.0037867348757026774, | |
| "50": 0.0034175124804460212, | |
| "100": 0.0016160887095566067, | |
| "200": 0.0010107121610053581, | |
| "500": 0.00031756698555682303, | |
| "1000": 0.00014130359336604669 | |
| } | |
| }, | |
| "text": { | |
| "gamma": 0.841961271334462, | |
| "r2": 0.9822390595591267, | |
| "means": { | |
| "10": 0.006165254368618207, | |
| "20": 0.004001568540134031, | |
| "30": 0.0020899126197446096, | |
| "50": 0.001873674674262702, | |
| "100": 0.0009678956000712446, | |
| "200": 0.0006449051997040556, | |
| "500": 0.00022300777077084887, | |
| "1000": 0.00012313241315040636 | |
| } | |
| }, | |
| "delta_gamma": 0.12447482621564554, | |
| "verdict": "pre-IH" | |
| }, | |
| { | |
| "label": "Qwen2.5-7B", | |
| "hf_id": "Qwen/Qwen2.5-7B", | |
| "n_kv": 4, | |
| "d_head": 128, | |
| "n_params_M": 7610, | |
| "theta": 1000000, | |
| "T_train": 131072, | |
| "R_c": 1.1765191580975, | |
| "error": "measure: CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 14.56 GiB of which 33.81 MiB is free. Including non-PyTorch memory, this process has 14.53 GiB memory in use. Of the allocated memory 14.37 GiB is allocated by PyTorch, and 27.00 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" | |
| } | |
| ], | |
| "elapsed_min": 62.35337897141775, | |
| "n_completed": 6, | |
| "panel_size": 12, | |
| "incomplete_reason": "disk space limit" | |
| } |