Spaces:
Running
Running
File size: 5,195 Bytes
535348a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 | {
"config": {
"N_full": 2048,
"f_target": 0.9
},
"results": [
{
"model": "EleutherAI/pythia-14m",
"tag": "pythia-14m",
"gamma": 0.685,
"phase": "A",
"D_f": 1466,
"N_full": 2048,
"f_target": 0.9,
"ppl_full": 78.36305186690407,
"ppl_at_df": 82.73959301570233,
"delta_ppl_df": 4.376541148798253,
"knee_heuristic": 1280,
"df_ratio": 0.7158203125,
"sweep": {
"64": 170.27565431091813,
"128": 129.87243953384964,
"256": 116.10089939684218,
"512": 100.39440165679781,
"768": 93.42492785289417,
"1024": 87.69748565849753,
"1280": 82.71468331445736,
"1466": 82.73959301570233,
"1536": 84.26238377642993,
"2048": 78.36305186690407
}
},
{
"model": "EleutherAI/pythia-70m",
"tag": "pythia-70m",
"gamma": 0.748,
"phase": "A",
"D_f": 1348,
"N_full": 2048,
"f_target": 0.9,
"ppl_full": 39.42330332269695,
"ppl_at_df": 38.29037532881002,
"delta_ppl_df": -1.1329279938869306,
"knee_heuristic": 1024,
"df_ratio": 0.658203125,
"sweep": {
"64": 78.26171993639424,
"128": 60.39432733839407,
"256": 53.77005947252492,
"512": 46.90251454572783,
"768": 43.56558823784614,
"1024": 41.0769018008491,
"1280": 39.57051656987743,
"1348": 38.29037532881002,
"1536": 41.25781274772638,
"2048": 39.42330332269695
}
},
{
"model": "EleutherAI/pythia-160m",
"tag": "pythia-160m",
"gamma": 0.511,
"phase": "A",
"D_f": 1651,
"N_full": 2048,
"f_target": 0.9,
"ppl_full": 22.49693114269694,
"ppl_at_df": 23.941233262843955,
"delta_ppl_df": 1.4443021201470145,
"knee_heuristic": 1024,
"df_ratio": 0.80615234375,
"sweep": {
"64": 45.05178022852514,
"128": 35.39264061338946,
"256": 30.868753730150203,
"512": 27.10712340123499,
"768": 25.202340040551977,
"1024": 23.906966979465913,
"1280": 22.180796411565147,
"1536": 23.290909891590083,
"1651": 23.941233262843955,
"2048": 22.49693114269694
}
},
{
"model": "EleutherAI/pythia-410m",
"tag": "pythia-410m",
"gamma": 1.022,
"phase": "B",
"D_f": 2048,
"N_full": 2048,
"f_target": 0.9,
"ppl_full": 13.986634758664158,
"ppl_at_df": 13.986634758664158,
"delta_ppl_df": 0.0,
"knee_heuristic": 768,
"df_ratio": 1.0,
"sweep": {
"64": 25.99515452550955,
"128": 20.497028981644508,
"256": 18.463651885474807,
"512": 15.943368404335239,
"768": 14.291077859966823,
"1024": 14.35273375572969,
"1280": 13.495131622333211,
"1536": 13.658299116074033,
"2048": 13.986634758664158
}
},
{
"model": "EleutherAI/pythia-1b",
"tag": "pythia-1b",
"gamma": 0.931,
"phase": "A",
"D_f": 445,
"N_full": 2048,
"f_target": 0.9,
"ppl_full": 11.797592766397049,
"ppl_at_df": 12.797310428095802,
"delta_ppl_df": 0.9997176616987531,
"knee_heuristic": 768,
"df_ratio": 0.21728515625,
"sweep": {
"64": 20.82336859802798,
"128": 16.34351822999527,
"256": 14.961815199990014,
"445": 12.797310428095802,
"512": 13.00719843117948,
"768": 11.944238442848128,
"1024": 11.90498469103213,
"1280": 11.308198053102004,
"1536": 11.367630159231787,
"2048": 11.797592766397049
}
},
{
"model": "EleutherAI/pythia-1.4b",
"tag": "pythia-1.4b",
"gamma": 0.705,
"phase": "A",
"D_f": 1433,
"N_full": 2048,
"f_target": 0.9,
"ppl_full": 10.531278903927081,
"ppl_at_df": 10.359741099193672,
"delta_ppl_df": -0.17153780473340952,
"knee_heuristic": 768,
"df_ratio": 0.69970703125,
"sweep": {
"64": 19.10579824421483,
"128": 14.338374193623926,
"256": 13.28459133971229,
"512": 11.393840662956944,
"768": 10.782337109011515,
"1024": 10.669041088499121,
"1280": 10.046359061813407,
"1433": 10.359741099193672,
"1536": 10.097998537270568,
"2048": 10.531278903927081
}
},
{
"model": "EleutherAI/pythia-2.8b",
"tag": "pythia-2.8b",
"gamma": 0.674,
"phase": "A",
"D_f": 1482,
"N_full": 2048,
"f_target": 0.9,
"ppl_full": 9.207173515380948,
"ppl_at_df": 8.86634974640192,
"delta_ppl_df": -0.34082376897902833,
"knee_heuristic": 768,
"df_ratio": 0.7236328125,
"sweep": {
"64": 16.845721029085166,
"128": 12.46358100808799,
"256": 11.516145416329916,
"512": 10.08633604056202,
"768": 9.462097751139718,
"1024": 9.502726676862784,
"1280": 8.677919928249965,
"1482": 8.86634974640192,
"1536": 8.778613874635514,
"2048": 9.207173515380948
}
}
]
} |