{ "config": { "N_full": 2048, "f_target": 0.9 }, "results": [ { "model": "EleutherAI/pythia-14m", "tag": "pythia-14m", "gamma": 0.685, "phase": "A", "D_f": 1466, "N_full": 2048, "f_target": 0.9, "ppl_full": 78.36305186690407, "ppl_at_df": 82.73959301570233, "delta_ppl_df": 4.376541148798253, "knee_heuristic": 1280, "df_ratio": 0.7158203125, "sweep": { "64": 170.27565431091813, "128": 129.87243953384964, "256": 116.10089939684218, "512": 100.39440165679781, "768": 93.42492785289417, "1024": 87.69748565849753, "1280": 82.71468331445736, "1466": 82.73959301570233, "1536": 84.26238377642993, "2048": 78.36305186690407 } }, { "model": "EleutherAI/pythia-70m", "tag": "pythia-70m", "gamma": 0.748, "phase": "A", "D_f": 1348, "N_full": 2048, "f_target": 0.9, "ppl_full": 39.42330332269695, "ppl_at_df": 38.29037532881002, "delta_ppl_df": -1.1329279938869306, "knee_heuristic": 1024, "df_ratio": 0.658203125, "sweep": { "64": 78.26171993639424, "128": 60.39432733839407, "256": 53.77005947252492, "512": 46.90251454572783, "768": 43.56558823784614, "1024": 41.0769018008491, "1280": 39.57051656987743, "1348": 38.29037532881002, "1536": 41.25781274772638, "2048": 39.42330332269695 } }, { "model": "EleutherAI/pythia-160m", "tag": "pythia-160m", "gamma": 0.511, "phase": "A", "D_f": 1651, "N_full": 2048, "f_target": 0.9, "ppl_full": 22.49693114269694, "ppl_at_df": 23.941233262843955, "delta_ppl_df": 1.4443021201470145, "knee_heuristic": 1024, "df_ratio": 0.80615234375, "sweep": { "64": 45.05178022852514, "128": 35.39264061338946, "256": 30.868753730150203, "512": 27.10712340123499, "768": 25.202340040551977, "1024": 23.906966979465913, "1280": 22.180796411565147, "1536": 23.290909891590083, "1651": 23.941233262843955, "2048": 22.49693114269694 } }, { "model": "EleutherAI/pythia-410m", "tag": "pythia-410m", "gamma": 1.022, "phase": "B", "D_f": 2048, "N_full": 2048, "f_target": 0.9, "ppl_full": 13.986634758664158, "ppl_at_df": 13.986634758664158, "delta_ppl_df": 0.0, "knee_heuristic": 768, "df_ratio": 1.0, "sweep": { "64": 25.99515452550955, "128": 20.497028981644508, "256": 18.463651885474807, "512": 15.943368404335239, "768": 14.291077859966823, "1024": 14.35273375572969, "1280": 13.495131622333211, "1536": 13.658299116074033, "2048": 13.986634758664158 } }, { "model": "EleutherAI/pythia-1b", "tag": "pythia-1b", "gamma": 0.931, "phase": "A", "D_f": 445, "N_full": 2048, "f_target": 0.9, "ppl_full": 11.797592766397049, "ppl_at_df": 12.797310428095802, "delta_ppl_df": 0.9997176616987531, "knee_heuristic": 768, "df_ratio": 0.21728515625, "sweep": { "64": 20.82336859802798, "128": 16.34351822999527, "256": 14.961815199990014, "445": 12.797310428095802, "512": 13.00719843117948, "768": 11.944238442848128, "1024": 11.90498469103213, "1280": 11.308198053102004, "1536": 11.367630159231787, "2048": 11.797592766397049 } }, { "model": "EleutherAI/pythia-1.4b", "tag": "pythia-1.4b", "gamma": 0.705, "phase": "A", "D_f": 1433, "N_full": 2048, "f_target": 0.9, "ppl_full": 10.531278903927081, "ppl_at_df": 10.359741099193672, "delta_ppl_df": -0.17153780473340952, "knee_heuristic": 768, "df_ratio": 0.69970703125, "sweep": { "64": 19.10579824421483, "128": 14.338374193623926, "256": 13.28459133971229, "512": 11.393840662956944, "768": 10.782337109011515, "1024": 10.669041088499121, "1280": 10.046359061813407, "1433": 10.359741099193672, "1536": 10.097998537270568, "2048": 10.531278903927081 } }, { "model": "EleutherAI/pythia-2.8b", "tag": "pythia-2.8b", "gamma": 0.674, "phase": "A", "D_f": 1482, "N_full": 2048, "f_target": 0.9, "ppl_full": 9.207173515380948, "ppl_at_df": 8.86634974640192, "delta_ppl_df": -0.34082376897902833, "knee_heuristic": 768, "df_ratio": 0.7236328125, "sweep": { "64": 16.845721029085166, "128": 12.46358100808799, "256": 11.516145416329916, "512": 10.08633604056202, "768": 9.462097751139718, "1024": 9.502726676862784, "1280": 8.677919928249965, "1482": 8.86634974640192, "1536": 8.778613874635514, "2048": 9.207173515380948 } } ] }