File size: 5,195 Bytes
535348a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
{
  "config": {
    "N_full": 2048,
    "f_target": 0.9
  },
  "results": [
    {
      "model": "EleutherAI/pythia-14m",
      "tag": "pythia-14m",
      "gamma": 0.685,
      "phase": "A",
      "D_f": 1466,
      "N_full": 2048,
      "f_target": 0.9,
      "ppl_full": 78.36305186690407,
      "ppl_at_df": 82.73959301570233,
      "delta_ppl_df": 4.376541148798253,
      "knee_heuristic": 1280,
      "df_ratio": 0.7158203125,
      "sweep": {
        "64": 170.27565431091813,
        "128": 129.87243953384964,
        "256": 116.10089939684218,
        "512": 100.39440165679781,
        "768": 93.42492785289417,
        "1024": 87.69748565849753,
        "1280": 82.71468331445736,
        "1466": 82.73959301570233,
        "1536": 84.26238377642993,
        "2048": 78.36305186690407
      }
    },
    {
      "model": "EleutherAI/pythia-70m",
      "tag": "pythia-70m",
      "gamma": 0.748,
      "phase": "A",
      "D_f": 1348,
      "N_full": 2048,
      "f_target": 0.9,
      "ppl_full": 39.42330332269695,
      "ppl_at_df": 38.29037532881002,
      "delta_ppl_df": -1.1329279938869306,
      "knee_heuristic": 1024,
      "df_ratio": 0.658203125,
      "sweep": {
        "64": 78.26171993639424,
        "128": 60.39432733839407,
        "256": 53.77005947252492,
        "512": 46.90251454572783,
        "768": 43.56558823784614,
        "1024": 41.0769018008491,
        "1280": 39.57051656987743,
        "1348": 38.29037532881002,
        "1536": 41.25781274772638,
        "2048": 39.42330332269695
      }
    },
    {
      "model": "EleutherAI/pythia-160m",
      "tag": "pythia-160m",
      "gamma": 0.511,
      "phase": "A",
      "D_f": 1651,
      "N_full": 2048,
      "f_target": 0.9,
      "ppl_full": 22.49693114269694,
      "ppl_at_df": 23.941233262843955,
      "delta_ppl_df": 1.4443021201470145,
      "knee_heuristic": 1024,
      "df_ratio": 0.80615234375,
      "sweep": {
        "64": 45.05178022852514,
        "128": 35.39264061338946,
        "256": 30.868753730150203,
        "512": 27.10712340123499,
        "768": 25.202340040551977,
        "1024": 23.906966979465913,
        "1280": 22.180796411565147,
        "1536": 23.290909891590083,
        "1651": 23.941233262843955,
        "2048": 22.49693114269694
      }
    },
    {
      "model": "EleutherAI/pythia-410m",
      "tag": "pythia-410m",
      "gamma": 1.022,
      "phase": "B",
      "D_f": 2048,
      "N_full": 2048,
      "f_target": 0.9,
      "ppl_full": 13.986634758664158,
      "ppl_at_df": 13.986634758664158,
      "delta_ppl_df": 0.0,
      "knee_heuristic": 768,
      "df_ratio": 1.0,
      "sweep": {
        "64": 25.99515452550955,
        "128": 20.497028981644508,
        "256": 18.463651885474807,
        "512": 15.943368404335239,
        "768": 14.291077859966823,
        "1024": 14.35273375572969,
        "1280": 13.495131622333211,
        "1536": 13.658299116074033,
        "2048": 13.986634758664158
      }
    },
    {
      "model": "EleutherAI/pythia-1b",
      "tag": "pythia-1b",
      "gamma": 0.931,
      "phase": "A",
      "D_f": 445,
      "N_full": 2048,
      "f_target": 0.9,
      "ppl_full": 11.797592766397049,
      "ppl_at_df": 12.797310428095802,
      "delta_ppl_df": 0.9997176616987531,
      "knee_heuristic": 768,
      "df_ratio": 0.21728515625,
      "sweep": {
        "64": 20.82336859802798,
        "128": 16.34351822999527,
        "256": 14.961815199990014,
        "445": 12.797310428095802,
        "512": 13.00719843117948,
        "768": 11.944238442848128,
        "1024": 11.90498469103213,
        "1280": 11.308198053102004,
        "1536": 11.367630159231787,
        "2048": 11.797592766397049
      }
    },
    {
      "model": "EleutherAI/pythia-1.4b",
      "tag": "pythia-1.4b",
      "gamma": 0.705,
      "phase": "A",
      "D_f": 1433,
      "N_full": 2048,
      "f_target": 0.9,
      "ppl_full": 10.531278903927081,
      "ppl_at_df": 10.359741099193672,
      "delta_ppl_df": -0.17153780473340952,
      "knee_heuristic": 768,
      "df_ratio": 0.69970703125,
      "sweep": {
        "64": 19.10579824421483,
        "128": 14.338374193623926,
        "256": 13.28459133971229,
        "512": 11.393840662956944,
        "768": 10.782337109011515,
        "1024": 10.669041088499121,
        "1280": 10.046359061813407,
        "1433": 10.359741099193672,
        "1536": 10.097998537270568,
        "2048": 10.531278903927081
      }
    },
    {
      "model": "EleutherAI/pythia-2.8b",
      "tag": "pythia-2.8b",
      "gamma": 0.674,
      "phase": "A",
      "D_f": 1482,
      "N_full": 2048,
      "f_target": 0.9,
      "ppl_full": 9.207173515380948,
      "ppl_at_df": 8.86634974640192,
      "delta_ppl_df": -0.34082376897902833,
      "knee_heuristic": 768,
      "df_ratio": 0.7236328125,
      "sweep": {
        "64": 16.845721029085166,
        "128": 12.46358100808799,
        "256": 11.516145416329916,
        "512": 10.08633604056202,
        "768": 9.462097751139718,
        "1024": 9.502726676862784,
        "1280": 8.677919928249965,
        "1482": 8.86634974640192,
        "1536": 8.778613874635514,
        "2048": 9.207173515380948
      }
    }
  ]
}