diff --git "a/llama3.2-3b/base/metrics.jsonl" "b/llama3.2-3b/base/metrics.jsonl" new file mode 100644--- /dev/null +++ "b/llama3.2-3b/base/metrics.jsonl" @@ -0,0 +1,286 @@ +{"timestamp": 1774774630.9683797, "event": "train_step", "step": 5, "epoch": 1, "metrics": {"train/step_loss": 1.7376942915074967, "train/step_real_loss": 1.0279407650232315, "train/lr": 5.435673913043477e-05, "train/step_canary_loss": 13.09375, "perf/step_duration_sec": 5.826191591098905, "perf/samples_per_sec": 5.835716088009235, "perf/tokens_per_sec": 4558.895735694508, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 26561.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774658.4905767, "event": "train_step", "step": 10, "epoch": 1, "metrics": {"train/step_loss": 1.0866596028208733, "train/step_real_loss": 1.0866596028208733, "train/lr": 0.00010870347826086954, "perf/step_duration_sec": 5.38905462808907, "perf/samples_per_sec": 5.9379617035626575, "perf/tokens_per_sec": 5098.482367721487, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27476.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774666.4864554, "event": "eval_step", "step": 10, "epoch": 1, "metrics": {"eval/loss": 1.0521757644720566, "eval/duration_sec": 7.908603790681809}} +{"timestamp": 1774774694.3593774, "event": "train_step", "step": 15, "epoch": 1, "metrics": {"train/step_loss": 1.0033046528697014, "train/step_real_loss": 1.0033046528697014, "train/lr": 0.00016305021739130426, "perf/step_duration_sec": 5.1999532310292125, "perf/samples_per_sec": 6.153901502238382, "perf/tokens_per_sec": 5005.814253227034, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26030.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774722.9361515, "event": "train_step", "step": 20, "epoch": 1, "metrics": {"train/step_loss": 1.0057218000292778, "train/step_real_loss": 1.0057218000292778, "train/lr": 0.00021739695652173903, "perf/step_duration_sec": 5.397422701120377, "perf/samples_per_sec": 5.928755587987867, "perf/tokens_per_sec": 5308.274260982512, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28651.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774730.947466, "event": "eval_step", "step": 20, "epoch": 1, "metrics": {"eval/loss": 1.0178508827319512, "eval/duration_sec": 7.890270393807441}} +{"timestamp": 1774774758.4879467, "event": "train_step", "step": 25, "epoch": 1, "metrics": {"train/step_loss": 1.0515899360179901, "train/step_real_loss": 1.0515899360179901, "train/lr": 0.0002717436956521738, "perf/step_duration_sec": 5.5793046480976045, "perf/samples_per_sec": 5.73548175235621, "perf/tokens_per_sec": 4655.418844865631, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25974.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774785.7959807, "event": "train_step", "step": 30, "epoch": 1, "metrics": {"train/step_loss": 1.0019300878047943, "train/step_real_loss": 1.0019300878047943, "train/lr": 0.00032609043478260864, "perf/step_duration_sec": 5.581340129952878, "perf/samples_per_sec": 5.733390055959584, "perf/tokens_per_sec": 4649.779335383222, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25952.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774793.8749254, "event": "eval_step", "step": 30, "epoch": 1, "metrics": {"eval/loss": 0.9919830321883545, "eval/duration_sec": 8.061391711700708}} +{"timestamp": 1774774822.2809412, "event": "train_step", "step": 35, "epoch": 1, "metrics": {"train/step_loss": 1.0768323242664337, "train/step_real_loss": 1.0768323242664337, "train/lr": 0.00038043717391304343, "perf/step_duration_sec": 5.585242229048163, "perf/samples_per_sec": 5.729384454191066, "perf/tokens_per_sec": 4316.554056440388, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24109.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774850.0177305, "event": "train_step", "step": 40, "epoch": 1, "metrics": {"train/step_loss": 1.3942741365993725, "train/step_real_loss": 0.9833693876862526, "train/lr": 0.0004347839130434782, "train/step_canary_loss": 7.96875, "perf/step_duration_sec": 5.774645959958434, "perf/samples_per_sec": 5.887806843182596, "perf/tokens_per_sec": 4655.3503342728745, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 26883.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.309076309204102, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774857.956681, "event": "eval_step", "step": 40, "epoch": 1, "metrics": {"eval/loss": 0.9743371101526114, "eval/duration_sec": 7.913363870233297}} +{"timestamp": 1774774885.7185736, "event": "train_step", "step": 45, "epoch": 1, "metrics": {"train/step_loss": 1.0451335459947586, "train/step_real_loss": 1.0451335459947586, "train/lr": 0.000489130652173913, "perf/step_duration_sec": 5.165082773193717, "perf/samples_per_sec": 6.195447663699975, "perf/tokens_per_sec": 5063.229603158804, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26152.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774913.0309327, "event": "train_step", "step": 50, "epoch": 1, "metrics": {"train/step_loss": 0.9857069253921509, "train/step_real_loss": 0.9857069253921509, "train/lr": 0.0004999777821747893, "perf/step_duration_sec": 5.572589282877743, "perf/samples_per_sec": 5.742393414551963, "perf/tokens_per_sec": 4996.600069837027, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27844.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774921.0027752, "event": "eval_step", "step": 50, "epoch": 1, "metrics": {"eval/loss": 0.9664217413235935, "eval/duration_sec": 7.925320792943239}} +{"timestamp": 1774774948.7752097, "event": "train_step", "step": 55, "epoch": 1, "metrics": {"train/step_loss": 1.12283502925526, "train/step_real_loss": 0.9264782816171646, "train/lr": 0.0004998875302223462, "train/step_canary_loss": 7.40625, "perf/step_duration_sec": 5.315569802187383, "perf/samples_per_sec": 6.208177340916554, "perf/tokens_per_sec": 5397.3517548756345, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28690.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774975.8708904, "event": "train_step", "step": 60, "epoch": 1, "metrics": {"train/step_loss": 1.2486624211976023, "train/step_real_loss": 1.086511254310608, "train/lr": 0.0004997278849938866, "train/step_canary_loss": 6.4375, "perf/step_duration_sec": 5.90225095115602, "perf/samples_per_sec": 5.5910872433400325, "perf/tokens_per_sec": 3923.418402849247, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 23157.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774774983.9071195, "event": "eval_step", "step": 60, "epoch": 1, "metrics": {"eval/loss": 0.9574422362523202, "eval/duration_sec": 7.997456628829241}} +{"timestamp": 1774775012.725943, "event": "train_step", "step": 65, "epoch": 1, "metrics": {"train/step_loss": 1.105621099472046, "train/step_real_loss": 1.105621099472046, "train/lr": 0.0004994988986502189, "perf/step_duration_sec": 5.180609977804124, "perf/samples_per_sec": 6.17687881100898, "perf/tokens_per_sec": 4173.639801613631, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21622.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775040.087791, "event": "train_step", "step": 70, "epoch": 1, "metrics": {"train/step_loss": 0.9989377036690712, "train/step_real_loss": 0.9989377036690712, "train/lr": 0.000499200646007942, "perf/step_duration_sec": 5.574743082281202, "perf/samples_per_sec": 5.740174843520412, "perf/tokens_per_sec": 4310.153785628389, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24028.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775048.0976992, "event": "eval_step", "step": 70, "epoch": 1, "metrics": {"eval/loss": 0.9466325915776765, "eval/duration_sec": 7.969701467081904}} +{"timestamp": 1774775076.7830048, "event": "train_step", "step": 75, "epoch": 1, "metrics": {"train/step_loss": 2.0300879268085255, "train/step_real_loss": 1.0163434371352196, "train/lr": 0.0004988332245149987, "train/step_canary_loss": 18.25, "perf/step_duration_sec": 5.989594132173806, "perf/samples_per_sec": 5.676511504738697, "perf/tokens_per_sec": 4127.992557490125, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 24725.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775103.9787858, "event": "train_step", "step": 80, "epoch": 1, "metrics": {"train/step_loss": 0.9566473066806793, "train/step_real_loss": 0.9566473066806793, "train/lr": 0.0004983967542188372, "perf/step_duration_sec": 5.1745062889531255, "perf/samples_per_sec": 6.184164867731574, "perf/tokens_per_sec": 4881.238632161378, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25258.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775112.0318327, "event": "eval_step", "step": 80, "epoch": 1, "metrics": {"eval/loss": 0.9382078636151093, "eval/duration_sec": 7.88399560097605}} +{"timestamp": 1774775138.8300672, "event": "train_step", "step": 85, "epoch": 1, "metrics": {"train/step_loss": 1.063789241015911, "train/step_real_loss": 1.063789241015911, "train/lr": 0.0004978913777271885, "perf/step_duration_sec": 5.179419934283942, "perf/samples_per_sec": 6.178298034531548, "perf/tokens_per_sec": 4114.746490998011, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21312.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775166.545669, "event": "train_step", "step": 90, "epoch": 1, "metrics": {"train/step_loss": 1.1349567025899887, "train/step_real_loss": 1.1349567025899887, "train/lr": 0.0004973172601614717, "perf/step_duration_sec": 5.381766837090254, "perf/samples_per_sec": 5.946002673222714, "perf/tokens_per_sec": 4833.356922995914, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26012.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775174.6035292, "event": "eval_step", "step": 90, "epoch": 1, "metrics": {"eval/loss": 0.9297710601717997, "eval/duration_sec": 7.9691899609752}} +{"timestamp": 1774775202.9612648, "event": "train_step", "step": 95, "epoch": 1, "metrics": {"train/step_loss": 0.9695673882961273, "train/step_real_loss": 0.9695673882961273, "train/lr": 0.0004966745891028439, "perf/step_duration_sec": 5.979987022001296, "perf/samples_per_sec": 5.3511821818788325, "perf/tokens_per_sec": 4381.614860299662, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26202.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775230.8221097, "event": "train_step", "step": 100, "epoch": 1, "metrics": {"train/step_loss": 1.4314525197534, "train/step_real_loss": 0.9916213899850845, "train/lr": 0.0004959635745309124, "train/step_canary_loss": 8.46875, "perf/step_duration_sec": 5.961604126729071, "perf/samples_per_sec": 5.703162987216771, "perf/tokens_per_sec": 4744.025164837198, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 28282.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775238.7748277, "event": "eval_step", "step": 100, "epoch": 1, "metrics": {"eval/loss": 0.9203244275771655, "eval/duration_sec": 7.905110601801425}} +{"timestamp": 1774775266.3180938, "event": "train_step", "step": 105, "epoch": 1, "metrics": {"train/step_loss": 1.0869614210995762, "train/step_real_loss": 0.936358630657196, "train/lr": 0.0004951844487551275, "train/step_canary_loss": 5.90625, "perf/step_duration_sec": 5.5539976679719985, "perf/samples_per_sec": 5.941666160628711, "perf/tokens_per_sec": 4291.323371891658, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 23834.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775294.6243305, "event": "train_step", "step": 110, "epoch": 1, "metrics": {"train/step_loss": 0.9668704345822334, "train/step_real_loss": 0.9668704345822334, "train/lr": 0.0004943374663388811, "perf/step_duration_sec": 5.608877829276025, "perf/samples_per_sec": 5.705241043577954, "perf/tokens_per_sec": 4236.676341204467, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23763.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775302.694821, "event": "eval_step", "step": 110, "epoch": 1, "metrics": {"eval/loss": 0.9109133751346514, "eval/duration_sec": 7.881609035190195}} +{"timestamp": 1774775329.808947, "event": "train_step", "step": 115, "epoch": 1, "metrics": {"train/step_loss": 0.8276027888059616, "train/step_real_loss": 0.8276027888059616, "train/lr": 0.0004934229040163317, "perf/step_duration_sec": 5.393389719072729, "perf/samples_per_sec": 5.933188897297351, "perf/tokens_per_sec": 5017.252861277073, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27060.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775358.0881875, "event": "train_step", "step": 120, "epoch": 1, "metrics": {"train/step_loss": 0.9285370782017708, "train/step_real_loss": 0.9285370782017708, "train/lr": 0.0004924410606019895, "perf/step_duration_sec": 5.771311984863132, "perf/samples_per_sec": 5.5446664612706575, "perf/tokens_per_sec": 4637.420411545247, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26764.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775366.145451, "event": "eval_step", "step": 120, "epoch": 1, "metrics": {"eval/loss": 0.9056128280667157, "eval/duration_sec": 7.9613511129282415}} +{"timestamp": 1774775394.2712746, "event": "train_step", "step": 125, "epoch": 1, "metrics": {"train/step_loss": 1.0212737768888474, "train/step_real_loss": 1.0212737768888474, "train/lr": 0.0004913922568930828, "perf/step_duration_sec": 5.368122776038945, "perf/samples_per_sec": 5.961115521208758, "perf/tokens_per_sec": 4052.2545604016914, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21753.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775421.7204714, "event": "train_step", "step": 130, "epoch": 1, "metrics": {"train/step_loss": 1.0224003419280052, "train/step_real_loss": 1.0224003419280052, "train/lr": 0.0004902768355647447, "perf/step_duration_sec": 5.571536075789481, "perf/samples_per_sec": 5.743478919404758, "perf/tokens_per_sec": 4874.598249128558, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27159.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775429.7657063, "event": "eval_step", "step": 130, "epoch": 1, "metrics": {"eval/loss": 0.9001456789481336, "eval/duration_sec": 8.018211482092738}} +{"timestamp": 1774775456.8890867, "event": "train_step", "step": 135, "epoch": 1, "metrics": {"train/step_loss": 0.8972410336136818, "train/step_real_loss": 0.8972410336136818, "train/lr": 0.0004890951610580518, "perf/step_duration_sec": 5.17974199866876, "perf/samples_per_sec": 6.177913882240522, "perf/tokens_per_sec": 4811.629615221266, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24923.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775484.432076, "event": "train_step", "step": 140, "epoch": 1, "metrics": {"train/step_loss": 0.8545509427785873, "train/step_real_loss": 0.8545509427785873, "train/lr": 0.00048784761946094984, "perf/step_duration_sec": 5.174998348113149, "perf/samples_per_sec": 6.183576853056868, "perf/tokens_per_sec": 4906.668232900625, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25392.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775492.4296482, "event": "eval_step", "step": 140, "epoch": 1, "metrics": {"eval/loss": 0.8939431065168136, "eval/duration_sec": 7.929781843908131}} +{"timestamp": 1774775519.0743253, "event": "train_step", "step": 145, "epoch": 1, "metrics": {"train/step_loss": 0.8470374792814255, "train/step_real_loss": 0.8470374792814255, "train/lr": 0.00048653461838210686, "perf/step_duration_sec": 5.36817417293787, "perf/samples_per_sec": 5.961058447268521, "perf/tokens_per_sec": 4559.837146007465, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24478.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775546.5810082, "event": "train_step", "step": 150, "epoch": 1, "metrics": {"train/step_loss": 0.8461332246661186, "train/step_real_loss": 0.8461332246661186, "train/lr": 0.0004851565868177367, "perf/step_duration_sec": 5.567754249088466, "perf/samples_per_sec": 5.7473801048670445, "perf/tokens_per_sec": 4746.078727222239, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26425.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775554.5955617, "event": "eval_step", "step": 150, "epoch": 1, "metrics": {"eval/loss": 0.8901280065377553, "eval/duration_sec": 7.92262617778033}} +{"timestamp": 1774775582.1919334, "event": "train_step", "step": 155, "epoch": 1, "metrics": {"train/step_loss": 1.1260311097809763, "train/step_real_loss": 0.8858289495110512, "train/lr": 0.0004837139750114322, "train/step_canary_loss": 8.8125, "perf/step_duration_sec": 5.765269848983735, "perf/samples_per_sec": 5.723929818448486, "perf/tokens_per_sec": 4892.052018167306, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28204.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775610.684761, "event": "train_step", "step": 160, "epoch": 1, "metrics": {"train/step_loss": 0.864671066403389, "train/step_real_loss": 0.864671066403389, "train/lr": 0.00048220725430705806, "perf/step_duration_sec": 5.361798626836389, "perf/samples_per_sec": 5.968146554373843, "perf/tokens_per_sec": 4825.806002950725, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25875.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775618.699523, "event": "eval_step", "step": 160, "epoch": 1, "metrics": {"eval/loss": 0.8862609028434142, "eval/duration_sec": 7.936493613757193}} +{"timestamp": 1774775645.782651, "event": "train_step", "step": 165, "epoch": 1, "metrics": {"train/step_loss": 0.9830832034349442, "train/step_real_loss": 0.9830832034349442, "train/lr": 0.00048063691699474804, "perf/step_duration_sec": 5.39114395994693, "perf/samples_per_sec": 5.935660453095191, "perf/tokens_per_sec": 4684.163544437902, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25253.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775673.4212196, "event": "train_step", "step": 170, "epoch": 1, "metrics": {"train/step_loss": 0.9654412195086479, "train/step_real_loss": 0.9654412195086479, "train/lr": 0.00047900347615006017, "perf/step_duration_sec": 5.577466246206313, "perf/samples_per_sec": 5.7373722381136405, "perf/tokens_per_sec": 5242.882468341221, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29242.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775681.4606564, "event": "eval_step", "step": 170, "epoch": 1, "metrics": {"eval/loss": 0.8838135512211385, "eval/duration_sec": 7.941723851952702}} +{"timestamp": 1774775709.48593, "event": "train_step", "step": 175, "epoch": 1, "metrics": {"train/step_loss": 0.8205339536070824, "train/step_real_loss": 0.8205339536070824, "train/lr": 0.00047730746546633914, "perf/step_duration_sec": 5.358594580087811, "perf/samples_per_sec": 5.9717150685200036, "perf/tokens_per_sec": 4261.191933580805, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22834.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775736.9742084, "event": "train_step", "step": 180, "epoch": 1, "metrics": {"train/step_loss": 0.8968394175171852, "train/step_real_loss": 0.8968394175171852, "train/lr": 0.0004755494390803433, "perf/step_duration_sec": 5.98482184484601, "perf/samples_per_sec": 5.34685924319663, "perf/tokens_per_sec": 4289.852006557197, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25674.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.76085042953491}} +{"timestamp": 1774775745.0749848, "event": "eval_step", "step": 180, "epoch": 1, "metrics": {"eval/loss": 0.8778814083108535, "eval/duration_sec": 7.924314956646413}} +{"timestamp": 1774775769.694859, "event": "train_epoch", "step": 183, "epoch": 1, "metrics": {"train/epoch_loss": 1.0324704680727712, "train/epoch_real_loss": 0.9777002134665723, "train/epoch_canary_loss": 8.240353565705128, "perf/epoch_duration_sec": 1159.8850529491901, "perf/epoch_samples_per_sec": 40.70489561008957, "perf/epoch_tokens_per_sec": 32071.111620428415, "perf/epoch_samples": 47213.0, "perf/epoch_tokens": 37198803.0, "system/cuda_epoch_peak_memory_gb": 40.76085042953491, "eval/loss": 0.8772943461170563, "eval/duration_sec": 8.013256915379316}} +{"timestamp": 1774775781.8219402, "event": "audit_epoch", "step": 183, "epoch": 1, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.636296, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.548, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 7.395085104741156}} +{"timestamp": 1774775793.305373, "event": "train_step", "step": 185, "epoch": 2, "metrics": {"train/step_loss": 0.8076266273856163, "train/step_real_loss": 0.8076266273856163, "train/lr": 0.0004737299713911917, "perf/step_duration_sec": 5.364810147322714, "perf/samples_per_sec": 5.964796352759932, "perf/tokens_per_sec": 4684.6019355488315, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25132.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 35.8866229057312}} +{"timestamp": 1774775820.63205, "event": "train_step", "step": 190, "epoch": 2, "metrics": {"train/step_loss": 0.7448036782443523, "train/step_real_loss": 0.7448036782443523, "train/lr": 0.00047184965687269083, "perf/step_duration_sec": 5.167379370890558, "perf/samples_per_sec": 6.192694149817192, "perf/tokens_per_sec": 4981.4418784513855, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25741.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774775828.6591399, "event": "eval_step", "step": 190, "epoch": 2, "metrics": {"eval/loss": 0.8794935825161444, "eval/duration_sec": 7.939059536904097}} +{"timestamp": 1774775855.9437473, "event": "train_step", "step": 195, "epoch": 2, "metrics": {"train/step_loss": 0.7590245306491852, "train/step_real_loss": 0.7590245306491852, "train/lr": 0.00046990910987910263, "perf/step_duration_sec": 5.56153394235298, "perf/samples_per_sec": 5.753808271547005, "perf/tokens_per_sec": 4379.187514172728, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24355.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774775884.7322023, "event": "train_step", "step": 200, "epoch": 2, "metrics": {"train/step_loss": 0.652218334376812, "train/step_real_loss": 0.652218334376812, "train/lr": 0.00046790896444441697, "perf/step_duration_sec": 5.36213842080906, "perf/samples_per_sec": 5.967768358201337, "perf/tokens_per_sec": 5338.728274694678, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28627.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774775892.7094324, "event": "eval_step", "step": 200, "epoch": 2, "metrics": {"eval/loss": 0.8785055088691223, "eval/duration_sec": 7.934078328777105}} +{"timestamp": 1774775920.5316396, "event": "train_step", "step": 205, "epoch": 2, "metrics": {"train/step_loss": 0.8499046936631203, "train/step_real_loss": 0.8499046936631203, "train/lr": 0.0004658498740751934, "perf/step_duration_sec": 5.365024707745761, "perf/samples_per_sec": 5.964557806005994, "perf/tokens_per_sec": 4599.978815450685, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24679.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774775948.0270805, "event": "train_step", "step": 210, "epoch": 2, "metrics": {"train/step_loss": 0.6906371340155602, "train/step_real_loss": 0.6906371340155602, "train/lr": 0.0004637325115370421, "perf/step_duration_sec": 5.1527633420191705, "perf/samples_per_sec": 6.210259985947739, "perf/tokens_per_sec": 4914.25635513027, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25322.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774775956.2007813, "event": "eval_step", "step": 210, "epoch": 2, "metrics": {"eval/loss": 0.8737284688231273, "eval/duration_sec": 7.978559153154492}} +{"timestamp": 1774775985.7674499, "event": "train_step", "step": 215, "epoch": 2, "metrics": {"train/step_loss": 0.8288030922412872, "train/step_real_loss": 0.8288030922412872, "train/lr": 0.0004615575686348109, "perf/step_duration_sec": 5.373444939032197, "perf/samples_per_sec": 5.95521129611937, "perf/tokens_per_sec": 5301.62685637027, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28488.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776013.8964658, "event": "train_step", "step": 220, "epoch": 2, "metrics": {"train/step_loss": 0.7316618040204048, "train/step_real_loss": 0.7316618040204048, "train/lr": 0.00045932575598655267, "perf/step_duration_sec": 5.566553776152432, "perf/samples_per_sec": 5.748619574482618, "perf/tokens_per_sec": 4976.867396608326, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27704.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776022.0051532, "event": "eval_step", "step": 220, "epoch": 2, "metrics": {"eval/loss": 0.8718125514495066, "eval/duration_sec": 7.929856379982084}} +{"timestamp": 1774776049.111988, "event": "train_step", "step": 225, "epoch": 2, "metrics": {"train/step_loss": 0.7060727328062057, "train/step_real_loss": 0.7060727328062057, "train/lr": 0.0004570378027913448, "perf/step_duration_sec": 5.559268434997648, "perf/samples_per_sec": 5.756153057576458, "perf/tokens_per_sec": 4764.655693408913, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26488.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776077.183969, "event": "train_step", "step": 230, "epoch": 2, "metrics": {"train/step_loss": 0.7760453596711159, "train/step_real_loss": 0.7760453596711159, "train/lr": 0.0004546944565910387, "perf/step_duration_sec": 5.78074945975095, "perf/samples_per_sec": 5.535614408270626, "perf/tokens_per_sec": 4541.971621986049, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26256.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776085.2376785, "event": "eval_step", "step": 230, "epoch": 2, "metrics": {"eval/loss": 0.8689703196287155, "eval/duration_sec": 8.007632470224053}} +{"timestamp": 1774776112.9358788, "event": "train_step", "step": 235, "epoch": 2, "metrics": {"train/step_loss": 0.7558778375387192, "train/step_real_loss": 0.7558778375387192, "train/lr": 0.0004522964830260161, "perf/step_duration_sec": 5.367463030852377, "perf/samples_per_sec": 5.96184823557476, "perf/tokens_per_sec": 5454.9048277935435, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29279.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776141.1622984, "event": "train_step", "step": 240, "epoch": 2, "metrics": {"train/step_loss": 0.76904394048633, "train/step_real_loss": 0.7086039036512375, "train/lr": 0.00044984466558503123, "train/step_canary_loss": 2.703125, "perf/step_duration_sec": 5.580194680951536, "perf/samples_per_sec": 5.913772168675096, "perf/tokens_per_sec": 5060.038513779092, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28236.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776149.1531231, "event": "eval_step", "step": 240, "epoch": 2, "metrics": {"eval/loss": 0.8667729342213044, "eval/duration_sec": 7.93614345183596}} +{"timestamp": 1774776177.126122, "event": "train_step", "step": 245, "epoch": 2, "metrics": {"train/step_loss": 0.8343641793026644, "train/step_real_loss": 0.7287970781326294, "train/lr": 0.0004473398053492222, "train/step_canary_loss": 2.5234375, "perf/step_duration_sec": 5.583012884948403, "perf/samples_per_sec": 6.089901761047829, "perf/tokens_per_sec": 4903.803835704925, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 27378.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.309076309204102, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776204.2368739, "event": "train_step", "step": 250, "epoch": 2, "metrics": {"train/step_loss": 0.8555259630084038, "train/step_real_loss": 0.8555259630084038, "train/lr": 0.00044478272073037416, "perf/step_duration_sec": 5.37336559779942, "perf/samples_per_sec": 5.955299228681762, "perf/tokens_per_sec": 4878.320583794844, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26213.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776212.2683573, "event": "eval_step", "step": 250, "epoch": 2, "metrics": {"eval/loss": 0.863982966886117, "eval/duration_sec": 7.995873822830617}} +{"timestamp": 1774776241.0378666, "event": "train_step", "step": 255, "epoch": 2, "metrics": {"train/step_loss": 0.7277687228087223, "train/step_real_loss": 0.7501510009169579, "train/lr": 0.0004421742472035195, "train/step_canary_loss": 0.01153564453125, "perf/step_duration_sec": 5.587382384110242, "perf/samples_per_sec": 5.906164592179609, "perf/tokens_per_sec": 3921.514314522649, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 21911.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.309076309204102, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776270.3138738, "event": "train_step", "step": 260, "epoch": 2, "metrics": {"train/step_loss": 0.8642720058560371, "train/step_real_loss": 0.8642720058560371, "train/lr": 0.0004395152370339644, "perf/step_duration_sec": 5.579927761107683, "perf/samples_per_sec": 5.734841268562877, "perf/tokens_per_sec": 4596.475276753146, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25648.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776278.4397972, "event": "eval_step", "step": 260, "epoch": 2, "metrics": {"eval/loss": 0.8662478572283037, "eval/duration_sec": 8.105569439008832}} +{"timestamp": 1774776306.3901358, "event": "train_step", "step": 265, "epoch": 2, "metrics": {"train/step_loss": 0.718508817255497, "train/step_real_loss": 0.718508817255497, "train/lr": 0.0004368065589988278, "perf/step_duration_sec": 5.168991433922201, "perf/samples_per_sec": 6.190762822703806, "perf/tokens_per_sec": 4644.426346395321, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24007.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776333.5423276, "event": "train_step", "step": 270, "epoch": 2, "metrics": {"train/step_loss": 0.8245533406734467, "train/step_real_loss": 0.8245533406734467, "train/lr": 0.0004340490981031868, "perf/step_duration_sec": 5.38081661472097, "perf/samples_per_sec": 5.947052704315106, "perf/tokens_per_sec": 5171.705707990024, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27828.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776341.6446574, "event": "eval_step", "step": 270, "epoch": 2, "metrics": {"eval/loss": 0.8661514190145027, "eval/duration_sec": 7.946869337931275}} +{"timestamp": 1774776369.635367, "event": "train_step", "step": 275, "epoch": 2, "metrics": {"train/step_loss": 0.7141601741313934, "train/step_real_loss": 0.7141601741313934, "train/lr": 0.00043124375529091905, "perf/step_duration_sec": 5.573423901107162, "perf/samples_per_sec": 5.741533493198533, "perf/tokens_per_sec": 4624.446382928093, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25774.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776397.8161643, "event": "train_step", "step": 280, "epoch": 2, "metrics": {"train/step_loss": 0.7535178437829018, "train/step_real_loss": 0.7535178437829018, "train/lr": 0.00042839144715033767, "perf/step_duration_sec": 5.378484238870442, "perf/samples_per_sec": 5.949631639474778, "perf/tokens_per_sec": 5157.029149503497, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27737.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776405.8194575, "event": "eval_step", "step": 280, "epoch": 2, "metrics": {"eval/loss": 0.861811788036273, "eval/duration_sec": 7.961057364009321}} +{"timestamp": 1774776433.6981711, "event": "train_step", "step": 285, "epoch": 2, "metrics": {"train/step_loss": 0.709605872631073, "train/step_real_loss": 0.709605872631073, "train/lr": 0.0004254931056147153, "perf/step_duration_sec": 5.163066201843321, "perf/samples_per_sec": 6.197867458793254, "perf/tokens_per_sec": 5321.643946806357, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27476.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776461.6529195, "event": "train_step", "step": 290, "epoch": 2, "metrics": {"train/step_loss": 0.7202157080173492, "train/step_real_loss": 0.7202157080173492, "train/lr": 0.00042254967765779363, "perf/step_duration_sec": 5.372432867996395, "perf/samples_per_sec": 5.956333152271504, "perf/tokens_per_sec": 4680.747180630359, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25147.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776469.7127335, "event": "eval_step", "step": 290, "epoch": 2, "metrics": {"eval/loss": 0.8544893343097125, "eval/duration_sec": 8.03930533118546}} +{"timestamp": 1774776498.0963783, "event": "train_step", "step": 295, "epoch": 2, "metrics": {"train/step_loss": 0.7784466817975044, "train/step_real_loss": 0.7784466817975044, "train/lr": 0.000419562124984379, "perf/step_duration_sec": 5.5765793598257005, "perf/samples_per_sec": 5.738284696624524, "perf/tokens_per_sec": 4399.291826946545, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24533.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776526.0133624, "event": "train_step", "step": 300, "epoch": 2, "metrics": {"train/step_loss": 0.6268721421559652, "train/step_real_loss": 0.5993427522480488, "train/lr": 0.00041653142371612543, "train/step_canary_loss": 1.5078125, "perf/step_duration_sec": 5.989714682102203, "perf/samples_per_sec": 5.509444397845345, "perf/tokens_per_sec": 4500.715214458025, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26958.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776534.003708, "event": "eval_step", "step": 300, "epoch": 2, "metrics": {"eval/loss": 0.8525437855949769, "eval/duration_sec": 7.982475476805121}} +{"timestamp": 1774776562.38724, "event": "train_step", "step": 305, "epoch": 2, "metrics": {"train/step_loss": 0.7516753822565079, "train/step_real_loss": 0.7516753822565079, "train/lr": 0.0004134585640726066, "perf/step_duration_sec": 5.571442970074713, "perf/samples_per_sec": 5.7435749000533844, "perf/tokens_per_sec": 4432.603929116199, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24696.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776590.628121, "event": "train_step", "step": 310, "epoch": 2, "metrics": {"train/step_loss": 0.7619218302495552, "train/step_real_loss": 0.7694965302944183, "train/lr": 0.000410344550047782, "train/step_canary_loss": 0.51953125, "perf/step_duration_sec": 5.761852690950036, "perf/samples_per_sec": 5.727324485721768, "perf/tokens_per_sec": 3908.6386285739372, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 22521.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776598.607028, "event": "eval_step", "step": 310, "epoch": 2, "metrics": {"eval/loss": 0.8489250848308587, "eval/duration_sec": 7.955447633285075}} +{"timestamp": 1774776625.8867862, "event": "train_step", "step": 315, "epoch": 2, "metrics": {"train/step_loss": 0.7901253327727318, "train/step_real_loss": 0.7901253327727318, "train/lr": 0.00040719039908196234, "perf/step_duration_sec": 5.368590538389981, "perf/samples_per_sec": 5.960596132480737, "perf/tokens_per_sec": 4297.217274260332, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23070.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776654.1846123, "event": "train_step", "step": 320, "epoch": 2, "metrics": {"train/step_loss": 0.8376321135198369, "train/step_real_loss": 0.7416381686925888, "train/lr": 0.0004039971417293818, "train/step_canary_loss": 2.37353515625, "perf/step_duration_sec": 5.967966071330011, "perf/samples_per_sec": 5.697083326819722, "perf/tokens_per_sec": 4238.797556428191, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 25297.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776662.308549, "event": "eval_step", "step": 320, "epoch": 2, "metrics": {"eval/loss": 0.845888259032598, "eval/duration_sec": 7.9704041220247746}} +{"timestamp": 1774776690.1333847, "event": "train_step", "step": 325, "epoch": 2, "metrics": {"train/step_loss": 0.7891391478478909, "train/step_real_loss": 0.7891391478478909, "train/lr": 0.0004007658213214857, "perf/step_duration_sec": 5.573860913980752, "perf/samples_per_sec": 5.741083334127577, "perf/tokens_per_sec": 4746.081828781591, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26454.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776716.8682008, "event": "train_step", "step": 330, "epoch": 2, "metrics": {"train/step_loss": 0.7093489803373814, "train/step_real_loss": 0.7093489803373814, "train/lr": 0.0003974974936260431, "perf/step_duration_sec": 5.1670702011324465, "perf/samples_per_sec": 6.1930646874096436, "perf/tokens_per_sec": 5235.268526847446, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27051.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776724.9288714, "event": "eval_step", "step": 330, "epoch": 2, "metrics": {"eval/loss": 0.8443809017921104, "eval/duration_sec": 7.985958245117217}} +{"timestamp": 1774776753.2853484, "event": "train_step", "step": 335, "epoch": 2, "metrics": {"train/step_loss": 0.730043888092041, "train/step_real_loss": 0.730043888092041, "train/lr": 0.0003941932265021964, "perf/step_duration_sec": 5.164687369950116, "perf/samples_per_sec": 6.195921980909578, "perf/tokens_per_sec": 4690.893807234261, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24227.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776781.4298496, "event": "train_step", "step": 340, "epoch": 2, "metrics": {"train/step_loss": 0.8819202110171318, "train/step_real_loss": 0.8819202110171318, "train/lr": 0.0003908540995515602, "perf/step_duration_sec": 5.572398900985718, "perf/samples_per_sec": 5.742589604333499, "perf/tokens_per_sec": 4592.815491990852, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25593.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776789.4790769, "event": "eval_step", "step": 340, "epoch": 2, "metrics": {"eval/loss": 0.841956962664158, "eval/duration_sec": 7.9737810720689595}} +{"timestamp": 1774776816.717731, "event": "train_step", "step": 345, "epoch": 2, "metrics": {"train/step_loss": 0.8309179916977882, "train/step_real_loss": 0.8309179916977882, "train/lr": 0.0003874812037654838, "perf/step_duration_sec": 5.362099436111748, "perf/samples_per_sec": 5.967811746364099, "perf/tokens_per_sec": 4013.3533994298564, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21520.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776844.9900293, "event": "train_step", "step": 350, "epoch": 2, "metrics": {"train/step_loss": 0.7020573616027832, "train/step_real_loss": 0.7020573616027832, "train/lr": 0.0003840756411685917, "perf/step_duration_sec": 5.969969522673637, "perf/samples_per_sec": 5.360161367401567, "perf/tokens_per_sec": 4397.174876739329, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26251.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776853.1197565, "event": "eval_step", "step": 350, "epoch": 2, "metrics": {"eval/loss": 0.8392755987170415, "eval/duration_sec": 7.955886598676443}} +{"timestamp": 1774776881.408301, "event": "train_step", "step": 355, "epoch": 2, "metrics": {"train/step_loss": 0.692126452922821, "train/step_real_loss": 0.692126452922821, "train/lr": 0.0003806385244587199, "perf/step_duration_sec": 5.780137637164444, "perf/samples_per_sec": 5.5362003482841295, "perf/tokens_per_sec": 4392.628963841689, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25390.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776908.6903682, "event": "train_step", "step": 360, "epoch": 2, "metrics": {"train/step_loss": 0.7023879587650299, "train/step_real_loss": 0.7023879587650299, "train/lr": 0.0003771709766433647, "perf/step_duration_sec": 5.374140535015613, "perf/samples_per_sec": 5.954440489879566, "perf/tokens_per_sec": 5119.888439969884, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27515.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776916.9197178, "event": "eval_step", "step": 360, "epoch": 2, "metrics": {"eval/loss": 0.8363649192719886, "eval/duration_sec": 8.086796815041453}} +{"timestamp": 1774776944.4466727, "event": "train_step", "step": 365, "epoch": 2, "metrics": {"train/step_loss": 0.6544849425554276, "train/step_real_loss": 0.6544849425554276, "train/lr": 0.0003736741306727628, "perf/step_duration_sec": 5.381015019025654, "perf/samples_per_sec": 5.9468334295402645, "perf/tokens_per_sec": 5396.565498763117, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29039.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.565303325653076}} +{"timestamp": 1774776958.9153988, "event": "train_epoch", "step": 366, "epoch": 2, "metrics": {"train/epoch_loss": 0.7737582686968845, "train/epoch_real_loss": 0.7517354765453992, "train/epoch_canary_loss": 3.424581639159089, "perf/epoch_duration_sec": 1168.899031253066, "perf/epoch_samples_per_sec": 40.4380521637771, "perf/epoch_tokens_per_sec": 31826.947414028342, "perf/epoch_samples": 47268.0, "perf/epoch_tokens": 37202488.0, "system/cuda_epoch_peak_memory_gb": 45.565303325653076, "eval/loss": 0.8350773404997128, "eval/duration_sec": 8.141189494170249}} +{"timestamp": 1774776973.4443514, "event": "audit_epoch", "step": 366, "epoch": 2, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.964016, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.628, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 9.227223775815219}} +{"timestamp": 1774776996.0105424, "event": "train_step", "step": 370, "epoch": 3, "metrics": {"train/step_loss": 0.6793505772948265, "train/step_real_loss": 0.6793505772948265, "train/lr": 0.00037014912906972323, "perf/step_duration_sec": 5.171965967863798, "perf/samples_per_sec": 6.187202351839355, "perf/tokens_per_sec": 5436.617366531844, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28118.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 35.8866229057312}} +{"timestamp": 1774777004.047985, "event": "eval_step", "step": 370, "epoch": 3, "metrics": {"eval/loss": 0.85083808692602, "eval/duration_sec": 7.9939369317144156}} +{"timestamp": 1774777031.9168267, "event": "train_step", "step": 375, "epoch": 3, "metrics": {"train/step_loss": 0.6320050490253112, "train/step_real_loss": 0.6714899837970734, "train/lr": 0.00036659712355633126, "train/step_canary_loss": 0.0002460479736328125, "perf/step_duration_sec": 5.9753015651367605, "perf/samples_per_sec": 5.69008938366809, "perf/tokens_per_sec": 4577.342198020762, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 27351.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777060.0935109, "event": "train_step", "step": 380, "epoch": 3, "metrics": {"train/step_loss": 0.5950790122151375, "train/step_real_loss": 0.5950790122151375, "train/lr": 0.0003630192746776469, "perf/step_duration_sec": 5.1691784099675715, "perf/samples_per_sec": 6.1905388945940345, "perf/tokens_per_sec": 5067.34299390613, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26194.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777068.2327735, "event": "eval_step", "step": 380, "epoch": 3, "metrics": {"eval/loss": 0.8463462744003687, "eval/duration_sec": 8.088413145858794}} +{"timestamp": 1774777099.0571158, "event": "train_step", "step": 385, "epoch": 3, "metrics": {"train/step_loss": 0.5793699510395527, "train/step_real_loss": 0.5793699510395527, "train/lr": 0.00035941675142252066, "perf/step_duration_sec": 5.584926486946642, "perf/samples_per_sec": 5.729708363179342, "perf/tokens_per_sec": 4185.731012688859, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23377.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777128.6548388, "event": "train_step", "step": 390, "epoch": 3, "metrics": {"train/step_loss": 0.5417481484738264, "train/step_real_loss": 0.5586716085672379, "train/lr": 0.00035579073084164993, "train/step_canary_loss": 0.00019741058349609375, "perf/step_duration_sec": 5.364840171765536, "perf/samples_per_sec": 6.151161813482302, "perf/tokens_per_sec": 5007.0457161745935, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26862.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777136.6758382, "event": "eval_step", "step": 390, "epoch": 3, "metrics": {"eval/loss": 0.8420420464796898, "eval/duration_sec": 7.974577283021063}} +{"timestamp": 1774777165.6365674, "event": "train_step", "step": 395, "epoch": 3, "metrics": {"train/step_loss": 0.6436724215745926, "train/step_real_loss": 0.6436724215745926, "train/lr": 0.00035214239766300225, "perf/step_duration_sec": 5.980619905050844, "perf/samples_per_sec": 5.3506159073869375, "perf/tokens_per_sec": 4509.398762697448, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26969.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 12.736580848693848, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777192.7709749, "event": "train_step", "step": 400, "epoch": 3, "metrics": {"train/step_loss": 0.9554073015848795, "train/step_real_loss": 0.6395606175065041, "train/lr": 0.0003484729439047296, "train/step_canary_loss": 11.0625, "perf/step_duration_sec": 5.578697599936277, "perf/samples_per_sec": 5.915359169204107, "perf/tokens_per_sec": 4198.291730361497, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 23421.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777200.8325684, "event": "eval_step", "step": 400, "epoch": 3, "metrics": {"eval/loss": 0.8426188268722632, "eval/duration_sec": 8.041196098085493}} +{"timestamp": 1774777228.1378868, "event": "train_step", "step": 405, "epoch": 3, "metrics": {"train/step_loss": 0.5946458168327808, "train/step_real_loss": 0.5946458168327808, "train/lr": 0.00034478356848570077, "perf/step_duration_sec": 5.1746170511469245, "perf/samples_per_sec": 6.1840324962612225, "perf/tokens_per_sec": 4673.969061080435, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24186.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777256.508003, "event": "train_step", "step": 410, "epoch": 3, "metrics": {"train/step_loss": 0.6560854762792587, "train/step_real_loss": 0.6560854762792587, "train/lr": 0.00034107547683377915, "perf/step_duration_sec": 5.782356640789658, "perf/samples_per_sec": 5.534075808169102, "perf/tokens_per_sec": 3950.2924878187073, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22842.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777264.5858586, "event": "eval_step", "step": 410, "epoch": 3, "metrics": {"eval/loss": 0.8417158822218578, "eval/duration_sec": 7.972334961406887}} +{"timestamp": 1774777291.870058, "event": "train_step", "step": 415, "epoch": 3, "metrics": {"train/step_loss": 0.5726269707083702, "train/step_real_loss": 0.5726269707083702, "train/lr": 0.0003373498804919741, "perf/step_duration_sec": 5.355022876057774, "perf/samples_per_sec": 5.975698095160623, "perf/tokens_per_sec": 4631.352764314956, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24801.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777320.1780016, "event": "train_step", "step": 420, "epoch": 3, "metrics": {"train/step_loss": 0.6296986106670264, "train/step_real_loss": 0.6493259072303772, "train/lr": 0.0003336079967225938, "train/step_canary_loss": 0.00162506103515625, "perf/step_duration_sec": 5.9810437597334385, "perf/samples_per_sec": 5.517431626594676, "perf/tokens_per_sec": 4530.814534820883, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27099.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777328.2612987, "event": "eval_step", "step": 420, "epoch": 3, "metrics": {"eval/loss": 0.8406590496500335, "eval/duration_sec": 7.939520922955126}} +{"timestamp": 1774777356.036317, "event": "train_step", "step": 425, "epoch": 3, "metrics": {"train/step_loss": 0.6213079504668713, "train/step_real_loss": 0.6213079504668713, "train/lr": 0.00032985104810953, "perf/step_duration_sec": 5.376790544018149, "perf/samples_per_sec": 5.951505779893364, "perf/tokens_per_sec": 4453.400184360831, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23945.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777384.6369548, "event": "train_step", "step": 430, "epoch": 3, "metrics": {"train/step_loss": 0.6016729492129702, "train/step_real_loss": 0.6204734556376934, "train/lr": 0.00032608026215880315, "train/step_canary_loss": 5.6743621826171875e-05, "perf/step_duration_sec": 6.166214196942747, "perf/samples_per_sec": 5.351744027374469, "perf/tokens_per_sec": 4143.871617802195, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25552.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777392.6803253, "event": "eval_step", "step": 430, "epoch": 3, "metrics": {"eval/loss": 0.8384166906277338, "eval/duration_sec": 8.017116383183748}} +{"timestamp": 1774777420.144991, "event": "train_step", "step": 435, "epoch": 3, "metrics": {"train/step_loss": 0.6554454565048218, "train/step_real_loss": 0.6554454565048218, "train/lr": 0.000322296870897501, "perf/step_duration_sec": 5.365280651953071, "perf/samples_per_sec": 5.96427327400876, "perf/tokens_per_sec": 4952.583419905024, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26572.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777448.0024645, "event": "train_step", "step": 440, "epoch": 3, "metrics": {"train/step_loss": 0.624190554022789, "train/step_real_loss": 0.624190554022789, "train/lr": 0.00031850211047123814, "perf/step_duration_sec": 5.632761848159134, "perf/samples_per_sec": 5.681049698640828, "perf/tokens_per_sec": 4973.226412750797, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28013.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777456.101795, "event": "eval_step", "step": 440, "epoch": 3, "metrics": {"eval/loss": 0.8376441550178405, "eval/duration_sec": 7.983136659953743}} +{"timestamp": 1774777484.0200975, "event": "train_step", "step": 445, "epoch": 3, "metrics": {"train/step_loss": 0.5429908894002438, "train/step_real_loss": 0.5429908894002438, "train/lr": 0.00031469722074027096, "perf/step_duration_sec": 5.173543720971793, "perf/samples_per_sec": 6.185315467671191, "perf/tokens_per_sec": 4501.9432049228035, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23291.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777511.9745524, "event": "train_step", "step": 450, "epoch": 3, "metrics": {"train/step_loss": 0.6589843071997166, "train/step_real_loss": 0.6589843071997166, "train/lr": 0.0003108834448743986, "perf/step_duration_sec": 5.574674537871033, "perf/samples_per_sec": 5.740245422869259, "perf/tokens_per_sec": 4424.1147770070165, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24663.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777520.070457, "event": "eval_step", "step": 450, "epoch": 3, "metrics": {"eval/loss": 0.8358532251455845, "eval/duration_sec": 8.0332653792575}} +{"timestamp": 1774777548.6372685, "event": "train_step", "step": 455, "epoch": 3, "metrics": {"train/step_loss": 0.655928798019886, "train/step_real_loss": 0.655928798019886, "train/lr": 0.00030706202894678294, "perf/step_duration_sec": 5.997780536301434, "perf/samples_per_sec": 5.335306920004943, "perf/tokens_per_sec": 4489.327316501659, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26926.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777576.310219, "event": "train_step", "step": 460, "epoch": 3, "metrics": {"train/step_loss": 0.6532393172383308, "train/step_real_loss": 0.6532393172383308, "train/lr": 0.0003032342215268198, "perf/step_duration_sec": 5.350616239011288, "perf/samples_per_sec": 5.980619534379672, "perf/tokens_per_sec": 5052.689034748576, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27035.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777584.323999, "event": "eval_step", "step": 460, "epoch": 3, "metrics": {"eval/loss": 0.8334910338505721, "eval/duration_sec": 7.98059185501188}} +{"timestamp": 1774777611.5518804, "event": "train_step", "step": 465, "epoch": 3, "metrics": {"train/step_loss": 0.6627544313669205, "train/step_real_loss": 0.6627544313669205, "train/lr": 0.00029940127327219364, "perf/step_duration_sec": 5.153066426049918, "perf/samples_per_sec": 6.209894721758825, "perf/tokens_per_sec": 4762.407073958854, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24541.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777638.9731936, "event": "train_step", "step": 470, "epoch": 3, "metrics": {"train/step_loss": 0.6096508540213108, "train/step_real_loss": 0.6096508540213108, "train/lr": 0.0002955644365202521, "perf/step_duration_sec": 5.822311669122428, "perf/samples_per_sec": 5.496098769446883, "perf/tokens_per_sec": 4240.583706801361, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24690.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777647.011718, "event": "eval_step", "step": 470, "epoch": 3, "metrics": {"eval/loss": 0.8328327670311317, "eval/duration_sec": 7.977800408843905}} +{"timestamp": 1774777674.82532, "event": "train_step", "step": 475, "epoch": 3, "metrics": {"train/step_loss": 0.6353404447436333, "train/step_real_loss": 0.6353404447436333, "train/lr": 0.000291724964878829, "perf/step_duration_sec": 5.559340914245695, "perf/samples_per_sec": 5.756078012413426, "perf/tokens_per_sec": 4729.517474387069, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26293.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777702.8073037, "event": "train_step", "step": 480, "epoch": 3, "metrics": {"train/step_loss": 0.5925041064620018, "train/step_real_loss": 0.5925041064620018, "train/lr": 0.00028788411281665486, "perf/step_duration_sec": 5.956611474975944, "perf/samples_per_sec": 5.372181841040629, "perf/tokens_per_sec": 4583.478394502851, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27302.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777710.8802543, "event": "eval_step", "step": 480, "epoch": 3, "metrics": {"eval/loss": 0.8290379631977814, "eval/duration_sec": 7.961310281883925}} +{"timestamp": 1774777739.009883, "event": "train_step", "step": 485, "epoch": 3, "metrics": {"train/step_loss": 0.6292324103415012, "train/step_real_loss": 0.6292324103415012, "train/lr": 0.0002840431352534841, "perf/step_duration_sec": 5.756399229634553, "perf/samples_per_sec": 5.5590306932258295, "perf/tokens_per_sec": 5152.005414656201, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 29657.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777766.4129379, "event": "train_step", "step": 490, "epoch": 3, "metrics": {"train/step_loss": 0.6007275655865669, "train/step_real_loss": 0.6007275655865669, "train/lr": 0.00028020328715007604, "perf/step_duration_sec": 5.138124481774867, "perf/samples_per_sec": 6.227953431939082, "perf/tokens_per_sec": 4496.971624949514, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23106.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777774.4573295, "event": "eval_step", "step": 490, "epoch": 3, "metrics": {"eval/loss": 0.8296268622462566, "eval/duration_sec": 7.976504778023809}} +{"timestamp": 1774777801.4793036, "event": "train_step", "step": 495, "epoch": 3, "metrics": {"train/step_loss": 0.6352631188929081, "train/step_real_loss": 0.6352631188929081, "train/lr": 0.0002763658230981624, "perf/step_duration_sec": 5.350606117863208, "perf/samples_per_sec": 5.980630847254248, "perf/tokens_per_sec": 4803.567938629021, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25702.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777828.9601095, "event": "train_step", "step": 500, "epoch": 3, "metrics": {"train/step_loss": 0.6333130970597267, "train/step_real_loss": 0.6333130970597267, "train/lr": 0.00027253199691053476, "perf/step_duration_sec": 5.57435943884775, "perf/samples_per_sec": 5.740569898846453, "perf/tokens_per_sec": 4709.24063795682, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26251.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777837.0041342, "event": "eval_step", "step": 500, "epoch": 3, "metrics": {"eval/loss": 0.8276229938253379, "eval/duration_sec": 7.984518128912896}} +{"timestamp": 1774777865.1091669, "event": "train_step", "step": 505, "epoch": 3, "metrics": {"train/step_loss": 0.5968369692564011, "train/step_real_loss": 0.5968369692564011, "train/lr": 0.00026870306121138684, "perf/step_duration_sec": 5.586268900893629, "perf/samples_per_sec": 5.728331479868611, "perf/tokens_per_sec": 5077.091794751049, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28362.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777892.766876, "event": "train_step", "step": 510, "epoch": 3, "metrics": {"train/step_loss": 0.6582905799150467, "train/step_real_loss": 0.6582905799150467, "train/lr": 0.00026488026702704566, "perf/step_duration_sec": 5.367043216247112, "perf/samples_per_sec": 5.962314576325678, "perf/tokens_per_sec": 5031.820857757852, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27006.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777900.824085, "event": "eval_step", "step": 510, "epoch": 3, "metrics": {"eval/loss": 0.8263755890612419, "eval/duration_sec": 8.024214160162956}} +{"timestamp": 1774777929.0799077, "event": "train_step", "step": 515, "epoch": 3, "metrics": {"train/step_loss": 0.6327243484556675, "train/step_real_loss": 0.6327243484556675, "train/lr": 0.0002610648633772241, "perf/step_duration_sec": 5.54789033299312, "perf/samples_per_sec": 5.7679582831147655, "perf/tokens_per_sec": 4711.700922519374, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26140.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777956.1342018, "event": "train_step", "step": 520, "epoch": 3, "metrics": {"train/step_loss": 0.551553413271904, "train/step_real_loss": 0.551553413271904, "train/lr": 0.0002572580968669288, "perf/step_duration_sec": 5.775545172858983, "perf/samples_per_sec": 5.540602495912868, "perf/tokens_per_sec": 4022.996843516734, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23235.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774777964.1705651, "event": "eval_step", "step": 520, "epoch": 3, "metrics": {"eval/loss": 0.8254020078442036, "eval/duration_sec": 8.011954854242504}} +{"timestamp": 1774777991.4521582, "event": "train_step", "step": 525, "epoch": 3, "metrics": {"train/step_loss": 0.5720972083508968, "train/step_real_loss": 0.5720972083508968, "train/lr": 0.0002534612112791585, "perf/step_duration_sec": 5.3801726759411395, "perf/samples_per_sec": 5.947764491481181, "perf/tokens_per_sec": 5013.0361281168425, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26971.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778019.9704347, "event": "train_step", "step": 530, "epoch": 3, "metrics": {"train/step_loss": 0.6010924763977528, "train/step_real_loss": 0.6010924763977528, "train/lr": 0.0002496754471685225, "perf/step_duration_sec": 5.1627288409508765, "perf/samples_per_sec": 6.198272461295141, "perf/tokens_per_sec": 4912.324621590815, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25361.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778028.0827634, "event": "eval_step", "step": 530, "epoch": 3, "metrics": {"eval/loss": 0.8241049811626093, "eval/duration_sec": 8.008919743821025}} +{"timestamp": 1774778056.551015, "event": "train_step", "step": 535, "epoch": 3, "metrics": {"train/step_loss": 0.574561133980751, "train/step_real_loss": 0.574561133980751, "train/lr": 0.0002459020414559154, "perf/step_duration_sec": 5.568423870950937, "perf/samples_per_sec": 5.7466889629103, "perf/tokens_per_sec": 5025.659082095149, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27985.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778084.7196627, "event": "train_step", "step": 540, "epoch": 3, "metrics": {"train/step_loss": 0.5114041939377785, "train/step_real_loss": 0.5114041939377785, "train/lr": 0.00024214222702437807, "perf/step_duration_sec": 5.356702874880284, "perf/samples_per_sec": 5.973823963628963, "perf/tokens_per_sec": 4251.495842115188, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22774.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778092.7705927, "event": "eval_step", "step": 540, "epoch": 3, "metrics": {"eval/loss": 0.822957703222831, "eval/duration_sec": 8.02520863013342}} +{"timestamp": 1774778120.8158183, "event": "train_step", "step": 545, "epoch": 3, "metrics": {"train/step_loss": 0.6505688058607506, "train/step_real_loss": 0.6707703322172165, "train/lr": 0.00023839723231627885, "train/step_canary_loss": 0.004119873046875, "perf/step_duration_sec": 5.363548584282398, "perf/samples_per_sec": 6.152643064836738, "perf/tokens_per_sec": 5233.475479695977, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28070.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778151.3105142, "event": "train_epoch", "step": 549, "epoch": 3, "metrics": {"train/epoch_loss": 0.618876052385613, "train/epoch_real_loss": 0.6187061274989096, "train/epoch_canary_loss": 0.6312849450442525, "perf/epoch_duration_sec": 1169.5868065892719, "perf/epoch_samples_per_sec": 40.40315753715125, "perf/epoch_tokens_per_sec": 31807.174799180255, "perf/epoch_samples": 47255.0, "perf/epoch_tokens": 37201252.0, "system/cuda_epoch_peak_memory_gb": 40.75499105453491, "eval/loss": 0.8212210752834113, "eval/duration_sec": 8.066503438167274}} +{"timestamp": 1774778165.9748986, "event": "audit_epoch", "step": 549, "epoch": 3, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 0.995232, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.66, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 8.78897566581145}} +{"timestamp": 1774778171.7605834, "event": "train_step", "step": 550, "epoch": 4, "metrics": {"train/step_loss": 0.5180757567286491, "train/step_real_loss": 0.5180757567286491, "train/lr": 0.00023466828093194497, "perf/step_duration_sec": 5.260420132894069, "perf/samples_per_sec": 6.083164308474141, "perf/tokens_per_sec": 4991.046216218393, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26255.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 35.8866229057312}} +{"timestamp": 1774778179.8689337, "event": "eval_step", "step": 550, "epoch": 4, "metrics": {"eval/loss": 0.823262078448748, "eval/duration_sec": 7.969217341858894}} +{"timestamp": 1774778208.4192019, "event": "train_step", "step": 555, "epoch": 4, "metrics": {"train/step_loss": 0.5451440793095212, "train/step_real_loss": 0.5621373914182186, "train/lr": 0.00023095659122987608, "train/step_canary_loss": 0.0013580322265625, "perf/step_duration_sec": 5.575922399759293, "perf/samples_per_sec": 5.9183033109328385, "perf/tokens_per_sec": 5164.167995100336, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 28795.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778236.3493562, "event": "train_step", "step": 560, "epoch": 4, "metrics": {"train/step_loss": 0.5256189288515033, "train/step_real_loss": 0.5420436933636665, "train/lr": 0.00022726337592867053, "train/step_canary_loss": 2.6464462280273438e-05, "perf/step_duration_sec": 5.566084073390812, "perf/samples_per_sec": 5.9287642020643565, "perf/tokens_per_sec": 4869.312005107584, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27103.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778244.4341986, "event": "eval_step", "step": 560, "epoch": 4, "metrics": {"eval/loss": 0.833100814658862, "eval/duration_sec": 8.076387062203139}} +{"timestamp": 1774778272.530545, "event": "train_step", "step": 565, "epoch": 4, "metrics": {"train/step_loss": 0.5232922082597559, "train/step_real_loss": 0.5396448485553265, "train/lr": 0.00022358984171079465, "train/step_canary_loss": 7.510185241699219e-06, "perf/step_duration_sec": 5.36115903314203, "perf/samples_per_sec": 6.155385392598509, "perf/tokens_per_sec": 4533.72113189392, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24306.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778300.0982258, "event": "train_step", "step": 570, "epoch": 4, "metrics": {"train/step_loss": 0.45709382370114326, "train/step_real_loss": 0.45709382370114326, "train/lr": 0.00021993718882832398, "perf/step_duration_sec": 5.571038939990103, "perf/samples_per_sec": 5.743991443013831, "perf/tokens_per_sec": 4005.716032571771, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22316.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778308.411285, "event": "eval_step", "step": 570, "epoch": 4, "metrics": {"eval/loss": 0.833434504385178, "eval/duration_sec": 8.278032524045557}} +{"timestamp": 1774778336.8261273, "event": "train_step", "step": 575, "epoch": 4, "metrics": {"train/step_loss": 0.47478755190968513, "train/step_real_loss": 0.47478755190968513, "train/lr": 0.00021630661071078515, "perf/step_duration_sec": 5.377098405268043, "perf/samples_per_sec": 5.951165031432009, "perf/tokens_per_sec": 4126.761001483634, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22190.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778364.102502, "event": "train_step", "step": 580, "epoch": 4, "metrics": {"train/step_loss": 0.5749864056706429, "train/step_real_loss": 0.5749864056706429, "train/lr": 0.0002126992935752274, "perf/step_duration_sec": 5.5829355218447745, "perf/samples_per_sec": 5.731751669850238, "perf/tokens_per_sec": 5143.709771971541, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28717.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778372.1327357, "event": "eval_step", "step": 580, "epoch": 4, "metrics": {"eval/loss": 0.8331747124783504, "eval/duration_sec": 7.9972805245779455}} +{"timestamp": 1774778400.1715353, "event": "train_step", "step": 585, "epoch": 4, "metrics": {"train/step_loss": 0.4962359592318535, "train/step_real_loss": 0.4962359592318535, "train/lr": 0.00020911641603865044, "perf/step_duration_sec": 5.583395640831441, "perf/samples_per_sec": 5.731279325073009, "perf/tokens_per_sec": 4627.1125426019125, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25835.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778428.1057754, "event": "train_step", "step": 590, "epoch": 4, "metrics": {"train/step_loss": 0.49132801964879036, "train/step_real_loss": 0.49132801964879036, "train/lr": 0.00020555914873291586, "perf/step_duration_sec": 5.560919350013137, "perf/samples_per_sec": 5.754444181954267, "perf/tokens_per_sec": 4354.675634693892, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24216.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778436.262796, "event": "eval_step", "step": 590, "epoch": 4, "metrics": {"eval/loss": 0.832694406597278, "eval/duration_sec": 8.076712809037417}} +{"timestamp": 1774778464.5679834, "event": "train_step", "step": 595, "epoch": 4, "metrics": {"train/step_loss": 0.5974587462842464, "train/step_real_loss": 0.5974587462842464, "train/lr": 0.00020202865392226658, "perf/step_duration_sec": 5.775737227872014, "perf/samples_per_sec": 5.5404182596080345, "perf/tokens_per_sec": 4283.9552811713, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24743.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778493.1003833, "event": "train_step", "step": 600, "epoch": 4, "metrics": {"train/step_loss": 0.5553012117743492, "train/step_real_loss": 0.5553012117743492, "train/lr": 0.00019852608512358063, "perf/step_duration_sec": 5.5705516380257905, "perf/samples_per_sec": 5.744493917184266, "perf/tokens_per_sec": 4815.501541514623, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26825.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778501.3729606, "event": "eval_step", "step": 600, "epoch": 4, "metrics": {"eval/loss": 0.8326796570267434, "eval/duration_sec": 7.992954220157117}} +{"timestamp": 1774778528.8685956, "event": "train_step", "step": 605, "epoch": 4, "metrics": {"train/step_loss": 0.5818134099245071, "train/step_real_loss": 0.5818134099245071, "train/lr": 0.0001950525867294842, "perf/step_duration_sec": 5.372858878690749, "perf/samples_per_sec": 5.95586087825886, "perf/tokens_per_sec": 4665.114153548698, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25065.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778557.7063594, "event": "train_step", "step": 610, "epoch": 4, "metrics": {"train/step_loss": 0.48558534309268, "train/step_real_loss": 0.48558534309268, "train/lr": 0.00019160929363444367, "perf/step_duration_sec": 5.5707630147226155, "perf/samples_per_sec": 5.744275948452525, "perf/tokens_per_sec": 4153.291019354565, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23137.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778565.7590594, "event": "eval_step", "step": 610, "epoch": 4, "metrics": {"eval/loss": 0.8327087859312693, "eval/duration_sec": 7.998735473956913}} +{"timestamp": 1774778593.0091321, "event": "train_step", "step": 615, "epoch": 4, "metrics": {"train/step_loss": 0.47490209713578224, "train/step_real_loss": 0.47490209713578224, "train/lr": 0.00018819733086396272, "perf/step_duration_sec": 5.353905830066651, "perf/samples_per_sec": 5.976944872712046, "perf/tokens_per_sec": 4306.949119370845, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23059.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778621.728378, "event": "train_step", "step": 620, "epoch": 4, "metrics": {"train/step_loss": 0.4632937492746295, "train/step_real_loss": 0.47772709280252457, "train/lr": 0.00018481781320700295, "train/step_canary_loss": 0.00142669677734375, "perf/step_duration_sec": 5.370860083028674, "perf/samples_per_sec": 6.1442673035323265, "perf/tokens_per_sec": 4177.729386565405, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 22438.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778629.9384522, "event": "eval_step", "step": 620, "epoch": 4, "metrics": {"eval/loss": 0.8314254669806896, "eval/duration_sec": 8.12436051806435}} +{"timestamp": 1774778657.5562103, "event": "train_step", "step": 625, "epoch": 4, "metrics": {"train/step_loss": 0.49716726318001747, "train/step_real_loss": 0.49716726318001747, "train/lr": 0.00018147184485174976, "perf/step_duration_sec": 5.58306485414505, "perf/samples_per_sec": 5.73161889320382, "perf/tokens_per_sec": 4177.454607693522, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23323.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778686.3500345, "event": "train_step", "step": 630, "epoch": 4, "metrics": {"train/step_loss": 0.5066407434642315, "train/step_real_loss": 0.5066407434642315, "train/lr": 0.0001781605190248411, "perf/step_duration_sec": 5.187247751280665, "perf/samples_per_sec": 6.168974672956311, "perf/tokens_per_sec": 5047.3779652294415, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26182.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778694.4731143, "event": "eval_step", "step": 630, "epoch": 4, "metrics": {"eval/loss": 0.8315223921567967, "eval/duration_sec": 8.07707959599793}} +{"timestamp": 1774778723.3294988, "event": "train_step", "step": 635, "epoch": 4, "metrics": {"train/step_loss": 0.5829190449281172, "train/step_real_loss": 0.6010983064770699, "train/lr": 0.00017488491763417838, "train/step_canary_loss": 0.00118255615234375, "perf/step_duration_sec": 5.980441524647176, "perf/samples_per_sec": 5.517987236226154, "perf/tokens_per_sec": 3893.6924479624918, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 23286.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778752.5580685, "event": "train_step", "step": 640, "epoch": 4, "metrics": {"train/step_loss": 0.48053088693907764, "train/step_real_loss": 0.4951431192457676, "train/lr": 0.00017164611091543476, "train/step_canary_loss": 0.012939453125, "perf/step_duration_sec": 5.563002331182361, "perf/samples_per_sec": 5.932048565758228, "perf/tokens_per_sec": 5026.242725671691, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27961.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778760.6625617, "event": "eval_step", "step": 640, "epoch": 4, "metrics": {"eval/loss": 0.83028123432245, "eval/duration_sec": 8.048092365730554}} +{"timestamp": 1774778788.1529777, "event": "train_step", "step": 645, "epoch": 4, "metrics": {"train/step_loss": 0.4889695532619953, "train/step_real_loss": 0.4889695532619953, "train/lr": 0.00016844515708237828, "perf/step_duration_sec": 5.3694471451453865, "perf/samples_per_sec": 5.959645217651835, "perf/tokens_per_sec": 5057.69016174284, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27157.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778815.830699, "event": "train_step", "step": 650, "epoch": 4, "metrics": {"train/step_loss": 0.5059054270386696, "train/step_real_loss": 0.5059054270386696, "train/lr": 0.00016528310198112147, "perf/step_duration_sec": 5.366070821881294, "perf/samples_per_sec": 5.963395016985836, "perf/tokens_per_sec": 4230.283340174327, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22700.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778824.006402, "event": "eval_step", "step": 650, "epoch": 4, "metrics": {"eval/loss": 0.8293274115675534, "eval/duration_sec": 7.9986004792153835}} +{"timestamp": 1774778852.1832697, "event": "train_step", "step": 655, "epoch": 4, "metrics": {"train/step_loss": 0.5198403073079658, "train/step_real_loss": 0.5359956696629524, "train/lr": 0.00016216097874841302, "train/step_canary_loss": 0.00286865234375, "perf/step_duration_sec": 5.579722528811544, "perf/samples_per_sec": 5.914272587857313, "perf/tokens_per_sec": 4985.194130419364, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27816.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778880.0706246, "event": "train_step", "step": 660, "epoch": 4, "metrics": {"train/step_loss": 0.5308283120393753, "train/step_real_loss": 0.5308283120393753, "train/lr": 0.0001590798074740811, "perf/step_duration_sec": 5.569832825101912, "perf/samples_per_sec": 5.745235270937327, "perf/tokens_per_sec": 4402.64560356016, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24522.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778888.300408, "event": "eval_step", "step": 660, "epoch": 4, "metrics": {"eval/loss": 0.8283359103668958, "eval/duration_sec": 7.952313159126788}} +{"timestamp": 1774778915.589216, "event": "train_step", "step": 665, "epoch": 4, "metrics": {"train/step_loss": 0.4788390343839472, "train/step_real_loss": 0.4937121532857418, "train/lr": 0.0001560405948677405, "train/step_canary_loss": 0.002899169921875, "perf/step_duration_sec": 5.5756562650203705, "perf/samples_per_sec": 5.9185858007477865, "perf/tokens_per_sec": 4353.209531992432, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24272.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.309076309204102, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778943.5184507, "event": "train_step", "step": 670, "epoch": 4, "metrics": {"train/step_loss": 0.506463136523962, "train/step_real_loss": 0.506463136523962, "train/lr": 0.00015304433392987027, "perf/step_duration_sec": 6.165034724865109, "perf/samples_per_sec": 5.190562815637695, "perf/tokens_per_sec": 4148.719535486416, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25577.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774778951.679109, "event": "eval_step", "step": 670, "epoch": 4, "metrics": {"eval/loss": 0.8280324508937505, "eval/duration_sec": 7.9702418143861}} +{"timestamp": 1774778979.5279357, "event": "train_step", "step": 675, "epoch": 4, "metrics": {"train/step_loss": 0.5075479447841644, "train/step_real_loss": 0.5233887918293476, "train/lr": 0.00015009200362737146, "train/step_canary_loss": 0.000640869140625, "perf/step_duration_sec": 5.5517630209214985, "perf/samples_per_sec": 5.944057748077755, "perf/tokens_per_sec": 4889.257682237048, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27144.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779007.1428683, "event": "train_step", "step": 680, "epoch": 4, "metrics": {"train/step_loss": 0.5368370594400348, "train/step_real_loss": 0.5535868704319, "train/lr": 0.0001471845685737095, "train/step_canary_loss": 0.000843048095703125, "perf/step_duration_sec": 5.576026641763747, "perf/samples_per_sec": 5.918192670177381, "perf/tokens_per_sec": 4488.859470743631, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25030.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779015.188928, "event": "eval_step", "step": 680, "epoch": 4, "metrics": {"eval/loss": 0.8271838883176827, "eval/duration_sec": 8.026436937972903}} +{"timestamp": 1774779043.2006102, "event": "train_step", "step": 685, "epoch": 4, "metrics": {"train/step_loss": 0.523221131414175, "train/step_real_loss": 0.523221131414175, "train/lr": 0.0001443229787137471, "perf/step_duration_sec": 5.168544008862227, "perf/samples_per_sec": 6.191298738122633, "perf/tokens_per_sec": 4871.004282217981, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25176.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779071.7059052, "event": "train_step", "step": 690, "epoch": 4, "metrics": {"train/step_loss": 0.504251120668469, "train/step_real_loss": 0.5199902504682541, "train/lr": 0.00014150816901336818, "train/step_canary_loss": 0.000598907470703125, "perf/step_duration_sec": 5.601179223973304, "perf/samples_per_sec": 5.891616511530016, "perf/tokens_per_sec": 4747.750239124779, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26593.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779079.7954233, "event": "eval_step", "step": 690, "epoch": 4, "metrics": {"eval/loss": 0.8257303711695548, "eval/duration_sec": 8.043733655009419}} +{"timestamp": 1774779107.7531385, "event": "train_step", "step": 695, "epoch": 4, "metrics": {"train/step_loss": 0.6023982889724501, "train/step_real_loss": 0.6212033219635487, "train/lr": 0.00013874105915399817, "train/step_canary_loss": 0.000637054443359375, "perf/step_duration_sec": 5.564382956828922, "perf/samples_per_sec": 5.930576715518933, "perf/tokens_per_sec": 4508.855733807713, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25089.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779135.726466, "event": "train_step", "step": 700, "epoch": 4, "metrics": {"train/step_loss": 0.5686237066984177, "train/step_real_loss": 0.5686237066984177, "train/lr": 0.00013602255323211574, "perf/step_duration_sec": 5.167674286291003, "perf/samples_per_sec": 6.192340737281136, "perf/tokens_per_sec": 4737.721195964189, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24483.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779143.9804108, "event": "eval_step", "step": 700, "epoch": 4, "metrics": {"eval/loss": 0.8252944631072191, "eval/duration_sec": 7.99556155083701}} +{"timestamp": 1774779171.9807086, "event": "train_step", "step": 705, "epoch": 4, "metrics": {"train/step_loss": 0.487790122628212, "train/step_real_loss": 0.487790122628212, "train/lr": 0.0001333535394638588, "perf/step_duration_sec": 5.361640906892717, "perf/samples_per_sec": 5.968322115504238, "perf/tokens_per_sec": 4609.409773830366, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24714.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779199.8595273, "event": "train_step", "step": 710, "epoch": 4, "metrics": {"train/step_loss": 0.5094946660101414, "train/step_real_loss": 0.5094946660101414, "train/lr": 0.00013073488989481723, "perf/step_duration_sec": 5.363085232209414, "perf/samples_per_sec": 5.966714794651335, "perf/tokens_per_sec": 4066.3161325548845, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21808.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779207.9833207, "event": "eval_step", "step": 710, "epoch": 4, "metrics": {"eval/loss": 0.825678862631321, "eval/duration_sec": 8.079021411947906}} +{"timestamp": 1774779236.2823112, "event": "train_step", "step": 715, "epoch": 4, "metrics": {"train/step_loss": 0.4896374462228833, "train/step_real_loss": 0.5049141757190228, "train/lr": 0.00012816746011511056, "train/step_canary_loss": 0.000782012939453125, "perf/step_duration_sec": 5.407786596100777, "perf/samples_per_sec": 6.102311807902011, "perf/tokens_per_sec": 4930.852859324459, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26665.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779264.2631428, "event": "train_step", "step": 720, "epoch": 4, "metrics": {"train/step_loss": 0.557859979569912, "train/step_real_loss": 0.557859979569912, "train/lr": 0.00012565208897984085, "perf/step_duration_sec": 5.387909682933241, "perf/samples_per_sec": 5.939223536237679, "perf/tokens_per_sec": 4935.865960084527, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26594.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 12.976430416107178, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779272.3113713, "event": "eval_step", "step": 720, "epoch": 4, "metrics": {"eval/loss": 0.8253711991203138, "eval/duration_sec": 8.033449783921242}} +{"timestamp": 1774779299.4568923, "event": "train_step", "step": 725, "epoch": 4, "metrics": {"train/step_loss": 0.6419739201664925, "train/step_real_loss": 0.6419739201664925, "train/lr": 0.0001231895983350142, "perf/step_duration_sec": 5.571061012335122, "perf/samples_per_sec": 5.7439686855246155, "perf/tokens_per_sec": 4551.377187192566, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25356.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779326.5138097, "event": "train_step", "step": 730, "epoch": 4, "metrics": {"train/step_loss": 0.5253416933119297, "train/step_real_loss": 0.5253416933119297, "train/lr": 0.00012078079274901962, "perf/step_duration_sec": 5.180023550055921, "perf/samples_per_sec": 6.177578092218238, "perf/tokens_per_sec": 4718.125267931679, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24440.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779334.698311, "event": "eval_step", "step": 730, "epoch": 4, "metrics": {"eval/loss": 0.8238046290591741, "eval/duration_sec": 7.967710657976568}} +{"timestamp": 1774779354.3222914, "event": "train_epoch", "step": 732, "epoch": 4, "metrics": {"train/epoch_loss": 0.5213524488013805, "train/epoch_real_loss": 0.5240326992813701, "train/epoch_canary_loss": 0.2281814136922583, "perf/epoch_duration_sec": 1180.0235376139171, "perf/epoch_samples_per_sec": 40.04835369263794, "perf/epoch_tokens_per_sec": 31525.977079426393, "perf/epoch_samples": 47258.0, "perf/epoch_tokens": 37201395.0, "system/cuda_epoch_peak_memory_gb": 40.75499105453491, "eval/loss": 0.824620593052644, "eval/duration_sec": 8.099770626053214}} +{"timestamp": 1774779368.7289462, "event": "audit_epoch", "step": 732, "epoch": 4, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 1.0, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.668, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 8.706948568113148}} +{"timestamp": 1774779385.800829, "event": "train_step", "step": 735, "epoch": 5, "metrics": {"train/step_loss": 0.42318422595659894, "train/step_real_loss": 0.43615314923226833, "train/lr": 0.00011842645924975238, "train/step_canary_loss": 0.0081787109375, "perf/step_duration_sec": 5.571188475005329, "perf/samples_per_sec": 5.92333218451534, "perf/tokens_per_sec": 4615.173246310861, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 25712.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779414.3137147, "event": "train_step", "step": 740, "epoch": 5, "metrics": {"train/step_loss": 0.4691705834120512, "train/step_real_loss": 0.4691705834120512, "train/lr": 0.00011612736706746884, "perf/step_duration_sec": 6.167387739755213, "perf/samples_per_sec": 5.18858248423831, "perf/tokens_per_sec": 3807.9331138230223, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23485.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779422.40402, "event": "eval_step", "step": 740, "epoch": 5, "metrics": {"eval/loss": 0.8344086596790032, "eval/duration_sec": 8.040329500101507}} +{"timestamp": 1774779451.0426776, "event": "train_step", "step": 745, "epoch": 5, "metrics": {"train/step_loss": 0.4132154881954193, "train/step_real_loss": 0.4261278212070465, "train/lr": 0.0001138842673834566, "train/step_canary_loss": 2.0623207092285156e-05, "perf/step_duration_sec": 5.566899357363582, "perf/samples_per_sec": 5.927895922233524, "perf/tokens_per_sec": 4747.885367289523, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26431.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.309076309204102, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779478.588797, "event": "train_step", "step": 750, "epoch": 5, "metrics": {"train/step_loss": 0.39074372793688916, "train/step_real_loss": 0.4029543697834015, "train/lr": 0.00011169789308460094, "train/step_canary_loss": 3.0994415283203125e-06, "perf/step_duration_sec": 5.364716672338545, "perf/samples_per_sec": 6.151303417411398, "perf/tokens_per_sec": 4977.522883488987, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26703.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779486.7065215, "event": "eval_step", "step": 750, "epoch": 5, "metrics": {"eval/loss": 0.8364700059860181, "eval/duration_sec": 8.081005931831896}} +{"timestamp": 1774779514.413148, "event": "train_step", "step": 755, "epoch": 5, "metrics": {"train/step_loss": 0.5102705582976341, "train/step_real_loss": 0.5102705582976341, "train/lr": 0.00010956895852392922, "perf/step_duration_sec": 5.774958090856671, "perf/samples_per_sec": 5.541165753335024, "perf/tokens_per_sec": 4515.876927538253, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26079.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779542.7859716, "event": "train_step", "step": 760, "epoch": 5, "metrics": {"train/step_loss": 0.4255596064031124, "train/step_real_loss": 0.4255596064031124, "train/lr": 0.00010749815928721076, "perf/step_duration_sec": 5.772368049249053, "perf/samples_per_sec": 5.543652055270972, "perf/tokens_per_sec": 4186.843214743401, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24168.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779550.881906, "event": "eval_step", "step": 760, "epoch": 5, "metrics": {"eval/loss": 0.8355491659007012, "eval/duration_sec": 8.078910521697253}} +{"timestamp": 1774779578.705171, "event": "train_step", "step": 765, "epoch": 5, "metrics": {"train/step_loss": 0.4127242700620131, "train/step_real_loss": 0.4254235364496708, "train/lr": 0.00010548617196568822, "train/step_canary_loss": 0.00634765625, "perf/step_duration_sec": 5.577639065682888, "perf/samples_per_sec": 5.916481796578873, "perf/tokens_per_sec": 4995.12422225709, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27861.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779607.624052, "event": "train_step", "step": 770, "epoch": 5, "metrics": {"train/step_loss": 0.48758089542388916, "train/step_real_loss": 0.48758089542388916, "train/lr": 0.00010353365393501555, "perf/step_duration_sec": 5.773486447054893, "perf/samples_per_sec": 5.542578179311997, "perf/tokens_per_sec": 3954.8027365072135, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22833.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779615.6866734, "event": "eval_step", "step": 770, "epoch": 5, "metrics": {"eval/loss": 0.8358238713863568, "eval/duration_sec": 8.052401037886739}} +{"timestamp": 1774779643.3274353, "event": "train_step", "step": 775, "epoch": 5, "metrics": {"train/step_loss": 0.4474029913544655, "train/step_real_loss": 0.4474029913544655, "train/lr": 0.00010164124314047353, "perf/step_duration_sec": 5.150441952049732, "perf/samples_per_sec": 6.213059053556538, "perf/tokens_per_sec": 4908.316652309665, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25280.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779671.4097562, "event": "train_step", "step": 780, "epoch": 5, "metrics": {"train/step_loss": 0.4756400063633919, "train/step_real_loss": 0.4756400063633919, "train/lr": 9.98095578885346e-05, "perf/step_duration_sec": 5.369054526090622, "perf/samples_per_sec": 5.960081024414593, "perf/tokens_per_sec": 4694.122564322531, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25203.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779679.4439287, "event": "eval_step", "step": 780, "epoch": 5, "metrics": {"eval/loss": 0.8354146156746607, "eval/duration_sec": 8.006095611024648}} +{"timestamp": 1774779707.9254673, "event": "train_step", "step": 785, "epoch": 5, "metrics": {"train/step_loss": 0.5214999057352543, "train/step_real_loss": 0.5214999057352543, "train/lr": 9.803919664484381e-05, "perf/step_duration_sec": 5.382850666064769, "perf/samples_per_sec": 5.9448054544292575, "perf/tokens_per_sec": 4127.181186737512, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22216.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779735.6238468, "event": "train_step", "step": 790, "epoch": 5, "metrics": {"train/step_loss": 0.460898719727993, "train/step_real_loss": 0.460898719727993, "train/lr": 9.633073783868233e-05, "perf/step_duration_sec": 5.7590627782046795, "perf/samples_per_sec": 5.556459658871026, "perf/tokens_per_sec": 4826.6534105511855, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27797.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779743.7360282, "event": "eval_step", "step": 790, "epoch": 5, "metrics": {"eval/loss": 0.8359597510634325, "eval/duration_sec": 7.976056095212698}} +{"timestamp": 1774779771.1896534, "event": "train_step", "step": 795, "epoch": 5, "metrics": {"train/step_loss": 0.472298697088704, "train/step_real_loss": 0.4870236963033676, "train/lr": 9.46847396739775e-05, "train/step_canary_loss": 0.0010986328125, "perf/step_duration_sec": 5.357944064307958, "perf/samples_per_sec": 6.159078856352775, "perf/tokens_per_sec": 4596.912491877844, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24630.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779799.0197642, "event": "train_step", "step": 800, "epoch": 5, "metrics": {"train/step_loss": 0.47349783405661583, "train/step_real_loss": 0.47349783405661583, "train/lr": 9.310173994692131e-05, "perf/step_duration_sec": 5.568522645160556, "perf/samples_per_sec": 5.746587028394378, "perf/tokens_per_sec": 4947.631850602922, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27551.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779807.0529833, "event": "eval_step", "step": 800, "epoch": 5, "metrics": {"eval/loss": 0.8356412670169121, "eval/duration_sec": 8.008117423858494}} +{"timestamp": 1774779835.3282235, "event": "train_step", "step": 805, "epoch": 5, "metrics": {"train/step_loss": 0.48957132175564766, "train/step_real_loss": 0.48957132175564766, "train/lr": 9.158225587025625e-05, "perf/step_duration_sec": 5.564906955230981, "perf/samples_per_sec": 5.750320761413662, "perf/tokens_per_sec": 4639.250971794266, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25817.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779863.427236, "event": "train_step", "step": 810, "epoch": 5, "metrics": {"train/step_loss": 0.400253126726431, "train/step_real_loss": 0.42523926869034767, "train/lr": 9.012678390428698e-05, "train/step_canary_loss": 0.0004748106002807617, "perf/step_duration_sec": 5.578665527980775, "perf/samples_per_sec": 6.0946475155155015, "perf/tokens_per_sec": 4391.910552283685, "perf/logical_batch_size": 34.0, "perf/logical_token_count": 24501.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779871.4737754, "event": "eval_step", "step": 810, "epoch": 5, "metrics": {"eval/loss": 0.8356303077859756, "eval/duration_sec": 8.015352679882199}} +{"timestamp": 1774779898.6069329, "event": "train_step", "step": 815, "epoch": 5, "metrics": {"train/step_loss": 0.49400583282113075, "train/step_real_loss": 0.49400583282113075, "train/lr": 8.873579959467168e-05, "perf/step_duration_sec": 5.174814518075436, "perf/samples_per_sec": 6.183796518353494, "perf/tokens_per_sec": 5106.849705953994, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26427.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779925.9370394, "event": "train_step", "step": 820, "epoch": 5, "metrics": {"train/step_loss": 0.4452643846020554, "train/step_real_loss": 0.459178801625967, "train/lr": 8.740975741704755e-05, "train/step_canary_loss": 2.9802322387695312e-06, "perf/step_duration_sec": 5.363772180862725, "perf/samples_per_sec": 6.152386583035707, "perf/tokens_per_sec": 5104.61650434902, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27380.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779934.1756742, "event": "eval_step", "step": 820, "epoch": 5, "metrics": {"eval/loss": 0.834639798849821, "eval/duration_sec": 8.06301367096603}} +{"timestamp": 1774779960.932439, "event": "train_step", "step": 825, "epoch": 5, "metrics": {"train/step_loss": 0.5011806972324848, "train/step_real_loss": 0.5011806972324848, "train/lr": 8.614909062854017e-05, "perf/step_duration_sec": 5.579815643839538, "perf/samples_per_sec": 5.734956500817367, "perf/tokens_per_sec": 4182.933897783667, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23340.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779988.278773, "event": "train_step", "step": 830, "epoch": 5, "metrics": {"train/step_loss": 0.455628469134822, "train/step_real_loss": 0.4698615223169327, "train/lr": 8.495421112620545e-05, "train/step_canary_loss": 0.00017070770263671875, "perf/step_duration_sec": 5.3757620439864695, "perf/samples_per_sec": 6.13866457071236, "perf/tokens_per_sec": 5074.443358317045, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 27279.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774779996.5944686, "event": "eval_step", "step": 830, "epoch": 5, "metrics": {"eval/loss": 0.8347919674064869, "eval/duration_sec": 8.094597054179758}} +{"timestamp": 1774780025.0807564, "event": "train_step", "step": 835, "epoch": 5, "metrics": {"train/step_loss": 0.46368369832634926, "train/step_real_loss": 0.46368369832634926, "train/lr": 8.382550931245087e-05, "perf/step_duration_sec": 5.777644995599985, "perf/samples_per_sec": 5.538588823711023, "perf/tokens_per_sec": 4142.864440135845, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 23936.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780052.6953216, "event": "train_step", "step": 840, "epoch": 5, "metrics": {"train/step_loss": 0.4633300620498079, "train/step_real_loss": 0.47780904918909073, "train/lr": 8.276335396747944e-05, "train/step_canary_loss": 2.5033950805664062e-06, "perf/step_duration_sec": 5.386584116145968, "perf/samples_per_sec": 6.126331509626749, "perf/tokens_per_sec": 4639.675063290658, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24992.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780060.8030653, "event": "eval_step", "step": 840, "epoch": 5, "metrics": {"eval/loss": 0.8355972007490122, "eval/duration_sec": 8.043062489014119}} +{"timestamp": 1774780088.730534, "event": "train_step", "step": 845, "epoch": 5, "metrics": {"train/step_loss": 0.5168259516358376, "train/step_real_loss": 0.5168259516358376, "train/lr": 8.176809212879815e-05, "perf/step_duration_sec": 5.360136340837926, "perf/samples_per_sec": 5.96999739655831, "perf/tokens_per_sec": 4742.043557053722, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25418.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780116.2042499, "event": "train_step", "step": 850, "epoch": 5, "metrics": {"train/step_loss": 0.4218024015426636, "train/step_real_loss": 0.4218024015426636, "train/lr": 8.084004897783088e-05, "perf/step_duration_sec": 5.767635643016547, "perf/samples_per_sec": 5.5482006805935455, "perf/tokens_per_sec": 4335.225306798782, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25004.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780124.280089, "event": "eval_step", "step": 850, "epoch": 5, "metrics": {"eval/loss": 0.8352956880743686, "eval/duration_sec": 8.048830876126885}} +{"timestamp": 1774780151.5777185, "event": "train_step", "step": 855, "epoch": 5, "metrics": {"train/step_loss": 0.5172661505639553, "train/step_real_loss": 0.5172661505639553, "train/lr": 7.997952773367183e-05, "perf/step_duration_sec": 5.379984455183148, "perf/samples_per_sec": 5.947972576235006, "perf/tokens_per_sec": 5035.330534068948, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27090.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780178.7315419, "event": "train_step", "step": 860, "epoch": 5, "metrics": {"train/step_loss": 0.5238031931221485, "train/step_real_loss": 0.5238031931221485, "train/lr": 7.918680955401487e-05, "perf/step_duration_sec": 5.7775211008265615, "perf/samples_per_sec": 5.538707594754075, "perf/tokens_per_sec": 4628.801787703382, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26743.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780186.8056512, "event": "eval_step", "step": 860, "epoch": 5, "metrics": {"eval/loss": 0.833830323165808, "eval/duration_sec": 7.991446542087942}} +{"timestamp": 1774780214.8234124, "event": "train_step", "step": 865, "epoch": 5, "metrics": {"train/step_loss": 0.44141877442598343, "train/step_real_loss": 0.44141877442598343, "train/lr": 7.846215344329122e-05, "perf/step_duration_sec": 5.57034646300599, "perf/samples_per_sec": 5.744705506653795, "perf/tokens_per_sec": 4468.303751894155, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24890.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780242.6869266, "event": "train_step", "step": 870, "epoch": 5, "metrics": {"train/step_loss": 0.400652464479208, "train/step_real_loss": 0.400652464479208, "train/lr": 7.780579616804475e-05, "perf/step_duration_sec": 5.575916894245893, "perf/samples_per_sec": 5.738966452857758, "perf/tokens_per_sec": 4586.689594744912, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 25575.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780250.8774233, "event": "eval_step", "step": 870, "epoch": 5, "metrics": {"eval/loss": 0.8351624541175671, "eval/duration_sec": 8.086653731763363}} +{"timestamp": 1774780279.2465012, "event": "train_step", "step": 875, "epoch": 5, "metrics": {"train/step_loss": 0.4823111221194267, "train/step_real_loss": 0.4823111221194267, "train/lr": 7.721795217957361e-05, "perf/step_duration_sec": 5.56989385606721, "perf/samples_per_sec": 5.745172318704571, "perf/tokens_per_sec": 4882.139714454169, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27193.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 40.75499105453491}} +{"timestamp": 1774780306.8157218, "event": "train_step", "step": 880, "epoch": 5, "metrics": {"train/step_loss": 0.4977773316204548, "train/step_real_loss": 0.4977773316204548, "train/lr": 7.669881354386264e-05, "perf/step_duration_sec": 5.3732125060632825, "perf/samples_per_sec": 5.955468905034057, "perf/tokens_per_sec": 5360.108122933934, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 28801.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780314.9041886, "event": "eval_step", "step": 880, "epoch": 5, "metrics": {"eval/loss": 0.8351083437028604, "eval/duration_sec": 8.0127690047957}} +{"timestamp": 1774780343.2108471, "event": "train_step", "step": 885, "epoch": 5, "metrics": {"train/step_loss": 0.4285243514812354, "train/step_real_loss": 0.4419095665216446, "train/lr": 7.624854987882956e-05, "train/step_canary_loss": 0.00019741058349609375, "perf/step_duration_sec": 5.7643951191566885, "perf/samples_per_sec": 5.724798407786416, "perf/tokens_per_sec": 4315.630605772809, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 24877.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780371.488473, "event": "train_step", "step": 890, "epoch": 5, "metrics": {"train/step_loss": 0.4784579649567604, "train/step_real_loss": 0.4784579649567604, "train/lr": 7.5867308298906e-05, "perf/step_duration_sec": 5.160004794597626, "perf/samples_per_sec": 6.20154462521102, "perf/tokens_per_sec": 4217.825538221645, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 21764.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780379.5826719, "event": "eval_step", "step": 890, "epoch": 5, "metrics": {"eval/loss": 0.8355882657835115, "eval/duration_sec": 8.020379473920912}} +{"timestamp": 1774780407.0363395, "event": "train_step", "step": 895, "epoch": 5, "metrics": {"train/step_loss": 0.4277057566426017, "train/step_real_loss": 0.44083791226148605, "train/lr": 7.555521336697073e-05, "train/step_canary_loss": 0.007476806640625, "perf/step_duration_sec": 5.571806945372373, "perf/samples_per_sec": 5.922674694859615, "perf/tokens_per_sec": 4670.298209382936, "perf/logical_batch_size": 33.0, "perf/logical_token_count": 26022.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780435.2666588, "event": "train_step", "step": 900, "epoch": 5, "metrics": {"train/step_loss": 0.541201762855053, "train/step_real_loss": 0.541201762855053, "train/lr": 7.53123670536514e-05, "perf/step_duration_sec": 5.370066001079977, "perf/samples_per_sec": 5.9589584175621795, "perf/tokens_per_sec": 5174.051863498911, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 27785.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780443.346381, "event": "eval_step", "step": 900, "epoch": 5, "metrics": {"eval/loss": 0.8348742475112279, "eval/duration_sec": 8.047567105852067}} +{"timestamp": 1774780471.343544, "event": "train_step", "step": 905, "epoch": 5, "metrics": {"train/step_loss": 0.4496690407395363, "train/step_real_loss": 0.4496690407395363, "train/lr": 7.513884870400765e-05, "perf/step_duration_sec": 5.366967746987939, "perf/samples_per_sec": 5.962398417236457, "perf/tokens_per_sec": 4151.69254790196, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 22282.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780498.6208885, "event": "train_step", "step": 910, "epoch": 5, "metrics": {"train/step_loss": 0.47044774517416954, "train/step_real_loss": 0.47044774517416954, "train/lr": 7.503471501160667e-05, "perf/step_duration_sec": 5.56944148009643, "perf/samples_per_sec": 5.745638968352344, "perf/tokens_per_sec": 4738.895290366357, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 26393.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780506.7063458, "event": "eval_step", "step": 910, "epoch": 5, "metrics": {"eval/loss": 0.8341225324532925, "eval/duration_sec": 8.024419151246548}} +{"timestamp": 1774780534.016665, "event": "train_step", "step": 915, "epoch": 5, "metrics": {"train/step_loss": 0.41994332522153854, "train/step_real_loss": 0.41994332522153854, "train/lr": 7.5e-05, "perf/step_duration_sec": 5.601924208924174, "perf/samples_per_sec": 5.712322910228281, "perf/tokens_per_sec": 4364.928743778185, "perf/logical_batch_size": 32.0, "perf/logical_token_count": 24452.0, "perf/gradient_accumulation_steps": 8.0, "system/cuda_memory_allocated_gb": 13.062866687774658, "system/cuda_max_memory_allocated_gb": 45.563560009002686}} +{"timestamp": 1774780542.4377756, "event": "train_epoch", "step": 915, "epoch": 5, "metrics": {"train/epoch_loss": 0.46649868210017165, "train/epoch_real_loss": 0.4699867704318317, "train/epoch_canary_loss": 0.05231993769605954, "perf/epoch_duration_sec": 1165.6133534889668, "perf/epoch_samples_per_sec": 40.538313891620376, "perf/epoch_tokens_per_sec": 31915.27781373528, "perf/epoch_samples": 47252.0, "perf/epoch_tokens": 37200874.0, "system/cuda_epoch_peak_memory_gb": 45.563560009002686, "eval/loss": 0.8347259744619713, "eval/duration_sec": 8.043377958703786}} +{"timestamp": 1774780553.420012, "event": "audit_epoch", "step": 915, "epoch": 5, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 1.0, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.668, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0, "perf/audit_duration_sec": 6.541899859905243}} +{"timestamp": 1774780564.81766, "event": "audit_final", "step": 915, "epoch": 5, "metrics": {"audit/delta": 1e-05, "audit/num_canaries": 500.0, "audit/num_members": 250.0, "audit/paper_guess_fraction": 0.2, "audit/paper_guess_steps": 20.0, "audit/loss/auc": 1.0, "audit/loss/empirical_epsilon/0.05": 3.4791953936219215, "audit/loss/empirical_epsilon/0.01": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/loss/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/loss/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/loss/empirical_epsilon_details/0.01/correct_guesses": 100.0, "audit/embedding/auc": 0.668, "audit/embedding/empirical_epsilon/0.05": 3.4791953936219215, "audit/embedding/empirical_epsilon/0.01": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.05/epsilon": 3.4791953936219215, "audit/embedding/empirical_epsilon_details/0.05/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.05/correct_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/epsilon": 3.023197554051876, "audit/embedding/empirical_epsilon_details/0.01/num_guesses": 100.0, "audit/embedding/empirical_epsilon_details/0.01/correct_guesses": 100.0}} +{"timestamp": 1774780565.4842646, "event": "energy_final", "step": 915, "epoch": null, "metrics": {"energy/codecarbon/duration": 6111.527659785934, "energy/codecarbon/emissions": 0.18161032575848388, "energy/codecarbon/emissions_rate": 2.9716027786879898e-05, "energy/codecarbon/cpu_power": 80.03175773458268, "energy/codecarbon/gpu_power": 2964.403708111266, "energy/codecarbon/ram_power": 38.0, "energy/codecarbon/cpu_energy": 0.13077741875096305, "energy/codecarbon/gpu_energy": 5.0193755718861155, "energy/codecarbon/ram_energy": 0.062094110889303666, "energy/codecarbon/energy_consumed": 5.212247101526386, "energy/codecarbon/water_consumed": 0.0, "energy/codecarbon/cpu_count": 16.0, "energy/codecarbon/gpu_count": 8.0, "energy/codecarbon/longitude": 8.212, "energy/codecarbon/latitude": 47.4843, "energy/codecarbon/ram_total_size": 128.0, "energy/codecarbon/cpu_utilization_percent": 3.2804797896812357, "energy/codecarbon/gpu_utilization_percent": 95.01803319093, "energy/codecarbon/ram_utilization_percent": 25.6338317449885, "energy/codecarbon/ram_used_gb": 507.75555072550003, "energy/codecarbon/pue": 1.0, "energy/codecarbon/wue": 0.0}}