{ "base_model": "sky_v1_3_5_5b_sky_runtime", "curriculum_dir": "csd-dataset", "final_dir": "sky_v1_3_csd_final", "total_minutes": 11.612064441045126, "stages": [ { "train_runtime": 330.6326, "train_samples_per_second": 2.767, "train_steps_per_second": 0.172, "train_loss": 1.0252654144638462, "epoch": 0.9967213114754099, "stage": "stage1_scaffold", "minutes": 5.513923645019531, "train_rows": 915, "val_rows": 58, "learning_rate": 5e-07, "epochs": 1.0 }, { "train_runtime": 205.8567, "train_samples_per_second": 5.446, "train_steps_per_second": 0.34, "train_loss": 1.0430820686476572, "epoch": 0.9991079393398751, "stage": "stage2_bridge", "minutes": 3.435288441181183, "train_rows": 1121, "val_rows": 71, "learning_rate": 5e-07, "epochs": 1.0 }, { "train_runtime": 116.6573, "train_samples_per_second": 5.803, "train_steps_per_second": 0.36, "train_loss": 0.8398375312487284, "epoch": 0.9926144756277696, "stage": "stage3_clean", "minutes": 1.9487456480662029, "train_rows": 677, "val_rows": 43, "learning_rate": 4e-07, "epochs": 1.0 } ] }