Spaces:
Sleeping
Sleeping
File size: 29,911 Bytes
ec4ae03 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | {"iteration": 0, "accuracy": 0.9162, "combined_score": 0.9162, "step_accuracy": 0.9111, "lccp": 0.8392, "correct_rate": 0.7867, "prm_mean": 0.8988, "prm_final": 0.9275, "sympy_mean": 0.0, "format_mean": 1.0, "n_scored": 150, "total": 150, "final_answer_correct": 118, "final_answer_accuracy": 0.7866666666666666}
{"iteration": 1, "loss": 0.0006103356778718686, "mean_reward": 0.914308755129325, "std_reward": 0.1636050993381563, "batch_accuracy": 0.96, "grounded_accuracy": 0.96, "gt_match_rate": 0.78, "step_accuracy": 0.8948611111111111, "lccp": 0.8141111111111111, "n_groups": 12, "skipped_groups": 8, "learning_rate": 1.0625000000000002e-06, "iter_time_s": 127.63799649500288, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 2, "loss": -3.432962815471304e-05, "mean_reward": 0.8478923191518654, "std_reward": 0.2160182166583165, "batch_accuracy": 0.9141414141414141, "grounded_accuracy": 0.9141414141414141, "gt_match_rate": 0.6515, "step_accuracy": 0.8666916416916417, "lccp": 0.7653809153809155, "n_groups": 18, "skipped_groups": 2, "learning_rate": 1.6250000000000001e-06, "iter_time_s": 199.5183933188673, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 3, "loss": 0.0003658987698145211, "mean_reward": 0.8963912433066207, "std_reward": 0.17069859725714537, "batch_accuracy": 0.9545454545454546, "grounded_accuracy": 0.9545454545454546, "gt_match_rate": 0.7071, "step_accuracy": 0.876897947731281, "lccp": 0.765237694404361, "n_groups": 12, "skipped_groups": 8, "learning_rate": 2.1875000000000002e-06, "iter_time_s": 189.83606291818433, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 4, "loss": 0.0009415318305731158, "mean_reward": 0.8654313890820613, "std_reward": 0.21875612713334075, "batch_accuracy": 0.8939393939393939, "grounded_accuracy": 0.8939393939393939, "gt_match_rate": 0.7323, "step_accuracy": 0.8585036876703543, "lccp": 0.7649821628988295, "n_groups": 11, "skipped_groups": 9, "learning_rate": 2.7500000000000004e-06, "iter_time_s": 182.12547484994866, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 5, "loss": 8.118284122815567e-05, "mean_reward": 0.8568747993989829, "std_reward": 0.23948718740823036, "batch_accuracy": 0.8844221105527639, "grounded_accuracy": 0.8844221105527639, "gt_match_rate": 0.6935, "step_accuracy": 0.9185, "lccp": 0.8431, "n_groups": 16, "skipped_groups": 4, "learning_rate": 3.3125000000000005e-06, "iter_time_s": 201.67919013393112, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0, "accuracy": 0.9192, "combined_score": 0.9192, "correct_rate": 0.7933, "prm_mean": 0.9035, "prm_final": 0.9305, "sympy_mean": 0.0, "format_mean": 0.9977, "n_scored": 150, "total": 150, "final_answer_correct": 119, "final_answer_accuracy": 0.7933333333333333}
{"iteration": 6, "loss": -6.271734067316477e-05, "mean_reward": 0.8792530329566163, "std_reward": 0.21531797453446344, "batch_accuracy": 0.9095477386934674, "grounded_accuracy": 0.9095477386934674, "gt_match_rate": 0.7487, "step_accuracy": 0.8846455219822055, "lccp": 0.8058971263242619, "n_groups": 12, "skipped_groups": 8, "learning_rate": 3.875e-06, "iter_time_s": 193.35031225602143, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 7, "loss": 0.0010708057920315436, "mean_reward": 0.8378877251545859, "std_reward": 0.2233563664223874, "batch_accuracy": 0.883248730964467, "grounded_accuracy": 0.883248730964467, "gt_match_rate": 0.6396, "step_accuracy": 0.8130725309659319, "lccp": 0.6580686304671076, "n_groups": 14, "skipped_groups": 6, "learning_rate": 4.4375e-06, "iter_time_s": 208.22394350194372, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 8, "loss": -0.0002566667799678376, "mean_reward": 0.8755362041151912, "std_reward": 0.20010863742401203, "batch_accuracy": 0.895, "grounded_accuracy": 0.895, "gt_match_rate": 0.69, "step_accuracy": 0.8647215007215007, "lccp": 0.7479280303030303, "n_groups": 13, "skipped_groups": 7, "learning_rate": 5e-06, "iter_time_s": 170.59595341305248, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 9, "loss": 5.9516330460004004e-05, "mean_reward": 0.906506146327221, "std_reward": 0.1769136401553803, "batch_accuracy": 0.9646464646464646, "grounded_accuracy": 0.9646464646464646, "gt_match_rate": 0.803, "step_accuracy": 0.8935726310726311, "lccp": 0.8175324675324676, "n_groups": 15, "skipped_groups": 5, "learning_rate": 4.995894997002465e-06, "iter_time_s": 221.35066892812029, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 10, "loss": 0.0004252615440886335, "mean_reward": 0.8807654454859567, "std_reward": 0.17550108931309533, "batch_accuracy": 0.9547738693467337, "grounded_accuracy": 0.9547738693467337, "gt_match_rate": 0.6834, "step_accuracy": 0.9205, "lccp": 0.8426, "n_groups": 14, "skipped_groups": 6, "learning_rate": 4.983594966720622e-06, "iter_time_s": 188.98177218902856, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0, "accuracy": 0.9199, "combined_score": 0.9199, "correct_rate": 0.7933, "prm_mean": 0.9066, "prm_final": 0.9408, "sympy_mean": 0.0, "format_mean": 0.998, "n_scored": 150, "total": 150, "final_answer_correct": 119, "final_answer_accuracy": 0.7933333333333333}
{"iteration": 11, "loss": -0.0005566358695432427, "mean_reward": 0.9698135460130081, "std_reward": 0.0983216960471261, "batch_accuracy": 0.985, "grounded_accuracy": 0.985, "gt_match_rate": 0.93, "step_accuracy": 0.9662678571428571, "lccp": 0.9218095238095237, "n_groups": 8, "skipped_groups": 12, "learning_rate": 4.963144790631074e-06, "iter_time_s": 141.96677790791728, "training_phase": "GROUNDED_ONLY", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 12, "loss": 7.270745637859883e-05, "mean_reward": 0.8492740230597824, "std_reward": 0.2128636238290247, "batch_accuracy": 0.9, "grounded_accuracy": 0.9, "gt_match_rate": 0.65, "step_accuracy": 0.8205257936507937, "lccp": 0.6872718253968253, "n_groups": 14, "skipped_groups": 6, "learning_rate": 4.934619089208618e-06, "iter_time_s": 177.9547567779664, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.0, "extraction_success_rate": 0.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 13, "loss": 0.00026773045517204864, "mean_reward": 0.8988236995312778, "std_reward": 0.18599151493605476, "batch_accuracy": 0.93, "grounded_accuracy": 0.93, "gt_match_rate": 0.78, "step_accuracy": 0.8709603174603174, "lccp": 0.7887301587301587, "n_groups": 14, "skipped_groups": 6, "learning_rate": 4.898121949644228e-06, "iter_time_s": 556.1856374200433, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.0, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": -0.04, "extraction_success_rate": 1.0, "chain_scoring_active": 0, "n_self_play_groups": 0, "q_gen_attempts": 0, "q_gen_valid": 0, "q_gen_valid_rate": 0.0, "mean_question_reward": 0.0, "q_quality_rate": 0.0, "q_topic_match": 0.0, "q_difficulty_fit": 0.0, "q_clarity": 0.0, "q_novelty": 0.0, "q_solvability": 0.0}
{"iteration": 14, "loss": 0.0004961729192069066, "mean_reward": 0.8558324048863098, "std_reward": 0.20849902292009304, "batch_accuracy": 0.9523809523809523, "grounded_accuracy": 0.9473684210526315, "gt_match_rate": 0.6737, "step_accuracy": 0.8576065162907268, "lccp": 0.7478070175438597, "n_groups": 18, "skipped_groups": 3, "learning_rate": 4.853786546042184e-06, "iter_time_s": 568.4005180909298, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.036, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.209, "extraction_success_rate": 1.0, "chain_scoring_active": 0, "n_self_play_groups": 1, "q_gen_attempts": 1, "q_gen_valid": 1, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.763, "q_quality_rate": 1.0, "q_topic_match": 0.575, "q_difficulty_fit": 0.89, "q_clarity": 1.0, "q_novelty": 0.4289, "q_solvability": 1.0}
{"iteration": 15, "loss": 2.3262581635208335e-05, "mean_reward": 0.927972135586315, "std_reward": 0.16718736928397065, "batch_accuracy": 0.937799043062201, "grounded_accuracy": 0.9312169312169312, "gt_match_rate": 0.836, "step_accuracy": 0.9242, "lccp": 0.8424, "n_groups": 12, "skipped_groups": 9, "learning_rate": 4.801774653482204e-06, "iter_time_s": 550.1437717408407, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.071, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.079, "extraction_success_rate": 0.98, "chain_scoring_active": 0, "n_self_play_groups": 1, "q_gen_attempts": 1, "q_gen_valid": 1, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.7218, "q_quality_rate": 1.0, "q_topic_match": 0.35, "q_difficulty_fit": 0.9511, "q_clarity": 1.0, "q_novelty": 0.458, "q_solvability": 1.0, "accuracy": 0.9262, "combined_score": 0.9262, "correct_rate": 0.8, "prm_mean": 0.9072, "prm_final": 0.9404, "sympy_mean": 0.0, "format_mean": 1.0, "n_scored": 150, "total": 150, "final_answer_correct": 120, "final_answer_accuracy": 0.8}
{"iteration": 16, "loss": 0.0003296181123005226, "mean_reward": 0.9146047620088099, "std_reward": 0.17273258044260062, "batch_accuracy": 0.9497716894977168, "grounded_accuracy": 0.9385474860335196, "gt_match_rate": 0.8324, "step_accuracy": 0.8955234709424654, "lccp": 0.8438994897095455, "n_groups": 15, "skipped_groups": 7, "learning_rate": 4.742276057719723e-06, "iter_time_s": 575.5289459908381, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.107, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.089, "extraction_success_rate": 0.94, "chain_scoring_active": 0, "n_self_play_groups": 2, "q_gen_attempts": 2, "q_gen_valid": 2, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.7878, "q_quality_rate": 1.0, "q_topic_match": 0.875, "q_difficulty_fit": 0.5838, "q_clarity": 1.0, "q_novelty": 0.4475, "q_solvability": 0.96}
{"iteration": 17, "loss": -0.00013719029248022708, "mean_reward": 0.8881227328092163, "std_reward": 0.1950058307020988, "batch_accuracy": 0.9383259911894273, "grounded_accuracy": 0.9161676646706587, "gt_match_rate": 0.7006, "step_accuracy": 0.8557955517536356, "lccp": 0.7682349586541203, "n_groups": 20, "skipped_groups": 3, "learning_rate": 4.675507862678258e-06, "iter_time_s": 616.0185732548125, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.143, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": -0.191, "extraction_success_rate": 1.0, "chain_scoring_active": 0, "n_self_play_groups": 3, "q_gen_attempts": 3, "q_gen_valid": 3, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.7982, "q_quality_rate": 1.0, "q_topic_match": 0.69, "q_difficulty_fit": 0.8892, "q_clarity": 1.0, "q_novelty": 0.4616, "q_solvability": 1.0}
{"iteration": 18, "loss": 7.917114673641903e-05, "mean_reward": 0.8664005137011263, "std_reward": 0.178010205898339, "batch_accuracy": 0.9539748953974896, "grounded_accuracy": 0.9433962264150944, "gt_match_rate": 0.5912, "step_accuracy": 0.830780173704702, "lccp": 0.6920110811620246, "n_groups": 19, "skipped_groups": 5, "learning_rate": 4.601713698260728e-06, "iter_time_s": 550.572628196096, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.179, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.021, "extraction_success_rate": 0.98, "chain_scoring_active": 0, "n_self_play_groups": 4, "q_gen_attempts": 4, "q_gen_valid": 4, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.7394, "q_quality_rate": 1.0, "q_topic_match": 0.6375, "q_difficulty_fit": 0.6293, "q_clarity": 1.0, "q_novelty": 0.452, "q_solvability": 0.9762}
{"iteration": 19, "loss": 0.00015087392284840462, "mean_reward": 0.8912812767256229, "std_reward": 0.1726645221785555, "batch_accuracy": 0.9535864978902954, "grounded_accuracy": 0.9490445859872612, "gt_match_rate": 0.7643, "step_accuracy": 0.8513975055376328, "lccp": 0.7568744772566428, "n_groups": 16, "skipped_groups": 8, "learning_rate": 4.521162831370364e-06, "iter_time_s": 522.4289600129705, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.214, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.075, "extraction_success_rate": 0.98, "chain_scoring_active": 0, "n_self_play_groups": 4, "q_gen_attempts": 4, "q_gen_valid": 4, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.7331, "q_quality_rate": 1.0, "q_topic_match": 0.4813, "q_difficulty_fit": 0.8466, "q_clarity": 1.0, "q_novelty": 0.4564, "q_solvability": 0.9725}
{"iteration": 20, "loss": 0.00024373266084391312, "mean_reward": 0.8962914079724992, "std_reward": 0.1778417367801085, "batch_accuracy": 0.927710843373494, "grounded_accuracy": 0.9060402684563759, "gt_match_rate": 0.7987, "step_accuracy": 0.9253, "lccp": 0.8428, "n_groups": 18, "skipped_groups": 7, "learning_rate": 4.434149183384978e-06, "iter_time_s": 619.8863487117924, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.25, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": -0.118, "extraction_success_rate": 0.96, "chain_scoring_active": 0, "n_self_play_groups": 5, "q_gen_attempts": 5, "q_gen_valid": 5, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.77, "q_quality_rate": 1.0, "q_topic_match": 0.723, "q_difficulty_fit": 0.703, "q_clarity": 1.0, "q_novelty": 0.4741, "q_solvability": 0.945, "accuracy": 0.9234, "combined_score": 0.9234, "correct_rate": 0.8, "prm_mean": 0.9056, "prm_final": 0.9353, "sympy_mean": 0.0, "format_mean": 1.0, "n_scored": 150, "total": 150, "final_answer_correct": 120, "final_answer_accuracy": 0.8}
{"iteration": 21, "loss": 0.0001916794737033862, "mean_reward": 0.8417323480901788, "std_reward": 0.1879809468583581, "batch_accuracy": 0.9230769230769231, "grounded_accuracy": 0.9142857142857143, "gt_match_rate": 0.7357, "step_accuracy": 0.8195039682539682, "lccp": 0.6930612244897959, "n_groups": 21, "skipped_groups": 5, "learning_rate": 4.340990257669732e-06, "iter_time_s": 490.36693838005885, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.286, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.209, "extraction_success_rate": 0.92, "chain_scoring_active": 0, "n_self_play_groups": 6, "q_gen_attempts": 6, "q_gen_valid": 6, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6972, "q_quality_rate": 1.0, "q_topic_match": 0.5742, "q_difficulty_fit": 0.4754, "q_clarity": 1.0, "q_novelty": 0.4493, "q_solvability": 0.9625}
{"iteration": 22, "loss": 0.000578732604299148, "mean_reward": 0.9175190043251262, "std_reward": 0.12424225720214971, "batch_accuracy": 0.984313725490196, "grounded_accuracy": 0.9852941176470589, "gt_match_rate": 0.9044, "step_accuracy": 0.9647345301757068, "lccp": 0.9284886681945506, "n_groups": 20, "skipped_groups": 6, "learning_rate": 4.2420259810417895e-06, "iter_time_s": 611.8722857821267, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.321, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.145, "extraction_success_rate": 0.92, "chain_scoring_active": 0, "n_self_play_groups": 6, "q_gen_attempts": 6, "q_gen_valid": 6, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6998, "q_quality_rate": 1.0, "q_topic_match": 0.6189, "q_difficulty_fit": 0.3856, "q_clarity": 1.0, "q_novelty": 0.4571, "q_solvability": 0.979}
{"iteration": 23, "loss": 0.0006137362383419208, "mean_reward": 0.9206978778568132, "std_reward": 0.14741914089456262, "batch_accuracy": 0.9770114942528736, "grounded_accuracy": 0.9508196721311475, "gt_match_rate": 0.8033, "step_accuracy": 0.9075003548364204, "lccp": 0.847631466893762, "n_groups": 18, "skipped_groups": 9, "learning_rate": 4.137617463414222e-06, "iter_time_s": 444.32088500098325, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.357, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.143, "extraction_success_rate": 1.0, "chain_scoring_active": 0, "n_self_play_groups": 7, "q_gen_attempts": 7, "q_gen_valid": 7, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.726, "q_quality_rate": 1.0, "q_topic_match": 0.5621, "q_difficulty_fit": 0.6634, "q_clarity": 1.0, "q_novelty": 0.4412, "q_solvability": 0.9885}
{"iteration": 24, "loss": -0.00021296025724950595, "mean_reward": 0.8795895609748888, "std_reward": 0.1733128827089799, "batch_accuracy": 0.9357142857142857, "grounded_accuracy": 0.9333333333333333, "gt_match_rate": 0.7917, "step_accuracy": 0.8988194444444446, "lccp": 0.8122916666666666, "n_groups": 20, "skipped_groups": 8, "learning_rate": 4.0281456801451e-06, "iter_time_s": 471.6989622868132, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.393, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.082, "extraction_success_rate": 0.98, "chain_scoring_active": 0, "n_self_play_groups": 8, "q_gen_attempts": 8, "q_gen_valid": 8, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6621, "q_quality_rate": 1.0, "q_topic_match": 0.5344, "q_difficulty_fit": 0.3108, "q_clarity": 1.0, "q_novelty": 0.4408, "q_solvability": 0.9688}
{"iteration": 25, "loss": 0.0003441530472758002, "mean_reward": 0.8445275205076134, "std_reward": 0.20865777545087066, "batch_accuracy": 0.9273356401384083, "grounded_accuracy": 0.8532110091743119, "gt_match_rate": 0.6055, "step_accuracy": 0.9198, "lccp": 0.8468, "n_groups": 28, "skipped_groups": 1, "learning_rate": 3.9140100818997275e-06, "iter_time_s": 524.655717118876, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.429, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.127, "extraction_success_rate": 0.94, "chain_scoring_active": 0, "n_self_play_groups": 9, "q_gen_attempts": 9, "q_gen_valid": 9, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6471, "q_quality_rate": 1.0, "q_topic_match": 0.505, "q_difficulty_fit": 0.2634, "q_clarity": 1.0, "q_novelty": 0.4394, "q_solvability": 0.9672, "accuracy": 0.9221, "combined_score": 0.9221, "correct_rate": 0.7933, "prm_mean": 0.9034, "prm_final": 0.9329, "sympy_mean": 0.0, "format_mean": 1.0, "n_scored": 150, "total": 150, "final_answer_correct": 119, "final_answer_accuracy": 0.7933333333333333}
{"iteration": 26, "loss": 0.0004209962865808428, "mean_reward": 0.8666489827432893, "std_reward": 0.1796360842988206, "batch_accuracy": 0.9204152249134948, "grounded_accuracy": 0.926605504587156, "gt_match_rate": 0.789, "step_accuracy": 0.8898463666812292, "lccp": 0.7943024610455803, "n_groups": 26, "skipped_groups": 3, "learning_rate": 3.795627137098479e-06, "iter_time_s": 509.6774504878558, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.464, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.065, "extraction_success_rate": 0.94, "chain_scoring_active": 0, "n_self_play_groups": 9, "q_gen_attempts": 9, "q_gen_valid": 9, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6792, "q_quality_rate": 1.0, "q_topic_match": 0.6639, "q_difficulty_fit": 0.2476, "q_clarity": 1.0, "q_novelty": 0.4488, "q_solvability": 0.9317}
{"iteration": 27, "loss": -0.00022697661013808103, "mean_reward": 0.877933982604161, "std_reward": 0.1628662024521015, "batch_accuracy": 0.9563758389261745, "grounded_accuracy": 0.9393939393939394, "gt_match_rate": 0.6869, "step_accuracy": 0.8616281866281865, "lccp": 0.7406565656565657, "n_groups": 25, "skipped_groups": 5, "learning_rate": 3.673428812268702e-06, "iter_time_s": 597.5212381640449, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.5, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.067, "extraction_success_rate": 0.92, "chain_scoring_active": 0, "n_self_play_groups": 10, "q_gen_attempts": 10, "q_gen_valid": 10, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6831, "q_quality_rate": 1.0, "q_topic_match": 0.5699, "q_difficulty_fit": 0.3583, "q_clarity": 1.0, "q_novelty": 0.4584, "q_solvability": 0.9759}
{"iteration": 28, "loss": 4.199455770111822e-05, "mean_reward": 0.8695997487614422, "std_reward": 0.15915376074701193, "batch_accuracy": 0.9419354838709677, "grounded_accuracy": 0.8777777777777778, "gt_match_rate": 0.6556, "step_accuracy": 0.8334434828062279, "lccp": 0.6186230200445887, "n_groups": 29, "skipped_groups": 2, "learning_rate": 3.5478609958457035e-06, "iter_time_s": 603.0997926741838, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.536, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.047, "extraction_success_rate": 0.8, "chain_scoring_active": 0, "n_self_play_groups": 11, "q_gen_attempts": 11, "q_gen_valid": 11, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6693, "q_quality_rate": 1.0, "q_topic_match": 0.5931, "q_difficulty_fit": 0.23, "q_clarity": 1.0, "q_novelty": 0.4489, "q_solvability": 0.9836}
{"iteration": 29, "loss": 0.0003765096731578004, "mean_reward": 0.8674408392873937, "std_reward": 0.17082623284979875, "batch_accuracy": 0.9470198675496688, "grounded_accuracy": 0.8928571428571429, "gt_match_rate": 0.7262, "step_accuracy": 0.8674065194639727, "lccp": 0.7603936306964257, "n_groups": 28, "skipped_groups": 3, "learning_rate": 3.419381871174205e-06, "iter_time_s": 579.6904674370307, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.571, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.123, "extraction_success_rate": 0.84, "chain_scoring_active": 0, "n_self_play_groups": 11, "q_gen_attempts": 11, "q_gen_valid": 11, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6496, "q_quality_rate": 1.0, "q_topic_match": 0.5636, "q_difficulty_fit": 0.1695, "q_clarity": 1.0, "q_novelty": 0.4425, "q_solvability": 0.9739}
{"iteration": 30, "loss": -0.00029927124827130075, "mean_reward": 0.8705812118012987, "std_reward": 0.16025951815561293, "batch_accuracy": 0.9655172413793104, "grounded_accuracy": 0.95, "gt_match_rate": 0.8, "step_accuracy": 0.9232, "lccp": 0.85, "n_groups": 27, "skipped_groups": 5, "learning_rate": 3.2884602446470037e-06, "iter_time_s": 503.08798154001124, "training_phase": "SELFPLAY_RAMP", "effective_sp_ratio": 0.607, "selfplay_suspended": 0, "chain_arith_score": null, "chain_dep_score": null, "chain_integrity_score": null, "sp_chain_integrity_score": null, "chain_prm_correlation": 0.099, "extraction_success_rate": 0.92, "chain_scoring_active": 0, "n_self_play_groups": 12, "q_gen_attempts": 12, "q_gen_valid": 12, "q_gen_valid_rate": 1.0, "mean_question_reward": 0.6764, "q_quality_rate": 1.0, "q_topic_match": 0.6752, "q_difficulty_fit": 0.1485, "q_clarity": 1.0, "q_novelty": 0.4566, "q_solvability": 0.9699, "accuracy": 0.9204, "combined_score": 0.9204, "correct_rate": 0.7933, "prm_mean": 0.9044, "prm_final": 0.9289, "sympy_mean": 0.0, "format_mean": 1.0, "n_scored": 150, "total": 150, "final_answer_correct": 119, "final_answer_accuracy": 0.7933333333333333} |