veriloop-coder-e1 / toolspec_adapter /epoch_history.json
ConorWang's picture
Upload toolspec_qwen36_rootfix_run1 contents into toolspec_adapter
98746ed verified
raw
history blame
4.41 kB
{
"epochs": [
{
"epoch": 1,
"eval_metrics": {
"avg_binary_accuracy": 0.795,
"confirmation_required_accuracy": 0.8,
"count": 25,
"eval_batches": 25,
"eval_loss": 7.704372234344483,
"mode_accuracy": 0.44,
"permission_bit_accuracy": 0.850909090909091,
"precondition_ok_accuracy": 0.76,
"quality_score": 0.21877532557574186,
"rollback_supported_accuracy": 0.72,
"schema_ok_accuracy": 0.76,
"session_required_accuracy": 1.0,
"tool_accuracy": 0.24,
"trigger_accuracy": 0.76,
"validator_required_accuracy": 0.8,
"worktree_required_accuracy": 0.76
},
"improved": true,
"quality_score": 0.21877532557574186,
"train_metrics": {
"loss": 0.5388983534915107,
"micro_batches": 70,
"optimizer_steps": 5
}
},
{
"epoch": 2,
"eval_metrics": {
"avg_binary_accuracy": 0.81,
"confirmation_required_accuracy": 0.8,
"count": 25,
"eval_batches": 25,
"eval_loss": 7.129253711700439,
"mode_accuracy": 0.48,
"permission_bit_accuracy": 0.8545454545454545,
"precondition_ok_accuracy": 0.76,
"quality_score": 0.4400157397790389,
"rollback_supported_accuracy": 0.8,
"schema_ok_accuracy": 0.76,
"session_required_accuracy": 1.0,
"tool_accuracy": 0.44,
"trigger_accuracy": 0.76,
"validator_required_accuracy": 0.8,
"worktree_required_accuracy": 0.8
},
"improved": true,
"quality_score": 0.4400157397790389,
"train_metrics": {
"loss": 0.4655745736190251,
"micro_batches": 70,
"optimizer_steps": 5
}
},
{
"epoch": 3,
"eval_metrics": {
"avg_binary_accuracy": 0.81,
"confirmation_required_accuracy": 0.8,
"count": 25,
"eval_batches": 25,
"eval_loss": 6.935494079589843,
"mode_accuracy": 0.48,
"permission_bit_accuracy": 0.8545454545454545,
"precondition_ok_accuracy": 0.76,
"quality_score": 0.4229221342606978,
"rollback_supported_accuracy": 0.8,
"schema_ok_accuracy": 0.76,
"session_required_accuracy": 1.0,
"tool_accuracy": 0.4,
"trigger_accuracy": 0.76,
"validator_required_accuracy": 0.8,
"worktree_required_accuracy": 0.8
},
"improved": false,
"quality_score": 0.4229221342606978,
"train_metrics": {
"loss": 0.43205853487764084,
"micro_batches": 70,
"optimizer_steps": 5
}
},
{
"epoch": 4,
"eval_metrics": {
"avg_binary_accuracy": 0.81,
"confirmation_required_accuracy": 0.8,
"count": 25,
"eval_batches": 25,
"eval_loss": 6.897225952148437,
"mode_accuracy": 0.56,
"permission_bit_accuracy": 0.8545454545454545,
"precondition_ok_accuracy": 0.76,
"quality_score": 0.46349615617231893,
"rollback_supported_accuracy": 0.8,
"schema_ok_accuracy": 0.76,
"session_required_accuracy": 1.0,
"tool_accuracy": 0.44,
"trigger_accuracy": 0.76,
"validator_required_accuracy": 0.8,
"worktree_required_accuracy": 0.8
},
"improved": true,
"quality_score": 0.46349615617231893,
"train_metrics": {
"loss": 0.42175399448190415,
"micro_batches": 70,
"optimizer_steps": 5
}
},
{
"epoch": 5,
"eval_metrics": {
"avg_binary_accuracy": 0.81,
"confirmation_required_accuracy": 0.8,
"count": 25,
"eval_batches": 25,
"eval_loss": 6.89590030670166,
"mode_accuracy": 0.56,
"permission_bit_accuracy": 0.8545454545454545,
"precondition_ok_accuracy": 0.76,
"quality_score": 0.4635160408540206,
"rollback_supported_accuracy": 0.8,
"schema_ok_accuracy": 0.76,
"session_required_accuracy": 1.0,
"tool_accuracy": 0.44,
"trigger_accuracy": 0.76,
"validator_required_accuracy": 0.8,
"worktree_required_accuracy": 0.8
},
"improved": false,
"quality_score": 0.4635160408540206,
"train_metrics": {
"loss": 0.4198248211826597,
"micro_batches": 70,
"optimizer_steps": 5
}
}
]
}