veriloop-coder-e1 / uncertainty_adapter_train_result.json
ConorWang's picture
Upload uncertainty adapter artifacts to repo root
a856587 verified
raw
history blame
21 kB
{
"artifacts": {
"adapter_dir": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/adapter",
"best_checkpoint_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/best_checkpoint_manifest.json",
"epoch_history": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/epoch_history.json",
"eval_jsonl": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_eval.jsonl",
"host_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/host_manifest.json",
"plan_json": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_adapter_plan.json",
"tokenizer_dir": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/tokenizer",
"train_jsonl": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_train.jsonl",
"training_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_training_manifest.json",
"uncertainty_head": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_head.pt"
},
"dataset": {
"eval_modes": {
"conflicting_evidence": 5,
"evidence_gap": 5,
"exec_required": 5,
"high_risk": 5,
"low_uncertainty": 5,
"patch_pending": 5,
"reverse_engineering_ambiguity": 5,
"self_check_failure": 5,
"spec_mismatch": 5,
"validator_negation": 5,
"worktree_conflict": 5
},
"eval_size": 55,
"train_modes": {
"conflicting_evidence": 14,
"evidence_gap": 14,
"exec_required": 14,
"high_risk": 14,
"low_uncertainty": 14,
"patch_pending": 14,
"reverse_engineering_ambiguity": 14,
"self_check_failure": 14,
"spec_mismatch": 14,
"validator_negation": 14,
"worktree_conflict": 14
},
"train_size": 154
},
"eval_metrics": {
"adapter_exported": true,
"auto_lora_from_ia3": false,
"best_epoch": 2,
"best_quality_score": 0.8625935807221907,
"count": 55,
"mae": {
"u_answer": 0.15174226462841034,
"u_evidence": 0.19610758125782013,
"u_exec": 0.18561214208602905,
"u_risk": 0.1553734689950943,
"u_spec": 0.21633382141590118
},
"mean_mae": 0.18103384971618652,
"mean_rmse": 0.24169571697711945,
"moderate_accuracy": 0.6727272727272727,
"peft_method": "lora_narrow",
"quality_score": 0.8625935807221907,
"rmse": {
"u_answer": 0.18724055588245392,
"u_evidence": 0.22527915239334106,
"u_exec": 0.25238174200057983,
"u_risk": 0.20667441189289093,
"u_spec": 0.3369026482105255
},
"tight_accuracy": 0.4,
"used_peft": true,
"weighted_mae": 0.18083095811830807,
"weighted_rmse": 0.24125460771003793
},
"plan": {
"adapter_alpha": 16,
"adapter_dropout": 0.05,
"adapter_rank": 8,
"backbone_manifest": {
"host_inventory_size": 79
},
"config": {
"allow_backbone_bridge": false,
"backbone": "/public/wang_libo/veriloop_coder_e1/model",
"bf16": true,
"cache_dir": null,
"cpu_max_memory_gib": 96,
"dataset_jsonl": null,
"dim_weights": {
"u_answer": 1.1,
"u_evidence": 1.35,
"u_exec": 1.35,
"u_risk": 1.55,
"u_spec": 1.25
},
"early_stopping_min_delta": 0.001,
"early_stopping_patience": 2,
"enable_synthetic_dataset": true,
"eval_jsonl": null,
"eval_samples_per_mode": 5,
"fp16": false,
"gpu_max_memory_gib": 44,
"gradient_accumulation_steps": 16,
"host_dropout": 0.0,
"learning_rate": 2e-05,
"local_files_only": true,
"logging_steps": 10,
"max_grad_norm": 1.0,
"max_length": 1664,
"min_epochs_before_early_stop": 2,
"num_train_epochs": 4.0,
"output_dir": "./outputs/uncertainty_qwen36_rootfix_run1",
"per_device_eval_batch_size": 1,
"per_device_train_batch_size": 1,
"prefer_best_checkpoint_export": true,
"probe_dropout": 0.03,
"probe_hidden_factor": 0.75,
"product_line": "veriloop_coder",
"quantization_mode": "4bit",
"quantization_required": false,
"require_trainable_targets": true,
"revision": null,
"run_post_train_eval": true,
"save_best_checkpoint": true,
"seed": 11,
"selection_mode": "minimal",
"train_samples_per_mode": 14,
"training_mode": "mounted_head",
"trust_remote_code": true,
"use_double_quant": true,
"warmup_ratio": 0.05,
"weight_decay": 0.0,
"weighted_mae_penalty": 0.5,
"weighted_rmse_penalty": 0.5
},
"dataset_summary": {
"eval_modes": {
"conflicting_evidence": 5,
"evidence_gap": 5,
"exec_required": 5,
"high_risk": 5,
"low_uncertainty": 5,
"patch_pending": 5,
"reverse_engineering_ambiguity": 5,
"self_check_failure": 5,
"spec_mismatch": 5,
"validator_negation": 5,
"worktree_conflict": 5
},
"eval_size": 55,
"train_modes": {
"conflicting_evidence": 14,
"evidence_gap": 14,
"exec_required": 14,
"high_risk": 14,
"low_uncertainty": 14,
"patch_pending": 14,
"reverse_engineering_ambiguity": 14,
"self_check_failure": 14,
"spec_mismatch": 14,
"validator_negation": 14,
"worktree_conflict": 14
},
"train_size": 154
},
"head_strategy": "host_head",
"notes": [
"Primary route is host-surface-first uncertainty training.",
"Validator receipts, self-check failure, reverse-engineering ambiguity, worktree conflicts, and patch continuity are first-class signals.",
"DualPath, Full AttnRes, mHC, routers, experts, and broad attention-layer PEFT stay excluded.",
"This adapter should improve runtime uncertainty routing, not general coding free-formity."
],
"peft_method": "lora_narrow",
"recipe": {
"adapter_family": "uncertainty",
"backbone": "/public/wang_libo/veriloop_coder_e1/model",
"backbone_family": "qwen_dense",
"excluded_patterns": [
"(?i)\\bdualpath\\b",
"(?i)\\bmhc\\b",
"(?i)\\bfull[_\\- ]?attnres\\b",
"(?i)\\battnres(_full)?\\b",
"(?i)\\brouter\\b",
"(?i)\\bexperts?\\b",
"(?i)\\bmoe\\b.*\\b(gate|router|expert)\\b",
"(?i)\\brope\\b.*\\b(freq|inv_freq|theta|rotary)\\b",
"(?i)\\bkvcache\\b",
"(?i)\\bposition_embedding\\b",
"(?i)\\bembed(tokens|ding)?\\b",
"(?i)\\blm_head\\b"
],
"harness_constraints": [
"Harness Engineering remains the primary convergence layer.",
"Adapter must not bypass runtime orchestrator / validator / rollback loops.",
"Adapter outputs remain subordinate to VeriLoop control-plane decisions.",
"Adapter must not create hidden prompt-style memory authority.",
"Adapter must support bounded uncertainty calibration rather than generic hesitation.",
"Validator and receipt evidence must remain able to update uncertainty."
],
"hyperparams": {
"alpha": 16,
"bias": "none",
"dropout": 0.05,
"fan_in_fan_out": false,
"modules_to_save": [
"input_layernorm"
],
"r": 8,
"task_type": "CAUSAL_LM"
},
"merge_policy": "merge_after_guard",
"metadata": {
"allow_backbone_bridge": false,
"allow_vla_action_expert": false,
"harness_first": true,
"prefer_explicit_heads": true,
"prefer_qlora_for_backbone_bridge": true,
"require_harness_first": true,
"selector_group_count": 2,
"strict_narrow_scope": true,
"trainer": "veriloop.uncertainty_adapter_trainer.v5.qwen36",
"uncertainty_training": true
},
"notes": [
"Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op.",
"Backbone family inferred as qwen_dense.",
"PEFT method resolved as lora_narrow.",
"Recipe is harness-first: runtime convergence remains in VeriLoop control-plane + harness, not in broad weight surgery.",
"Block AttnRes, DualPath, mHC hooks, RoPE, KV-cache, and broad MoE routing remain structurally excluded."
],
"peft_method": "lora_narrow",
"precision_policy": "auto",
"product_line": "veriloop_coder",
"regression_requirements": [
"Must pass PEFT regression guard structural policy checks.",
"Must not introduce forbidden backbone/serving structural targets.",
"Must preserve harness regression envelope for the selected product line.",
"Budgeted uncertainty convergence must not regress.",
"Uncertainty calibration must not collapse into generic caution."
],
"target_groups": [
{
"alpha": 16,
"dropout": 0.0,
"name": "group_1_custom_control_head",
"rank": 8,
"rationale": "Prefer explicit uncertainty / calibration heads over backbone surgery.",
"surface": "custom_control_head",
"target_modules": [
"uncertainty_head",
"uncertainty_head.calibration_mlp",
"uncertainty_head.proj"
]
},
{
"alpha": 16,
"dropout": 0.0,
"name": "group_2_custom_validator_bridge",
"rank": 8,
"rationale": "Validation and rollback fidelity should prefer explicit validator / rollback bridges.",
"surface": "custom_validator_bridge",
"target_modules": [
"failure_signal_bridge",
"failure_signal_bridge.rollback_bridge",
"rollback_adapter",
"rollback_adapter.head",
"sandbox_rollback_bridge",
"sandbox_rollback_bridge.adapter",
"validator_feedback_bridge",
"validator_feedback_bridge.adapter",
"validator_feedback_loop.rollback_adapter"
]
}
],
"target_modules": [
"uncertainty_head",
"uncertainty_head.calibration_mlp",
"uncertainty_head.proj",
"failure_signal_bridge",
"failure_signal_bridge.rollback_bridge",
"rollback_adapter",
"rollback_adapter.head",
"sandbox_rollback_bridge",
"sandbox_rollback_bridge.adapter",
"validator_feedback_bridge",
"validator_feedback_bridge.adapter",
"validator_feedback_loop.rollback_adapter"
],
"version": "veriloop.lora_recipe_veriloop.v2"
},
"selected_target_modules": [
"uncertainty_head",
"uncertainty_head.calibration_mlp",
"uncertainty_head.proj"
],
"target_selection": {
"backbone_archetype": "qwen_dense",
"exclusions": [
{
"pattern": "(^|\\.)lm_head($|\\.)",
"reason": "Do not retune final token head; too broad and evaluation-heavy."
},
{
"pattern": "(^|\\.)embed_tokens($|\\.)",
"reason": "Embedding surgery risks broad semantic drift."
},
{
"pattern": "(^|\\.)norm($|\\.)",
"reason": "Global norm tuning can destabilize calibration across scenes."
},
{
"pattern": "attnres|attention_residual",
"reason": "Block AttnRes may be mounted structurally but is never a PEFT target."
},
{
"pattern": "dualpath",
"reason": "DualPath is serving/runtime infrastructure only."
},
{
"pattern": "mhc|hyper[-_]?connection",
"reason": "mHC-inspired stability hooks remain structural, not PEFT surfaces."
},
{
"pattern": "rope|rotary",
"reason": "RoPE/context surgery is handled architecturally, not by narrow PEFT here."
},
{
"pattern": "kvcache|kv_cache",
"reason": "KV-cache runtime surfaces are not PEFT targets."
},
{
"pattern": "(^|\\.)memory(_store|_bank)?($|\\.)",
"reason": "Persistent memory stores are harness/runtime policy surfaces, not PEFT targets."
}
],
"inventory_size": 79,
"inventory_source": "provided_names",
"notes": [
"Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.",
"Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op."
],
"recommended_training": "ia3_head_only",
"request": {
"allow_backbone_bridge": false,
"allow_vla_action_expert": false,
"backbone": "/public/wang_libo/veriloop_coder_e1/model",
"explicit_exclude_patterns": [],
"explicit_include_patterns": [],
"intents": [
"uncertainty",
"validator_alignment",
"harness_alignment",
"runtime_protocol",
"session_continuity",
"worktree_discipline"
],
"prefer_qlora_for_backbone_bridge": true,
"product_line": "veriloop_coder",
"selection_mode": "minimal"
},
"selected_groups": [
{
"alpha": 16,
"dropout": 0.0,
"exclude_patterns": [
"(^|\\.)lm_head($|\\.)",
"(^|\\.)embed_tokens($|\\.)",
"(^|\\.)norm($|\\.)",
"attnres|attention_residual",
"dualpath",
"mhc|hyper[-_]?connection",
"rope|rotary",
"kvcache|kv_cache",
"(^|\\.)memory(_store|_bank)?($|\\.)"
],
"include_patterns": [
"(^|\\.)(uncertainty_head|uncertainty_adapter|calib(_head|ration_adapter)?)($|\\.)"
],
"intents": [
"uncertainty"
],
"layer_window": {
"mode": "all",
"value": 0.0
},
"matched_module_names": [
"uncertainty_head",
"uncertainty_head.calibration_mlp",
"uncertainty_head.proj"
],
"name": "group_1_custom_control_head",
"rank": 8,
"rationale": "Prefer explicit uncertainty / calibration heads over backbone surgery.",
"risk": "low",
"surface": "custom_control_head"
},
{
"alpha": 16,
"dropout": 0.05,
"exclude_patterns": [
"(^|\\.)lm_head($|\\.)",
"(^|\\.)embed_tokens($|\\.)",
"(^|\\.)norm($|\\.)",
"attnres|attention_residual",
"dualpath",
"mhc|hyper[-_]?connection",
"rope|rotary",
"kvcache|kv_cache",
"(^|\\.)memory(_store|_bank)?($|\\.)"
],
"include_patterns": [
"(^|\\.)(runtime_harness|query_runtime|task_brief|task_runtime|tool_protocol|permission_context|worktree|session_state|request_normalizer|action_allowlist|constraint_guard|progress_state|workspace_snapshot|repo_contract|knowledge_entry|completion_criteria|search_bridge|sandbox_search_bridge)(_adapter|_bridge|_head)?($|\\.)",
"(^|\\.)(toolspec|tool(_call)?(_grammar|_interface)?|harness|validator|rollback|receipt|patch|permission|session|worktree)(_adapter|_bridge|_head)?($|\\.)"
],
"intents": [
"harness_alignment",
"runtime_protocol",
"session_continuity",
"worktree_discipline"
],
"layer_window": {
"mode": "all",
"value": 0.0
},
"matched_module_names": [
"failure_signal_bridge.rollback_bridge",
"request_normalizer",
"request_normalizer.adapter",
"rollback_adapter",
"rollback_adapter.head",
"runtime_harness_adapter",
"runtime_harness_adapter.bridge",
"tool_protocol_adapter",
"tool_protocol_adapter.bridge",
"toolspec_bridge",
"toolspec_bridge.adapter",
"toolspec_head",
"toolspec_head.param_schema_adapter",
"toolspec_head.postcondition_adapter",
"toolspec_head.precondition_adapter",
"toolspec_head.receipt_formatter",
"toolspec_head.trigger_gate",
"toolspec_head.validator_gate",
"validator_feedback_loop.rollback_adapter"
],
"name": "group_2_custom_runtime_harness_bridge",
"rank": 8,
"rationale": "Runtime / harness obedience should attach to explicit interface bridges before any backbone fallback.",
"risk": "low",
"surface": "custom_runtime_harness_bridge"
},
{
"alpha": 16,
"dropout": 0.0,
"exclude_patterns": [
"(^|\\.)lm_head($|\\.)",
"(^|\\.)embed_tokens($|\\.)",
"(^|\\.)norm($|\\.)",
"attnres|attention_residual",
"dualpath",
"mhc|hyper[-_]?connection",
"rope|rotary",
"kvcache|kv_cache",
"(^|\\.)memory(_store|_bank)?($|\\.)"
],
"include_patterns": [
"(^|\\.)(validator_feedback|sandbox_result_validator|sandbox_rollback_bridge|failure_signal|rollback)(_adapter|_bridge|_head)?($|\\.)"
],
"intents": [
"validator_alignment"
],
"layer_window": {
"mode": "all",
"value": 0.0
},
"matched_module_names": [
"failure_signal_bridge",
"failure_signal_bridge.rollback_bridge",
"rollback_adapter",
"rollback_adapter.head",
"sandbox_rollback_bridge",
"sandbox_rollback_bridge.adapter",
"validator_feedback_bridge",
"validator_feedback_bridge.adapter",
"validator_feedback_loop.rollback_adapter"
],
"name": "group_3_custom_validator_bridge",
"rank": 8,
"rationale": "Validation and rollback fidelity should prefer explicit validator / rollback bridges.",
"risk": "low",
"surface": "custom_validator_bridge"
},
{
"alpha": 8,
"dropout": 0.0,
"exclude_patterns": [
"(^|\\.)lm_head($|\\.)",
"(^|\\.)embed_tokens($|\\.)",
"(^|\\.)norm($|\\.)",
"attnres|attention_residual",
"dualpath",
"mhc|hyper[-_]?connection",
"rope|rotary",
"kvcache|kv_cache",
"(^|\\.)memory(_store|_bank)?($|\\.)"
],
"include_patterns": [
"(^|\\.)(memory_boundary_guard|episodic_memory|session_compactor)(_adapter|_bridge|_head)?($|\\.)"
],
"intents": [
"session_continuity"
],
"layer_window": {
"mode": "all",
"value": 0.0
},
"matched_module_names": [
"episodic_memory",
"episodic_memory.adapter",
"memory_boundary_guard",
"memory_boundary_guard.adapter",
"memory_boundary_guard.rollback_filter",
"session_compactor",
"session_compactor.adapter"
],
"name": "group_4_custom_memory_boundary_bridge",
"rank": 4,
"rationale": "Session continuity should bind to boundary-aware memory packet surfaces rather than broad backbone tuning.",
"risk": "low",
"surface": "custom_memory_boundary_bridge"
}
],
"upstream_profile_hint": null,
"version": "veriloop.peft_target_selector.v2",
"warnings": []
},
"training_mode": "mounted_head",
"version": "veriloop.uncertainty_adapter_trainer.v5.qwen36",
"warnings": [
"Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.",
"Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op."
]
},
"status": "trained",
"train_metrics": {
"adapter_exported": true,
"auto_lora_from_ia3": false,
"best_epoch": 2,
"best_quality_score": 0.8625935807221907,
"epochs_completed": 4,
"loss": 0.009006613283418119,
"micro_batches": 154,
"micro_batches_total": 616,
"optimizer_steps": 10,
"optimizer_steps_total": 40,
"peft_method": "lora_narrow",
"used_peft": true
},
"version": "veriloop.uncertainty_adapter_trainer.v5.qwen36",
"warnings": [
"Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.",
"Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op.",
"Synthetic holdout split used for quality-oriented train/eval separation."
]
}