{ "artifacts": { "adapter_dir": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/adapter", "best_checkpoint_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/best_checkpoint_manifest.json", "epoch_history": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/epoch_history.json", "eval_jsonl": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_eval.jsonl", "host_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/host_manifest.json", "plan_json": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_adapter_plan.json", "tokenizer_dir": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/tokenizer", "train_jsonl": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_train.jsonl", "training_manifest": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_training_manifest.json", "uncertainty_head": "/private/wang_libo/veriloop_coder_e1/outputs/uncertainty_qwen36_rootfix_run1/uncertainty_head.pt" }, "dataset": { "eval_modes": { "conflicting_evidence": 5, "evidence_gap": 5, "exec_required": 5, "high_risk": 5, "low_uncertainty": 5, "patch_pending": 5, "reverse_engineering_ambiguity": 5, "self_check_failure": 5, "spec_mismatch": 5, "validator_negation": 5, "worktree_conflict": 5 }, "eval_size": 55, "train_modes": { "conflicting_evidence": 14, "evidence_gap": 14, "exec_required": 14, "high_risk": 14, "low_uncertainty": 14, "patch_pending": 14, "reverse_engineering_ambiguity": 14, "self_check_failure": 14, "spec_mismatch": 14, "validator_negation": 14, "worktree_conflict": 14 }, "train_size": 154 }, "eval_metrics": { "adapter_exported": true, "auto_lora_from_ia3": false, "best_epoch": 2, "best_quality_score": 0.8625935807221907, "count": 55, "mae": { "u_answer": 0.15174226462841034, "u_evidence": 0.19610758125782013, "u_exec": 0.18561214208602905, "u_risk": 0.1553734689950943, "u_spec": 0.21633382141590118 }, "mean_mae": 0.18103384971618652, "mean_rmse": 0.24169571697711945, "moderate_accuracy": 0.6727272727272727, "peft_method": "lora_narrow", "quality_score": 0.8625935807221907, "rmse": { "u_answer": 0.18724055588245392, "u_evidence": 0.22527915239334106, "u_exec": 0.25238174200057983, "u_risk": 0.20667441189289093, "u_spec": 0.3369026482105255 }, "tight_accuracy": 0.4, "used_peft": true, "weighted_mae": 0.18083095811830807, "weighted_rmse": 0.24125460771003793 }, "plan": { "adapter_alpha": 16, "adapter_dropout": 0.05, "adapter_rank": 8, "backbone_manifest": { "host_inventory_size": 79 }, "config": { "allow_backbone_bridge": false, "backbone": "/public/wang_libo/veriloop_coder_e1/model", "bf16": true, "cache_dir": null, "cpu_max_memory_gib": 96, "dataset_jsonl": null, "dim_weights": { "u_answer": 1.1, "u_evidence": 1.35, "u_exec": 1.35, "u_risk": 1.55, "u_spec": 1.25 }, "early_stopping_min_delta": 0.001, "early_stopping_patience": 2, "enable_synthetic_dataset": true, "eval_jsonl": null, "eval_samples_per_mode": 5, "fp16": false, "gpu_max_memory_gib": 44, "gradient_accumulation_steps": 16, "host_dropout": 0.0, "learning_rate": 2e-05, "local_files_only": true, "logging_steps": 10, "max_grad_norm": 1.0, "max_length": 1664, "min_epochs_before_early_stop": 2, "num_train_epochs": 4.0, "output_dir": "./outputs/uncertainty_qwen36_rootfix_run1", "per_device_eval_batch_size": 1, "per_device_train_batch_size": 1, "prefer_best_checkpoint_export": true, "probe_dropout": 0.03, "probe_hidden_factor": 0.75, "product_line": "veriloop_coder", "quantization_mode": "4bit", "quantization_required": false, "require_trainable_targets": true, "revision": null, "run_post_train_eval": true, "save_best_checkpoint": true, "seed": 11, "selection_mode": "minimal", "train_samples_per_mode": 14, "training_mode": "mounted_head", "trust_remote_code": true, "use_double_quant": true, "warmup_ratio": 0.05, "weight_decay": 0.0, "weighted_mae_penalty": 0.5, "weighted_rmse_penalty": 0.5 }, "dataset_summary": { "eval_modes": { "conflicting_evidence": 5, "evidence_gap": 5, "exec_required": 5, "high_risk": 5, "low_uncertainty": 5, "patch_pending": 5, "reverse_engineering_ambiguity": 5, "self_check_failure": 5, "spec_mismatch": 5, "validator_negation": 5, "worktree_conflict": 5 }, "eval_size": 55, "train_modes": { "conflicting_evidence": 14, "evidence_gap": 14, "exec_required": 14, "high_risk": 14, "low_uncertainty": 14, "patch_pending": 14, "reverse_engineering_ambiguity": 14, "self_check_failure": 14, "spec_mismatch": 14, "validator_negation": 14, "worktree_conflict": 14 }, "train_size": 154 }, "head_strategy": "host_head", "notes": [ "Primary route is host-surface-first uncertainty training.", "Validator receipts, self-check failure, reverse-engineering ambiguity, worktree conflicts, and patch continuity are first-class signals.", "DualPath, Full AttnRes, mHC, routers, experts, and broad attention-layer PEFT stay excluded.", "This adapter should improve runtime uncertainty routing, not general coding free-formity." ], "peft_method": "lora_narrow", "recipe": { "adapter_family": "uncertainty", "backbone": "/public/wang_libo/veriloop_coder_e1/model", "backbone_family": "qwen_dense", "excluded_patterns": [ "(?i)\\bdualpath\\b", "(?i)\\bmhc\\b", "(?i)\\bfull[_\\- ]?attnres\\b", "(?i)\\battnres(_full)?\\b", "(?i)\\brouter\\b", "(?i)\\bexperts?\\b", "(?i)\\bmoe\\b.*\\b(gate|router|expert)\\b", "(?i)\\brope\\b.*\\b(freq|inv_freq|theta|rotary)\\b", "(?i)\\bkvcache\\b", "(?i)\\bposition_embedding\\b", "(?i)\\bembed(tokens|ding)?\\b", "(?i)\\blm_head\\b" ], "harness_constraints": [ "Harness Engineering remains the primary convergence layer.", "Adapter must not bypass runtime orchestrator / validator / rollback loops.", "Adapter outputs remain subordinate to VeriLoop control-plane decisions.", "Adapter must not create hidden prompt-style memory authority.", "Adapter must support bounded uncertainty calibration rather than generic hesitation.", "Validator and receipt evidence must remain able to update uncertainty." ], "hyperparams": { "alpha": 16, "bias": "none", "dropout": 0.05, "fan_in_fan_out": false, "modules_to_save": [ "input_layernorm" ], "r": 8, "task_type": "CAUSAL_LM" }, "merge_policy": "merge_after_guard", "metadata": { "allow_backbone_bridge": false, "allow_vla_action_expert": false, "harness_first": true, "prefer_explicit_heads": true, "prefer_qlora_for_backbone_bridge": true, "require_harness_first": true, "selector_group_count": 2, "strict_narrow_scope": true, "trainer": "veriloop.uncertainty_adapter_trainer.v5.qwen36", "uncertainty_training": true }, "notes": [ "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op.", "Backbone family inferred as qwen_dense.", "PEFT method resolved as lora_narrow.", "Recipe is harness-first: runtime convergence remains in VeriLoop control-plane + harness, not in broad weight surgery.", "Block AttnRes, DualPath, mHC hooks, RoPE, KV-cache, and broad MoE routing remain structurally excluded." ], "peft_method": "lora_narrow", "precision_policy": "auto", "product_line": "veriloop_coder", "regression_requirements": [ "Must pass PEFT regression guard structural policy checks.", "Must not introduce forbidden backbone/serving structural targets.", "Must preserve harness regression envelope for the selected product line.", "Budgeted uncertainty convergence must not regress.", "Uncertainty calibration must not collapse into generic caution." ], "target_groups": [ { "alpha": 16, "dropout": 0.0, "name": "group_1_custom_control_head", "rank": 8, "rationale": "Prefer explicit uncertainty / calibration heads over backbone surgery.", "surface": "custom_control_head", "target_modules": [ "uncertainty_head", "uncertainty_head.calibration_mlp", "uncertainty_head.proj" ] }, { "alpha": 16, "dropout": 0.0, "name": "group_2_custom_validator_bridge", "rank": 8, "rationale": "Validation and rollback fidelity should prefer explicit validator / rollback bridges.", "surface": "custom_validator_bridge", "target_modules": [ "failure_signal_bridge", "failure_signal_bridge.rollback_bridge", "rollback_adapter", "rollback_adapter.head", "sandbox_rollback_bridge", "sandbox_rollback_bridge.adapter", "validator_feedback_bridge", "validator_feedback_bridge.adapter", "validator_feedback_loop.rollback_adapter" ] } ], "target_modules": [ "uncertainty_head", "uncertainty_head.calibration_mlp", "uncertainty_head.proj", "failure_signal_bridge", "failure_signal_bridge.rollback_bridge", "rollback_adapter", "rollback_adapter.head", "sandbox_rollback_bridge", "sandbox_rollback_bridge.adapter", "validator_feedback_bridge", "validator_feedback_bridge.adapter", "validator_feedback_loop.rollback_adapter" ], "version": "veriloop.lora_recipe_veriloop.v2" }, "selected_target_modules": [ "uncertainty_head", "uncertainty_head.calibration_mlp", "uncertainty_head.proj" ], "target_selection": { "backbone_archetype": "qwen_dense", "exclusions": [ { "pattern": "(^|\\.)lm_head($|\\.)", "reason": "Do not retune final token head; too broad and evaluation-heavy." }, { "pattern": "(^|\\.)embed_tokens($|\\.)", "reason": "Embedding surgery risks broad semantic drift." }, { "pattern": "(^|\\.)norm($|\\.)", "reason": "Global norm tuning can destabilize calibration across scenes." }, { "pattern": "attnres|attention_residual", "reason": "Block AttnRes may be mounted structurally but is never a PEFT target." }, { "pattern": "dualpath", "reason": "DualPath is serving/runtime infrastructure only." }, { "pattern": "mhc|hyper[-_]?connection", "reason": "mHC-inspired stability hooks remain structural, not PEFT surfaces." }, { "pattern": "rope|rotary", "reason": "RoPE/context surgery is handled architecturally, not by narrow PEFT here." }, { "pattern": "kvcache|kv_cache", "reason": "KV-cache runtime surfaces are not PEFT targets." }, { "pattern": "(^|\\.)memory(_store|_bank)?($|\\.)", "reason": "Persistent memory stores are harness/runtime policy surfaces, not PEFT targets." } ], "inventory_size": 79, "inventory_source": "provided_names", "notes": [ "Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.", "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op." ], "recommended_training": "ia3_head_only", "request": { "allow_backbone_bridge": false, "allow_vla_action_expert": false, "backbone": "/public/wang_libo/veriloop_coder_e1/model", "explicit_exclude_patterns": [], "explicit_include_patterns": [], "intents": [ "uncertainty", "validator_alignment", "harness_alignment", "runtime_protocol", "session_continuity", "worktree_discipline" ], "prefer_qlora_for_backbone_bridge": true, "product_line": "veriloop_coder", "selection_mode": "minimal" }, "selected_groups": [ { "alpha": 16, "dropout": 0.0, "exclude_patterns": [ "(^|\\.)lm_head($|\\.)", "(^|\\.)embed_tokens($|\\.)", "(^|\\.)norm($|\\.)", "attnres|attention_residual", "dualpath", "mhc|hyper[-_]?connection", "rope|rotary", "kvcache|kv_cache", "(^|\\.)memory(_store|_bank)?($|\\.)" ], "include_patterns": [ "(^|\\.)(uncertainty_head|uncertainty_adapter|calib(_head|ration_adapter)?)($|\\.)" ], "intents": [ "uncertainty" ], "layer_window": { "mode": "all", "value": 0.0 }, "matched_module_names": [ "uncertainty_head", "uncertainty_head.calibration_mlp", "uncertainty_head.proj" ], "name": "group_1_custom_control_head", "rank": 8, "rationale": "Prefer explicit uncertainty / calibration heads over backbone surgery.", "risk": "low", "surface": "custom_control_head" }, { "alpha": 16, "dropout": 0.05, "exclude_patterns": [ "(^|\\.)lm_head($|\\.)", "(^|\\.)embed_tokens($|\\.)", "(^|\\.)norm($|\\.)", "attnres|attention_residual", "dualpath", "mhc|hyper[-_]?connection", "rope|rotary", "kvcache|kv_cache", "(^|\\.)memory(_store|_bank)?($|\\.)" ], "include_patterns": [ "(^|\\.)(runtime_harness|query_runtime|task_brief|task_runtime|tool_protocol|permission_context|worktree|session_state|request_normalizer|action_allowlist|constraint_guard|progress_state|workspace_snapshot|repo_contract|knowledge_entry|completion_criteria|search_bridge|sandbox_search_bridge)(_adapter|_bridge|_head)?($|\\.)", "(^|\\.)(toolspec|tool(_call)?(_grammar|_interface)?|harness|validator|rollback|receipt|patch|permission|session|worktree)(_adapter|_bridge|_head)?($|\\.)" ], "intents": [ "harness_alignment", "runtime_protocol", "session_continuity", "worktree_discipline" ], "layer_window": { "mode": "all", "value": 0.0 }, "matched_module_names": [ "failure_signal_bridge.rollback_bridge", "request_normalizer", "request_normalizer.adapter", "rollback_adapter", "rollback_adapter.head", "runtime_harness_adapter", "runtime_harness_adapter.bridge", "tool_protocol_adapter", "tool_protocol_adapter.bridge", "toolspec_bridge", "toolspec_bridge.adapter", "toolspec_head", "toolspec_head.param_schema_adapter", "toolspec_head.postcondition_adapter", "toolspec_head.precondition_adapter", "toolspec_head.receipt_formatter", "toolspec_head.trigger_gate", "toolspec_head.validator_gate", "validator_feedback_loop.rollback_adapter" ], "name": "group_2_custom_runtime_harness_bridge", "rank": 8, "rationale": "Runtime / harness obedience should attach to explicit interface bridges before any backbone fallback.", "risk": "low", "surface": "custom_runtime_harness_bridge" }, { "alpha": 16, "dropout": 0.0, "exclude_patterns": [ "(^|\\.)lm_head($|\\.)", "(^|\\.)embed_tokens($|\\.)", "(^|\\.)norm($|\\.)", "attnres|attention_residual", "dualpath", "mhc|hyper[-_]?connection", "rope|rotary", "kvcache|kv_cache", "(^|\\.)memory(_store|_bank)?($|\\.)" ], "include_patterns": [ "(^|\\.)(validator_feedback|sandbox_result_validator|sandbox_rollback_bridge|failure_signal|rollback)(_adapter|_bridge|_head)?($|\\.)" ], "intents": [ "validator_alignment" ], "layer_window": { "mode": "all", "value": 0.0 }, "matched_module_names": [ "failure_signal_bridge", "failure_signal_bridge.rollback_bridge", "rollback_adapter", "rollback_adapter.head", "sandbox_rollback_bridge", "sandbox_rollback_bridge.adapter", "validator_feedback_bridge", "validator_feedback_bridge.adapter", "validator_feedback_loop.rollback_adapter" ], "name": "group_3_custom_validator_bridge", "rank": 8, "rationale": "Validation and rollback fidelity should prefer explicit validator / rollback bridges.", "risk": "low", "surface": "custom_validator_bridge" }, { "alpha": 8, "dropout": 0.0, "exclude_patterns": [ "(^|\\.)lm_head($|\\.)", "(^|\\.)embed_tokens($|\\.)", "(^|\\.)norm($|\\.)", "attnres|attention_residual", "dualpath", "mhc|hyper[-_]?connection", "rope|rotary", "kvcache|kv_cache", "(^|\\.)memory(_store|_bank)?($|\\.)" ], "include_patterns": [ "(^|\\.)(memory_boundary_guard|episodic_memory|session_compactor)(_adapter|_bridge|_head)?($|\\.)" ], "intents": [ "session_continuity" ], "layer_window": { "mode": "all", "value": 0.0 }, "matched_module_names": [ "episodic_memory", "episodic_memory.adapter", "memory_boundary_guard", "memory_boundary_guard.adapter", "memory_boundary_guard.rollback_filter", "session_compactor", "session_compactor.adapter" ], "name": "group_4_custom_memory_boundary_bridge", "rank": 4, "rationale": "Session continuity should bind to boundary-aware memory packet surfaces rather than broad backbone tuning.", "risk": "low", "surface": "custom_memory_boundary_bridge" } ], "upstream_profile_hint": null, "version": "veriloop.peft_target_selector.v2", "warnings": [] }, "training_mode": "mounted_head", "version": "veriloop.uncertainty_adapter_trainer.v5.qwen36", "warnings": [ "Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.", "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op." ] }, "status": "trained", "train_metrics": { "adapter_exported": true, "auto_lora_from_ia3": false, "best_epoch": 2, "best_quality_score": 0.8625935807221907, "epochs_completed": 4, "loss": 0.009006613283418119, "micro_batches": 154, "micro_batches_total": 616, "optimizer_steps": 10, "optimizer_steps_total": 40, "peft_method": "lora_narrow", "used_peft": true }, "version": "veriloop.uncertainty_adapter_trainer.v5.qwen36", "warnings": [ "Harness Engineering is primary; PEFT is limited to obedience-facing, interface-facing support surfaces.", "Backbone bridge tuning disabled explicitly; selector stays on custom surfaces or no-op.", "Synthetic holdout split used for quality-oriented train/eval separation." ] }