File size: 5,274 Bytes
b5cc210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
{
  "status": "ok",
  "enabled": true,
  "activated_at_utc": "2026-04-26T02:24:15.464507+00:00",
  "run_id": "qwen-qwen2-5-0-5b-instruct",
  "source": "top-level",
  "label": "local-qwen-0.5b-active-smoke",
  "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
  "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
  "preferred_artifact": "grpo_adapter",
  "mode": "symlink",
  "source_checkpoint_dir": "checkpoints",
  "source_report_dir": "outputs/reports",
  "grpo_adapter": "checkpoints/active/grpo_adapter",
  "merged_model": "checkpoints/active/merged",
  "sft_adapter": "checkpoints/active/sft_adapter",
  "availability": {
    "grpo_adapter": true,
    "merged": true,
    "sft_adapter": true
  },
  "reports": {
    "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json",
    "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json",
    "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json",
    "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json",
    "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json",
    "plot_index.json": "outputs/reports/active_model/plot_index.json",
    "dose_train.json": "outputs/reports/active_model/dose_train.json",
    "baselines.json": "outputs/reports/active_model/baselines.json",
    "robustness.json": "outputs/reports/active_model/robustness.json",
    "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json",
    "sft_run.json": "outputs/reports/active_model/sft_run.json",
    "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt",
    "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json",
    "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json",
    "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json",
    "improvement_report.json": "outputs/reports/active_model/improvement_report.json",
    "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json",
    "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json",
    "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json",
    "risk_train.json": "outputs/reports/active_model/risk_train.json",
    "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json",
    "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json",
    "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json",
    "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json",
    "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json",
    "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json",
    "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json",
    "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json",
    "graph_train.json": "outputs/reports/active_model/graph_train.json",
    "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
    "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
    "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
    "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
    "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
    "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json",
    "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
    "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
    "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
    "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
    "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
    "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
    "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json",
    "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json"
  },
  "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available."
}