{
  "model": "Qwen/Qwen3.5-2B-Base",
  "n_trials": 3,
  "epochs": 15,
  "regularization_ratio": 0.33,
  "aggregate": {
    "recall": {
      "pooled_correct": 61,
      "pooled_total": 105,
      "pooled_rate": 0.580952380952381,
      "per_trial_rates": [
        0.6571428571428571,
        0.5428571428571428,
        0.5428571428571428
      ],
      "mean": 0.5809523809523809,
      "stdev": 0.06598288790738582,
      "ci_95_lower": 0.4853552056582404,
      "ci_95_upper": 0.670835074528747
    },
    "general_knowledge": {
      "pooled_correct": 60,
      "pooled_total": 60,
      "pooled_rate": 1.0,
      "per_trial_rates": [
        1.0,
        1.0,
        1.0
      ],
      "mean": 1.0,
      "stdev": 0.0,
      "ci_95_lower": 0.9398260695220669,
      "ci_95_upper": 0.9999999999999999
    },
    "training": {
      "mean_time_s": 69.6302502155304,
      "stdev_time_s": 1.185997256195759,
      "mean_steps": 180,
      "per_trial_times": [
        68.26203393936157,
        70.36512899398804,
        70.26358771324158
      ]
    }
  },
  "trials": [
    {
      "trial_id": 1,
      "n_confirmed_unknown": 35,
      "n_training_pairs": 52,
      "training_steps": 180,
      "training_time_s": 68.26203393936157,
      "initial_loss": 1.290154,
      "final_loss": 0.451566,
      "recall_correct": 23,
      "recall_total": 35,
      "recall_rate": 0.6571428571428571,
      "general_correct": 20,
      "general_total": 20,
      "general_rate": 1.0,
      "category_scores": {
        "Awards": {
          "correct": 7,
          "total": 7
        },
        "Entertainment": {
          "correct": 1,
          "total": 4
        },
        "Weather/Natural Events": {
          "correct": 4,
          "total": 5
        },
        "Sports": {
          "correct": 5,
          "total": 6
        },
        "Deaths/Obituaries": {
          "correct": 4,
          "total": 11
        },
        "Science": {
          "correct": 1,
          "total": 1
        },
        "Technology/Business": {
          "correct": 1,
          "total": 1
        }
      }
    },
    {
      "trial_id": 2,
      "n_confirmed_unknown": 35,
      "n_training_pairs": 52,
      "training_steps": 180,
      "training_time_s": 70.36512899398804,
      "initial_loss": 2.056952,
      "final_loss": 0.260391,
      "recall_correct": 19,
      "recall_total": 35,
      "recall_rate": 0.5428571428571428,
      "general_correct": 20,
      "general_total": 20,
      "general_rate": 1.0,
      "category_scores": {
        "Deaths/Obituaries": {
          "correct": 0,
          "total": 11
        },
        "Awards": {
          "correct": 6,
          "total": 7
        },
        "Weather/Natural Events": {
          "correct": 4,
          "total": 5
        },
        "Technology/Business": {
          "correct": 1,
          "total": 1
        },
        "Entertainment": {
          "correct": 1,
          "total": 4
        },
        "Sports": {
          "correct": 6,
          "total": 6
        },
        "Science": {
          "correct": 1,
          "total": 1
        }
      }
    },
    {
      "trial_id": 3,
      "n_confirmed_unknown": 35,
      "n_training_pairs": 52,
      "training_steps": 180,
      "training_time_s": 70.26358771324158,
      "initial_loss": 1.984214,
      "final_loss": 0.381513,
      "recall_correct": 19,
      "recall_total": 35,
      "recall_rate": 0.5428571428571428,
      "general_correct": 20,
      "general_total": 20,
      "general_rate": 1.0,
      "category_scores": {
        "Deaths/Obituaries": {
          "correct": 2,
          "total": 11
        },
        "Awards": {
          "correct": 5,
          "total": 7
        },
        "Technology/Business": {
          "correct": 0,
          "total": 1
        },
        "Weather/Natural Events": {
          "correct": 4,
          "total": 5
        },
        "Entertainment": {
          "correct": 2,
          "total": 4
        },
        "Sports": {
          "correct": 5,
          "total": 6
        },
        "Science": {
          "correct": 1,
          "total": 1
        }
      }
    }
  ]
}