File size: 1,406 Bytes
221f262 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 | {
"args": {
"bundle_cache": ".cache/probe_10s_all_memory_skipfusion.json",
"model_name": "prajjwal1/bert-tiny",
"top_k": 10,
"diagnose_top_k": 50,
"split": "interleave",
"train_fraction": 0.5,
"seed": 99,
"epochs": 3,
"batch_size": 16,
"score_batch_size": 64,
"lr": 2e-05,
"weight_decay": 0.01,
"max_len": 192,
"negatives_per_case": 16,
"max_pos_weight": 8.0,
"max_replacements": 1,
"margin_grid": [
0.0
],
"device": "cuda",
"save_model": ".cache/locomo_bert_tiny_reranker_10sall_seed99",
"progress_every": 125,
"print_margin_sweep": false,
"outcome_report": ".cache/transformer_outcomes_10sall_full_memory_seed99_saved.json",
"print_outcomes": 0
},
"metrics": {
"chosen_margin": 0.0,
"train": {
"cases": 766,
"baseline_hit": 465,
"wide_hit": 615,
"rank_or_topk_miss": 150,
"learned_top_hit": 537,
"learned_rank_or_topk_added": 98,
"merge_hit": 504,
"merge_added": 40,
"merge_lost": 1,
"merge_rank_or_topk_added": 40
},
"held": {
"cases": 765,
"baseline_hit": 466,
"wide_hit": 609,
"rank_or_topk_miss": 143,
"learned_top_hit": 507,
"learned_rank_or_topk_added": 90,
"merge_hit": 493,
"merge_added": 28,
"merge_lost": 1,
"merge_rank_or_topk_added": 28
}
}
} |