| { |
| "created_at": "2026-05-03 10:06:19", |
| "inputs": [ |
| "/mnt/scratch/datasets/combined/sft_combined.jsonl" |
| ], |
| "output": "/mnt/scratch/datasets/combined/sft_combined_ready.jsonl", |
| "settings": { |
| "max_chars": 12000, |
| "max_est_tokens": 3072, |
| "limit": 0 |
| }, |
| "seconds": 7.25, |
| "stats": { |
| "seen": 42138, |
| "assistant_too_short": 8451, |
| "kept": 32996, |
| "user_too_short": 502, |
| "repetitive_response": 85, |
| "too_many_chars": 2, |
| "binary_or_control_text": 1, |
| "garbage_response": 100, |
| "too_many_tokens": 1 |
| }, |
| "schemas": { |
| "instruction_output": 42138 |
| }, |
| "categories": { |
| "librispeech": 7785, |
| "wizardlm": 7506, |
| "dolly": 6539, |
| "alpaca": 6325, |
| "textcaps": 4492, |
| "squad_doc": 349 |
| }, |
| "token_buckets": { |
| "0001-0512": 28102, |
| "0513-1024": 4034, |
| "1025-1536": 712, |
| "1537-2048": 112, |
| "2049-3072": 36 |
| }, |
| "files": [ |
| { |
| "path": "/mnt/scratch/datasets/combined/sft_combined.jsonl", |
| "seconds": 7.25, |
| "stats": { |
| "seen": 42138, |
| "assistant_too_short": 8451, |
| "kept": 32996, |
| "user_too_short": 502, |
| "repetitive_response": 85, |
| "too_many_chars": 2, |
| "binary_or_control_text": 1, |
| "garbage_response": 100, |
| "too_many_tokens": 1 |
| } |
| } |
| ], |
| "preview": [ |
| { |
| "line": 57, |
| "schema": "instruction_output", |
| "category": "squad_doc", |
| "est_tokens": 494, |
| "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nChopin seldom performed publicly in Paris. In later years he generally gave a single ann", |
| "assistant_preview": "his own Paris apartment for small groups of friends" |
| }, |
| { |
| "line": 64, |
| "schema": "instruction_output", |
| "category": "squad_doc", |
| "est_tokens": 217, |
| "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nAt the funeral of the tenor Adolphe Nourrit in Paris in 1839, Chopin made a rare appeara", |
| "assistant_preview": "Berlioz's Grande symphonie funèbre et triomphale" |
| }, |
| { |
| "line": 141, |
| "schema": "instruction_output", |
| "category": "squad_doc", |
| "est_tokens": 402, |
| "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nLaird writes that Altan Khan abolished the native Mongol practices of shamanism and bloo", |
| "assistant_preview": "the native Mongol practices of shamanism and blood sacrifice" |
| }, |
| { |
| "line": 158, |
| "schema": "instruction_output", |
| "category": "squad_doc", |
| "est_tokens": 232, |
| "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nSome independent stereo manufacturers including JVC, Pioneer, Kenwood, Alpine, Sony, and", |
| "assistant_preview": "JVC, Pioneer, Kenwood, Alpine, Sony, and Harman Kardon" |
| }, |
| { |
| "line": 165, |
| "schema": "instruction_output", |
| "category": "squad_doc", |
| "est_tokens": 197, |
| "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nWith third parties like Namco, Square Enix, Electronic Arts, Sega, and Hudson Soft all m", |
| "assistant_preview": "Namco, Square Enix, Electronic Arts, Sega, and Hudson Soft" |
| } |
| ] |
| } |