{ "created_at": "2026-05-03 10:06:19", "inputs": [ "/mnt/scratch/datasets/combined/sft_combined.jsonl" ], "output": "/mnt/scratch/datasets/combined/sft_combined_ready.jsonl", "settings": { "max_chars": 12000, "max_est_tokens": 3072, "limit": 0 }, "seconds": 7.25, "stats": { "seen": 42138, "assistant_too_short": 8451, "kept": 32996, "user_too_short": 502, "repetitive_response": 85, "too_many_chars": 2, "binary_or_control_text": 1, "garbage_response": 100, "too_many_tokens": 1 }, "schemas": { "instruction_output": 42138 }, "categories": { "librispeech": 7785, "wizardlm": 7506, "dolly": 6539, "alpaca": 6325, "textcaps": 4492, "squad_doc": 349 }, "token_buckets": { "0001-0512": 28102, "0513-1024": 4034, "1025-1536": 712, "1537-2048": 112, "2049-3072": 36 }, "files": [ { "path": "/mnt/scratch/datasets/combined/sft_combined.jsonl", "seconds": 7.25, "stats": { "seen": 42138, "assistant_too_short": 8451, "kept": 32996, "user_too_short": 502, "repetitive_response": 85, "too_many_chars": 2, "binary_or_control_text": 1, "garbage_response": 100, "too_many_tokens": 1 } } ], "preview": [ { "line": 57, "schema": "instruction_output", "category": "squad_doc", "est_tokens": 494, "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nChopin seldom performed publicly in Paris. In later years he generally gave a single ann", "assistant_preview": "his own Paris apartment for small groups of friends" }, { "line": 64, "schema": "instruction_output", "category": "squad_doc", "est_tokens": 217, "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nAt the funeral of the tenor Adolphe Nourrit in Paris in 1839, Chopin made a rare appeara", "assistant_preview": "Berlioz's Grande symphonie funèbre et triomphale" }, { "line": 141, "schema": "instruction_output", "category": "squad_doc", "est_tokens": 402, "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nLaird writes that Altan Khan abolished the native Mongol practices of shamanism and bloo", "assistant_preview": "the native Mongol practices of shamanism and blood sacrifice" }, { "line": 158, "schema": "instruction_output", "category": "squad_doc", "est_tokens": 232, "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nSome independent stereo manufacturers including JVC, Pioneer, Kenwood, Alpine, Sony, and", "assistant_preview": "JVC, Pioneer, Kenwood, Alpine, Sony, and Harman Kardon" }, { "line": 165, "schema": "instruction_output", "category": "squad_doc", "est_tokens": 197, "user_preview": "Read the following document excerpt and answer the question.\n\nDocument:\nWith third parties like Namco, Square Enix, Electronic Arts, Sega, and Hudson Soft all m", "assistant_preview": "Namco, Square Enix, Electronic Arts, Sega, and Hudson Soft" } ] }