File size: 284 Bytes
{
  "source_dataset": "HuggingFaceFW/fineweb-edu",
  "source_config": "sample-10BT",
  "train_rows": 80000,
  "validation_rows": 4000,
  "text_column": "text",
  "cleaning": "remove null bytes, strip, collapse whitespace, filter by minimum character length",
  "min_text_chars": 200
}