File size: 284 Bytes
5a8b07f
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
{
  "source_dataset": "HuggingFaceFW/fineweb-edu",
  "source_config": "sample-10BT",
  "train_rows": 80000,
  "validation_rows": 4000,
  "text_column": "text",
  "cleaning": "remove null bytes, strip, collapse whitespace, filter by minimum character length",
  "min_text_chars": 200
}