# Download the skymizer/fineweb-edu-dedup-45B repository into ./data_raw.
# NOTE(review): huggingface-cli assumes a model repository unless told
# otherwise; if this repository is a dataset, `--repo-type dataset` is
# needed -- confirm against the Hub before relying on this command.
huggingface-cli download skymizer/fineweb-edu-dedup-45B --local-dir ./data_raw

# Run books.py with ./data_raw as the data path and ./data_proc as the save
# path. The meaning of each option is defined by books.py, which is not
# visible here.
# NOTE(review): the last option line originally ended with a dangling `\`,
# which would splice the following command into this one's argument list.
# It is dropped here; confirm no further books.py options were meant to
# follow.
python books.py \
  --data-path ./data_raw \
  --save-path ./data_proc \
  --content-key text \
  --processes-num 64 \
  --write-batch-size 100
| huggingface-cli download skymizer/fineweb-edu-dedup-45B --local-dir ./data_raw | |
| python books.py \ | |
| --data-path ./data_raw \ | |
| --save-path ./data_proc \ | |
| --content-key text \ | |
| --processes-num 64 \ | |
| --write-batch-size 100 \ | |