aksara-pretrain-20b / metadata.json
Ezekiel999's picture
Upload metadata.json with huggingface_hub
bf0798b verified
raw
history blame contribute delete
559 Bytes
{
"total_tokens": 9242077179,
"total_docs": 15605775,
"sources": {
"culturax_id": {
"tokens": 5000002093,
"docs": 9022396
},
"mc4_id": {
"tokens": 0,
"docs": 0
},
"wikipedia_id": {
"tokens": 242073052,
"docs": 659941
},
"oscar_id": {
"tokens": 0,
"docs": 0
},
"cc100_id": {
"tokens": 0,
"docs": 0
},
"english_mix": {
"tokens": 4000002034,
"docs": 5923438
}
},
"id_en_ratio": "57:43",
"timestamp": "2026-04-15T20:24:39.830494"
}