{
  "model_repo_id": "kyLELEng/adaptive-retro-gpt-1b",
  "corpus_repo_id": "kyLELEng/adaptive-retro-gpt-1b-corpus",
  "datastore_repo_id": "kyLELEng/adaptive-retro-gpt-1b-datastore",
  "private_repo": true,
  "push_to_hub": true,
  "push_datasets": true,
  "output_dir": "/tmp/adaptive-retro-gpt-1b",
  "smoke_test": false,
  "dataset_id": "HuggingFaceFW/fineweb-edu",
  "dataset_config": "sample-10BT",
  "train_split": "train",
  "validation_split": "train",
  "text_column": "text",
  "streaming": true,
  "min_text_chars": 200,
  "datastore_dataset_id": "wikimedia/wikipedia",
  "datastore_dataset_config": "20231101.en",
  "datastore_split": "train",
  "datastore_text_column": "text",
  "corpus_train_docs": 80000,
  "corpus_validation_docs": 4000,
  "tokenizer_train_docs": 200000,
  "vocab_size": 50000,
  "max_train_docs": 120000,
  "max_eval_docs": 2048,
  "max_index_docs": 120000,
  "max_index_chunks": 120000,
  "chunk_tokens": 96,
  "min_chunk_tokens": 24,
  "hash_dim": 1024,
  "top_k": 2,
  "retrieval_seq_len": 512,
  "seq_len": 2048,
  "d_model": 2048,
  "n_layers": 20,
  "n_heads": 16,
  "dropout": 0.0,
  "retrieval_layers": "5,11,17",
  "retrieval_budget_lambda": 0.001,
  "no_retrieval_prob": 0.1,
  "random_retrieval_prob": 0.1,
  "max_steps": 20000,
  "per_device_batch_size": 2,
  "gradient_accumulation_steps": 2,
  "learning_rate": 0.00018,
  "min_lr_ratio": 0.1,
  "warmup_steps": 1000,
  "weight_decay": 0.1,
  "grad_clip": 1.0,
  "precision": "bf16",
  "gradient_checkpointing": false,
  "log_every": 10,
  "eval_every": 2000,
  "save_every": 5000,
  "max_eval_batches": 32,
  "seed": 17
}