program: train.py
project: dalle-mini
method: random
metric:
  name: eval/loss
  goal: minimize
parameters:
  optim:
    value: distributed_shampoo
  learning_rate:
    distribution: log_uniform
    # W&B log_uniform bounds are natural-log exponents:
    # exp(-9.2) ≈ 1e-4  to  exp(-6.9) ≈ 1e-3
    min: -9.2
    max: -6.9
  tokenizer_name:
    value: boris/dalle-mini-tokenizer
  config_name:
    value: ./config/mini
  dtype:
    value: bfloat16
  dataset_repo_or_path:
    value: ./data
  per_device_train_batch_size:
    value: 64
  per_device_eval_batch_size:
    value: 64
  gradient_accumulation_steps:
    value: 1
  warmup_steps:
    value: 1000
  num_train_epochs:
    value: 1
  max_train_samples:
    value: 1000000
  logging_steps:
    value: 40
  eval_steps:
    value: 200

command:
  - python3
  - ${program}
  - "--streaming"
  - "--output_dir"
  - "./output"
  - "--overwrite_output_dir"
  - "--do_train"
  - "--do_eval"
  - ${args}