save_data: data
overwrite: true
seed: 1234
report_every: 100
valid_metrics: ["BLEU"]
tensorboard: true
tensorboard_log_dir: tensorboard
|
|
src_vocab: fren/fr.eole.vocab
tgt_vocab: fren/en.eole.vocab
src_vocab_size: 32000
tgt_vocab_size: 32000
vocab_size_multiple: 8
share_vocab: false
n_sample: 0
|
|
data:
  corpus_1:
    path_src: fren/train.cleaned.filtered.fr
    path_tgt: fren/train.cleaned.filtered.en
    weight: 200
  corpus_2:
    path_src: ../data/newscrawl.backtrans.cleaned.filtered.fr
    path_tgt: ../data/newscrawl.backtrans.cleaned.filtered.en
    weight: 35
  corpus_3:
    path_src: ../data/madlad.backtrans.cleaned.filtered.fr
    path_tgt: ../data/madlad.backtrans.cleaned.filtered.en
    weight: 68
  corpus_4:
    path_src: ../data/hansard.fr
    path_tgt: ../data/hansard.en
    weight: 5
  valid:
    path_src: fren/dev.fr
    path_tgt: fren/dev.en
|
|
transforms: [sentencepiece, filtertoolong]
transforms_configs:
  sentencepiece:
    src_subword_model: "fren/fr.spm.model"
    tgt_subword_model: "fren/en.spm.model"
  filtertoolong:
    src_seq_length: 256
    tgt_seq_length: 256
|
|
training:
  # Checkpointing
  model_path: quickmt-fr-en-eole-model
  keep_checkpoint: 4
  train_steps: 200000
  save_checkpoint_steps: 5000
  valid_steps: 5000

  # Hardware
  world_size: 1
  gpu_ranks: [0]

  # Batching
  batch_type: "tokens"
  batch_size: 6000
  valid_batch_size: 2048
  batch_size_multiple: 8
  accum_count: [20]
  accum_steps: [0]

  # Optimizer
  compute_dtype: "fp16"
  optim: "adamw"
  learning_rate: 3.0
  warmup_steps: 5000
  decay_method: "noam"
  adam_beta2: 0.998

  # Data loading
  bucket_size: 256000
  num_workers: 4
  prefetch_factor: 128

  # Regularization
  dropout_steps: [0]
  dropout: [0.1]
  attention_dropout: [0.1]
  max_grad_norm: 0
  label_smoothing: 0.1
  average_decay: 0.0001
  param_init_method: xavier_uniform
  normalization: "tokens"
|
|
model:
  architecture: "transformer"
  share_embeddings: false
  share_decoder_embeddings: false
  add_estimator: false
  add_ffnbias: true
  add_qkvbias: false
  layer_norm: standard
  mlp_activation_fn: gelu
  hidden_size: 768
  encoder:
    layers: 12
  decoder:
    layers: 2
  heads: 16
  transformer_ff: 4096
  embeddings:
    word_vec_size: 768
    position_encoding_type: "SinusoidalInterleaved"
|
|
|
|
|
|