task_name: general
model_name: bge
model_dir: /iyunwen/nlpdata/PublicPretrainedModel/bge-base-zh/
use_deepspeed: true
desc: "piccolo"
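# Training-objective weights. Interpretation is inferred from the field names and
# from train_method below; exact semantics depend on the training code:
#   ewc_ratio      - weight of the EWC (Elastic Weight Consolidation) regularization term
#   cosent_ratio   - weight of the CoSENT loss on the labelled pair data
#   in_batch_ratio - weight of the in-batch-negatives contrastive loss
#   hard_neg_ratio - assumed proportion of mined hard negatives mixed into each batch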
train_method: "ewc"
ewc_ratio: 10.0
cosent_ratio: 20.0
in_batch_ratio: 30.0
save_steps: 50
hard_neg_ratio: 0.2
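# JSONL datasets trained with in-batch negatives, grouped by source;
# each key maps to a list of files (grouping into separate loaders is assumed).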
in_batch_train_paths:
  synthetic_qp:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_synthetic_alpaca2_hfl_0_100000_vec_neg.jsonl
  normal:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/m3e_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/wudao_long_length_hard_neg.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/mrc_data.jsonl
    - /iyunwen/nlpdata/work/LP/Data/VecData/stella/guowang_data.jsonl
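# Labelled pair datasets (binary classification and NLI), presumably trained
# with the CoSENT objective weighted by cosent_ratio above.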
pair_train_paths:
  binclf:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/binclf_data.jsonl
  nli:
    - /iyunwen/nlpdata/work/LP/Data/VecData/v2/nli_data.jsonl
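# Data loading: loader_idxs of null is assumed to mean all loaders above are used;
# in_batch_bsz / pair_bsz are the batch sizes for the two data types, and
# max_length is the tokenizer truncation length.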
loader_idxs: null
in_batch_bsz: 128
pair_bsz: 128
max_length: 512
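# The arguments below are presumably forwarded to a HuggingFace TrainingArguments
# instance; logging_steps / eval_steps are set very high to effectively disable
# periodic logging and evaluation.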
auto_ouput_dir: false
train_args:
  seed: 666
  output_dir: /iyunwen/nlpdata/work/LP/model_path/vec_embedding/stella/s4/
  evaluation_strategy: "no"
  num_train_epochs: 4
  logging_steps: 9999999
  eval_steps: 9999999
  per_device_train_batch_size: 128
  gradient_accumulation_steps: 1
  per_device_eval_batch_size: 32
  learning_rate: 5.0e-06
  weight_decay: 0.00001
  warmup_ratio: 0.05
  lr_scheduler_type: "linear"
  dataloader_drop_last: false
  fp16: true
  gradient_checkpointing: true
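  # DeepSpeed configuration passed along with train_args; "auto" values are
  # expected to be filled in by the HF/DeepSpeed integration. Note the explicit
  # optimizer lr (1e-6) differs from learning_rate above (5.0e-06); which value
  # takes effect depends on how the training code initializes DeepSpeed.
  # ZeRO stage 0 means no optimizer-state/gradient/parameter partitioning.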
  deepspeed:
    fp16:
      enabled: true
      hysteresis: 2
      initial_scale_power: 16
      loss_scale: 0
      loss_scale_window: 1000
      min_loss_scale: 1
    train_micro_batch_size_per_gpu: 128
    train_batch_size: "auto"
    gradient_accumulation_steps: 1
    gradient_clipping: auto
    optimizer:
      params:
        adam_w_mode: true
        lr: 1e-6
        torch_adam: true
        weight_decay: auto
      type: AdamW
    scheduler:
      params:
        total_num_steps: auto
        warmup_max_lr: auto
        warmup_min_lr: auto
        warmup_num_steps: auto
      type: WarmupDecayLR
    steps_per_print: 4
    wall_clock_breakdown: false
    zero_optimization:
      allgather_bucket_size: 200000000.0
      allgather_partitions: true
      contiguous_gradients: true
      overlap_comm: true
      reduce_bucket_size: auto
      reduce_scatter: true
      stage: 0