| model_args: |
| attn_implementation: flash_attention_2 |
| bnb_4bit_quant_type: nf4 |
| load_in_4bit: false |
| load_in_8bit: false |
| lora_alpha: 32 |
| lora_dropout: 0.05 |
| lora_modules_to_save: null |
| lora_r: 16 |
| lora_target_modules: null |
| lora_task_type: CAUSAL_LM |
| model_name_or_path: Qwen/Qwen2.5-Coder-7B-Instruct |
| model_revision: main |
| torch_dtype: bfloat16 |
| trust_remote_code: false |
| use_bnb_nested_quant: false |
| use_dora: false |
| use_peft: false |
| use_rslora: false |
| script_args: |
| cosine_max_len: 1000 |
| cosine_max_value_correct: 1.0 |
| cosine_max_value_wrong: -0.5 |
| cosine_min_value_correct: 0.5 |
| cosine_min_value_wrong: 0.0 |
| dataset_config: null |
| dataset_name: simone-papicchio/bird |
| dataset_test_split: test |
| dataset_train_split: train |
| gradient_checkpointing_use_reentrant: false |
| ignore_bias_buffers: false |
| reward_funcs: |
| - qatch_metrics |
| - format |
| - tag_count |
| training_args: |
| _n_gpu: 1 |
| accelerator_config: |
| dispatch_batches: null |
| even_batches: true |
| gradient_accumulation_kwargs: null |
| non_blocking: false |
| split_batches: false |
| use_configured_state: false |
| use_seedable_sampler: true |
| adafactor: false |
| adam_beta1: 0.9 |
| adam_beta2: 0.999 |
| adam_epsilon: 1.0e-08 |
| add_system_prompt: true |
| add_validation: false |
| auto_find_batch_size: false |
| average_tokens_across_devices: false |
| base_db_path: data/bird_train/train_databases |
| batch_eval_metrics: false |
| benchmarks: [] |
| beta: 0.04 |
| bf16: true |
| bf16_full_eval: false |
| cache_implementation: null |
| cached_file_path: /workspaces/deep_thinking/cache_target_sql2execution_BIRD_train.pkl |
| callbacks: {} |
| chat_template: null |
| data_seed: null |
| dataloader_drop_last: false |
| dataloader_num_workers: 0 |
| dataloader_persistent_workers: false |
| dataloader_pin_memory: true |
| dataloader_prefetch_factor: null |
| dataset_test_split_name: validation |
| ddp_backend: null |
| ddp_broadcast_buffers: null |
| ddp_bucket_cap_mb: null |
| ddp_find_unused_parameters: null |
| ddp_timeout: 1800 |
| debug: [] |
| deepspeed: null |
| disable_tqdm: false |
| do_eval: false |
| do_predict: false |
| do_train: false |
| ds3_gather_for_generation: true |
| epsilon: 0.2 |
| epsilon_high: null |
| eval_accumulation_steps: null |
| eval_delay: 0 |
| eval_do_concat_batches: true |
| eval_on_start: false |
| eval_steps: null |
| eval_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy |
| - 'no' |
| eval_use_gather_object: false |
| fp16: false |
| fp16_backend: auto |
| fp16_full_eval: false |
| fp16_opt_level: O1 |
| fsdp: [] |
| fsdp_config: |
| min_num_params: 0 |
| xla: false |
| xla_fsdp_grad_ckpt: false |
| xla_fsdp_v2: false |
| fsdp_min_num_params: 0 |
| fsdp_transformer_layer_cls_to_wrap: null |
| full_determinism: false |
| gradient_accumulation_steps: 16 |
| gradient_checkpointing: true |
| gradient_checkpointing_kwargs: |
| use_reentrant: false |
| greater_is_better: false |
| group_by_length: false |
| half_precision_backend: auto |
| hub_always_push: false |
| hub_model_id: Qwen2.5-1.5B-Open-R1-GRPO |
| hub_model_revision: main |
| hub_private_repo: null |
| hub_strategy: !!python/object/apply:transformers.trainer_utils.HubStrategy |
| - every_save |
| hub_token: null |
| ignore_data_skip: false |
| include_for_metrics: [] |
| include_inputs_for_metrics: false |
| include_num_input_tokens_seen: false |
| include_tokens_per_second: false |
| jit_mode_eval: false |
| label_names: null |
| label_smoothing_factor: 0.0 |
| learning_rate: 1.0e-06 |
| length_column_name: length |
| load_best_model_at_end: false |
| local_rank: 0 |
| log_completions: true |
| log_level: info |
| log_level_replica: warning |
| log_on_each_node: true |
| logging_dir: ./.tensorboard_logging/f5655cd2/ |
| logging_first_step: true |
| logging_nan_inf_filter: true |
| logging_steps: 5 |
| logging_strategy: !!python/object/apply:transformers.trainer_utils.IntervalStrategy |
| - steps |
| lr_scheduler_kwargs: {} |
| lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType |
| - constant_with_warmup |
| max_completion_length: 4096 |
| max_grad_norm: 0.2 |
| max_prompt_length: 2048 |
| max_steps: -1 |
| metric_for_best_model: loss |
| min_p: null |
| model_init_kwargs: '{''revision'': ''main'', ''trust_remote_code'': False, ''attn_implementation'': |
| ''flash_attention_2'', ''torch_dtype'': torch.bfloat16, ''use_cache'': False}' |
| mp_parameters: '' |
| neftune_noise_alpha: null |
| no_cuda: false |
| num_completions_to_print: 1 |
| num_generations: 16 |
| num_iterations: 1 |
| num_train_epochs: 1.0 |
| optim: !!python/object/apply:transformers.training_args.OptimizerNames |
| - adamw_8bit |
| optim_args: null |
| optim_target_modules: null |
| output_dir: base_models/grpo/Qwen/Qwen2.5-Coder-7B-Instruct/bs_256_ml_4096_gen_16_f5655cd2_RL |
| overwrite_hub_revision: false |
| overwrite_output_dir: false |
| past_index: -1 |
| per_device_eval_batch_size: 8 |
| per_device_train_batch_size: 8 |
| per_gpu_eval_batch_size: null |
| per_gpu_train_batch_size: null |
| prediction_loss_only: false |
| prompt_name: text2sql_model_grpo |
| push_to_hub: false |
| push_to_hub_model_id: null |
| push_to_hub_organization: null |
| push_to_hub_revision: false |
| push_to_hub_token: null |
| ray_scope: last |
| ref_model_mixup_alpha: 0.6 |
| ref_model_sync_steps: 512 |
| remove_unused_columns: false |
| repetition_penalty: 1.0 |
| report_to: |
| - tensorboard |
| - wandb |
| restore_callback_states_from_checkpoint: false |
| resume_from_checkpoint: 'True' |
| reward_weights: |
| - 0.85 |
| - 0.1 |
| - 0.05 |
| run_name: exp-9-7B-QATCH |
| save_on_each_node: false |
| save_only_model: false |
| save_safetensors: true |
| save_steps: 0.1 |
| save_strategy: !!python/object/apply:transformers.trainer_utils.SaveStrategy |
| - steps |
| save_total_limit: 3 |
| scale_rewards: true |
| seed: 42 |
| skip_memory_metrics: true |
| stratified_by_complexity: false |
| sync_ref_model: false |
| temperature: 0.7 |
| tf32: null |
| top_k: 50 |
| top_p: 1.0 |
| torch_compile: false |
| torch_compile_backend: null |
| torch_compile_mode: null |
| torch_empty_cache_steps: null |
| torchdynamo: null |
| tp_size: 0 |
| tpu_metrics_debug: false |
| tpu_num_cores: null |
| use_cpu: false |
| use_ipex: false |
| use_legacy_prediction_loop: false |
| use_liger_kernel: false |
| use_liger_loss: false |
| use_mps_device: false |
| use_vllm: true |
| validation_split: 0.2 |
| vllm_device: auto |
| vllm_dtype: bfloat16 |
| vllm_enable_prefix_caching: null |
| vllm_gpu_memory_utilization: 0.7 |
| vllm_guided_decoding_regex: null |
| vllm_max_model_len: null |
| vllm_server_host: 127.0.0.1 |
| vllm_server_port: 24879 |
| vllm_server_timeout: 120.0 |
| wandb_log_unique_prompts: true |
| warmup_ratio: 0.1 |
| warmup_steps: 0 |
| weight_decay: 0.0 |
|
|