{
  "start_time": "2025-10-07 14:29:47",
  "experiment_type": "open_ended",
  "llm_config": {
    "model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "use_base_model": null,
    "model_size": null,
    "model_path": "/home/work/haesungpyun/pa_tool/outputs/meta-llama/Llama-3.1-8B-Instruct_None/finetuning/all_tasks/epochs_3_lr_5e-5_batch_16_r_32_alpha_64_dropout_0.05/model/checkpoint-480"
  },
  "generation_config_list": [
    {
      "decoding_mode": "open_generate",
      "polarity": "pos",
      "is_positive": true,
      "temperature": null,
      "top_p": null,
      "do_sample": null,
      "max_new_tokens": null
    }
  ],
  "dataset_config_list": [
    {
      "dataset": "metatool",
      "split": "train",
      "num_option": 2,
      "random_seed": 42,
      "type": "data",
      "num_permutations": 2,
      "prompt_config": {
        "mcq_inst_version": null,
        "answer_format_version": null,
        "option_symbol": null,
        "option_wrapper": null
      }
    }
  ],
  "probe_config_list": [
    null
  ],
  "steering_config_list": [
    null
  ],
  "finetuning_config_list": [
    {
      "save_name": "all_tasks",
      "finetuning_method": "standard",
      "num_train_epochs": 6,
      "per_device_train_batch_size": 16,
      "per_device_eval_batch_size": 4,
      "target_modules": [],
      "learning_rate": "5e-5",
      "weight_decay": 0.01,
      "warmup_step_ratio": 0.03,
      "logging_steps_ratio": 0.02,
      "eval_steps_ratio": 0.05,
      "save_step_ratio": 0.1,
      "save_total_limit": 20,
      "gradient_accumulation_steps": 1,
      "lr_scheduler_type": "cosine",
      "gradient_clip_norm": 1.0,
      "early_stopping_threshold": null,
      "early_stopping_patience": null,
      "load_best_model_at_end": true,
      "metric_for_best_model": "eval_loss",
      "greater_is_better": false,
      "lora_config": {
        "r": 32,
        "lora_alpha": 64,
        "lora_dropout": 0.05,
        "target_modules": [
          "q_proj",
          "v_proj",
          "k_proj",
          "o_proj",
          "gate_proj",
          "up_proj",
          "down_proj",
          "lm_head"
        ]
      }
    }
  ],
  "icl_config_list": [
    null
  ]
}