{
  "start_time": "2025-10-07 14:29:47",
  "experiment_type": "open_ended",
  "llm_config": {
    "model_name": "meta-llama/Llama-3.1-8B-Instruct",
    "use_base_model": null,
    "model_size": null,
    "model_path": "/home/work/haesungpyun/pa_tool/outputs/meta-llama/Llama-3.1-8B-Instruct_None/finetuning/all_tasks/epochs_3_lr_5e-5_batch_16_r_32_alpha_64_dropout_0.05/model/checkpoint-480"
  },
  "generation_config_list": [
    {
      "decoding_mode": "open_generate",
      "polarity": "pos",
      "is_positive": true,
      "temperature": null,
      "top_p": null,
      "do_sample": null,
      "max_new_tokens": null
    }
  ],
  "dataset_config_list": [
    {
      "dataset": "metatool",
      "split": "train",
      "num_option": 2,
      "random_seed": 42,
      "type": "data",
      "num_permutations": 2,
      "prompt_config": {
        "mcq_inst_version": null,
        "answer_format_version": null,
        "option_symbol": null,
        "option_wrapper": null
      }
    }
  ],
  "probe_config_list": [
    null
  ],
  "steering_config_list": [
    null
  ],
  "finetuning_config_list": [
    {
      "save_name": "all_tasks",
      "finetuning_method": "standard",
      "num_train_epochs": 6,
      "per_device_train_batch_size": 16,
      "per_device_eval_batch_size": 4,
      "target_modules": [],
      "learning_rate": "5e-5",
      "weight_decay": 0.01,
      "warmup_step_ratio": 0.03,
      "logging_steps_ratio": 0.02,
      "eval_steps_ratio": 0.05,
      "save_step_ratio": 0.1,
      "save_total_limit": 20,
      "gradient_accumulation_steps": 1,
      "lr_scheduler_type": "cosine",
      "gradient_clip_norm": 1.0,
      "early_stopping_threshold": null,
      "early_stopping_patience": null,
      "load_best_model_at_end": true,
      "metric_for_best_model": "eval_loss",
      "greater_is_better": false,
      "lora_config": {
        "r": 32,
        "lora_alpha": 64,
        "lora_dropout": 0.05,
        "target_modules": [
          "q_proj",
          "v_proj",
          "k_proj",
          "o_proj",
          "gate_proj",
          "up_proj",
          "down_proj",
          "lm_head"
        ]
      }
    }
  ],
  "icl_config_list": [
    null
  ]
}