| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| """ |
| SFT Multi-Task Training Script for n8n Agent |
| |
| This script fine-tunes the DPO-trained Qwen3-0.6B model on multi-task n8n workflows. |
| It builds on the reasoning capabilities from DPO training and adds task-specific skills. |
| |
| Tasks covered: |
| - generate: Create workflows from descriptions |
| - edit: Modify existing workflows |
| - fix: Correct errors in workflows |
| - explain: Explain what workflows do |
| - debug: Diagnose execution issues |
| - improve: Optimize and enhance workflows |
| |
| Usage: |
| hf jobs uv run \ |
| --script train_qwen3_sft_multitask.py \ |
| --flavor l4x1 \ |
| --timeout 24h |
| """ |
|
|
| import os |
| import json |
| import torch |
| from datasets import Dataset |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| from peft import LoraConfig, PeftModel, get_peft_model |
| from trl import SFTTrainer, SFTConfig |
| from huggingface_hub import login, hf_hub_download |
|
|
| |
| |
| |
|
|
| |
| BASE_MODEL = os.environ.get("BASE_MODEL", "Qwen/Qwen3-0.6B") |
| DPO_ADAPTER = os.environ.get("DPO_ADAPTER", "stmasson/qwen3-0.6b-n8n-reasoning") |
|
|
| |
| DATASET_REPO = "stmasson/n8n-agentic-multitask" |
| TRAIN_FILE = "data/multitask_large/train.jsonl" |
| VAL_FILE = "data/multitask_large/val.jsonl" |
|
|
| |
| OUTPUT_DIR = "./qwen3-sft-multitask" |
| HF_REPO = os.environ.get("HF_REPO", "stmasson/qwen3-0.6b-n8n-agent") |
|
|
| |
| NUM_EPOCHS = int(os.environ.get("NUM_EPOCHS", "1")) |
| BATCH_SIZE = int(os.environ.get("BATCH_SIZE", "1")) |
| GRAD_ACCUM = int(os.environ.get("GRAD_ACCUM", "8")) |
| LEARNING_RATE = float(os.environ.get("LEARNING_RATE", "1e-5")) |
| MAX_SEQ_LENGTH = int(os.environ.get("MAX_SEQ_LENGTH", "4096")) |
|
|
| |
| LORA_R = int(os.environ.get("LORA_R", "32")) |
| LORA_ALPHA = int(os.environ.get("LORA_ALPHA", "64")) |
| LORA_DROPOUT = float(os.environ.get("LORA_DROPOUT", "0.05")) |
|
|
| |
| |
| |
|
|
| print("=" * 60) |
| print("SFT MULTI-TASK TRAINING - N8N AGENT") |
| print("=" * 60) |
|
|
| hf_token = os.environ.get("HF_TOKEN") |
| if hf_token: |
| login(token=hf_token) |
| print("Authenticated with HuggingFace") |
| else: |
| print("Warning: HF_TOKEN not set, push disabled") |
|
|
| |
| |
| |
|
|
| print(f"\nLoading base model: {BASE_MODEL}") |
| print(f"Loading DPO adapter: {DPO_ADAPTER}") |
|
|
| |
| model = AutoModelForCausalLM.from_pretrained( |
| BASE_MODEL, |
| torch_dtype=torch.bfloat16, |
| attn_implementation="sdpa", |
| device_map="auto", |
| trust_remote_code=True, |
| ) |
|
|
| |
| tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) |
| if tokenizer.pad_token is None: |
| tokenizer.pad_token = tokenizer.eos_token |
| tokenizer.padding_side = "right" |
|
|
| |
| print("Loading and merging DPO adapter...") |
| model = PeftModel.from_pretrained(model, DPO_ADAPTER) |
| model = model.merge_and_unload() |
| print("DPO adapter merged successfully!") |
|
|
| print(f"Model loaded: {model.config.num_hidden_layers} layers, {model.config.hidden_size} hidden size") |
|
|
| |
| |
| |
|
|
| print(f"\nNew LoRA config for SFT: r={LORA_R}, alpha={LORA_ALPHA}") |
|
|
| lora_config = LoraConfig( |
| r=LORA_R, |
| lora_alpha=LORA_ALPHA, |
| target_modules=[ |
| "q_proj", "k_proj", "v_proj", "o_proj", |
| "gate_proj", "up_proj", "down_proj" |
| ], |
| lora_dropout=LORA_DROPOUT, |
| bias="none", |
| task_type="CAUSAL_LM" |
| ) |
|
|
| |
| |
| |
|
|
| print(f"\nLoading dataset: {DATASET_REPO}") |
|
|
| def load_jsonl_dataset(repo_id: str, filename: str) -> Dataset: |
| """Load JSONL dataset and extract only messages column.""" |
| local_path = hf_hub_download( |
| repo_id=repo_id, |
| filename=filename, |
| repo_type="dataset" |
| ) |
|
|
| messages_list = [] |
| with open(local_path, 'r', encoding='utf-8') as f: |
| for line in f: |
| data = json.loads(line) |
| messages_list.append({"messages": data["messages"]}) |
|
|
| return Dataset.from_list(messages_list) |
|
|
| |
| train_dataset = load_jsonl_dataset(DATASET_REPO, TRAIN_FILE) |
| val_dataset = load_jsonl_dataset(DATASET_REPO, VAL_FILE) |
|
|
| print(f"Train: {len(train_dataset)} examples") |
| print(f"Validation: {len(val_dataset)} examples") |
|
|
| |
| def filter_by_length(example): |
| """Filter examples that would be too long.""" |
| total_len = sum(len(m.get('content', '')) for m in example['messages']) |
| return total_len < 30000 |
|
|
| print("Filtering long examples...") |
| train_dataset = train_dataset.filter(filter_by_length) |
| val_dataset = val_dataset.filter(filter_by_length) |
| print(f"After filtering - Train: {len(train_dataset)}, Val: {len(val_dataset)}") |
|
|
| |
| def format_example(example): |
| """Format messages to text for training.""" |
| messages = example["messages"] |
| text = tokenizer.apply_chat_template( |
| messages, |
| tokenize=False, |
| add_generation_prompt=False |
| ) |
| return {"text": text} |
|
|
| print("Formatting data...") |
| train_dataset = train_dataset.map(format_example, remove_columns=train_dataset.column_names) |
| val_dataset = val_dataset.map(format_example, remove_columns=val_dataset.column_names) |
|
|
| |
| print("\nExample formatted data:") |
| print(train_dataset[0]["text"][:500] + "...") |
|
|
| |
| |
| |
|
|
| print(f"\nTraining configuration:") |
| print(f" - Epochs: {NUM_EPOCHS}") |
| print(f" - Batch size: {BATCH_SIZE}") |
| print(f" - Gradient accumulation: {GRAD_ACCUM}") |
| print(f" - Effective batch size: {BATCH_SIZE * GRAD_ACCUM}") |
| print(f" - Learning rate: {LEARNING_RATE}") |
| print(f" - Max sequence length: {MAX_SEQ_LENGTH}") |
|
|
| training_args = SFTConfig( |
| output_dir=OUTPUT_DIR, |
| num_train_epochs=NUM_EPOCHS, |
| per_device_train_batch_size=BATCH_SIZE, |
| per_device_eval_batch_size=BATCH_SIZE, |
| gradient_accumulation_steps=GRAD_ACCUM, |
| learning_rate=LEARNING_RATE, |
| lr_scheduler_type="cosine", |
| warmup_ratio=0.05, |
| weight_decay=0.01, |
| bf16=True, |
| tf32=True, |
| logging_steps=50, |
| save_strategy="steps", |
| save_steps=1000, |
| save_total_limit=3, |
| eval_strategy="steps", |
| eval_steps=1000, |
| max_length=MAX_SEQ_LENGTH, |
| packing=False, |
| gradient_checkpointing=True, |
| gradient_checkpointing_kwargs={"use_reentrant": False}, |
| dataset_text_field="text", |
| report_to="none", |
| run_name="qwen3-sft-multitask", |
| hub_model_id=HF_REPO if hf_token else None, |
| push_to_hub=bool(hf_token), |
| hub_strategy="checkpoint", |
| ) |
|
|
| |
| |
| |
|
|
| print("\nInitializing SFT trainer...") |
|
|
| trainer = SFTTrainer( |
| model=model, |
| args=training_args, |
| train_dataset=train_dataset, |
| eval_dataset=val_dataset, |
| peft_config=lora_config, |
| processing_class=tokenizer, |
| ) |
|
|
| |
| trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) |
| total_params = sum(p.numel() for p in model.parameters()) |
| print(f"\nTrainable parameters: {trainable_params:,} / {total_params:,} ({100 * trainable_params / total_params:.2f}%)") |
|
|
| print("\n" + "=" * 60) |
| print("STARTING SFT MULTI-TASK TRAINING") |
| print("=" * 60) |
|
|
| trainer.train() |
|
|
| |
| |
| |
|
|
| print("\nSaving model...") |
| trainer.save_model(f"{OUTPUT_DIR}/final") |
|
|
| if hf_token: |
| print(f"Pushing to {HF_REPO}...") |
| trainer.push_to_hub() |
| print(f"Model available at: https://huggingface.co/{HF_REPO}") |
|
|
| print("\n" + "=" * 60) |
| print("SFT MULTI-TASK TRAINING COMPLETE") |
| print("=" * 60) |
|
|