# Streamlit app that generates ready-to-run TRL fine-tuning scripts
# (SFT, DPO, or LoRA) from a handful of UI-selected hyperparameters.
import os
import re
import textwrap

import streamlit as st

st.set_page_config(page_title='ML Model Trainer', page_icon='🤖')
st.title('🤖 ML Model Trainer')
st.markdown('Generate training scripts for SFT, DPO, LoRA fine-tuning')

# Model options
MODELS = [
    'Qwen/Qwen2.5-0.5B-Instruct',
    'Qwen/Qwen2.5-1.5B-Instruct',
    'Qwen/Qwen2.5-7B-Instruct',
    'meta-llama/Llama-3.2-1B-Instruct',
    'meta-llama/Llama-3.2-3B-Instruct',
    'microsoft/Phi-3-mini-128k-instruct',
    'google/gemma-2b-it',
    'mistralai/Mistral-7B-Instruct-v0.3',
]
METHODS = ['SFT', 'DPO', 'LoRA']
DATASETS = [
    'HuggingFaceH4/ultrachat_200k',
    'openai/gsm8k',
    'meta-math/MATH',
    'anthropic/hh-rlhf',
]

DEFAULT_OUTPUT_NAME = 'my-finetuned-model'

# Sizes (in billions of parameters) for models whose repo name does not
# spell the size out as "<number>B".
_KNOWN_PARAMS_B = {
    'microsoft/Phi-3-mini-128k-instruct': 3.8,
}

# load_dataset() arguments for datasets that need a config name or do not
# expose a plain 'train' split. Anything not listed falls back to
# split='train'.
# NOTE(review): only the split/config is handled here; whether a dataset's
# columns match what the chosen trainer expects is not validated.
_DATASET_LOAD_ARGS = {
    'HuggingFaceH4/ultrachat_200k': "split='train_sft'",
    'openai/gsm8k': "'main', split='train'",
}

_HW_SMALL = 'a10g-small (24GB VRAM) - ~2h training'
_HW_MEDIUM = 'a10g-large or a100-small (24-80GB VRAM) - ~4h training'
_HW_LARGE = 'a100-large (80GB VRAM) - ~6h training'


def _model_size_b(model):
    """Return the model size in billions of parameters, or None if unknown.

    The size is taken from ``_KNOWN_PARAMS_B`` when present, otherwise parsed
    from the first "<number>B" / "<number>b" token in the repo name.
    """
    if model in _KNOWN_PARAMS_B:
        return _KNOWN_PARAMS_B[model]
    found = re.search(r'(\d+(?:\.\d+)?)[bB]\b', model)
    return float(found.group(1)) if found else None


def _recommend_hardware(model):
    """Return the hardware hint shown for *model*.

    Up to 1B parameters maps to the small tier, up to 8B to the medium tier,
    and anything larger (or of unknown size) to the large tier.
    """
    size = _model_size_b(model)
    if size is None or size > 8:
        return _HW_LARGE
    if size <= 1:
        return _HW_SMALL
    return _HW_MEDIUM


def _build_script(method, model, dataset, output_name, epochs, lr,
                  batch_size, max_length):
    """Return the source of a training script for the given settings.

    ``method`` selects the template: 'SFT', 'DPO', and anything else is
    treated as LoRA. String values are written with ``repr`` so quotes or
    backslashes typed by the user cannot break the generated code.
    """
    load_args = _DATASET_LOAD_ARGS.get(dataset, "split='train'")
    load_call = f'load_dataset({dataset!r}, {load_args})'
    output_dir = f'./{output_name}-output'
    hub_model_id = f'YOUR_USERNAME/{output_name}'

    # NOTE(review): the SFT/LoRA templates keep `max_seq_length`; whether
    # SFTConfig accepts that name depends on the installed TRL version -
    # confirm against the version users are expected to install.
    if method == 'SFT':
        template = f'''\
            #!/usr/bin/env python3
            from trl import SFTTrainer, SFTConfig
            from datasets import load_dataset

            dataset = {load_call}

            training_args = SFTConfig(
                learning_rate={lr:g},
                num_train_epochs={epochs},
                per_device_train_batch_size={batch_size},
                gradient_accumulation_steps=4,
                max_seq_length={max_length},
                gradient_checkpointing=True,
                bf16=True,
                output_dir={output_dir!r},
                push_to_hub=True,
                hub_model_id={hub_model_id!r},
                logging_steps=10,
                disable_tqdm=True,
            )

            trainer = SFTTrainer(
                model={model!r},
                args=training_args,
                train_dataset=dataset,
            )

            trainer.train()
            trainer.push_to_hub()
            '''
    elif method == 'DPO':
        # DPOConfig names its length limit `max_length`; it has no
        # `max_seq_length` field.
        template = f'''\
            #!/usr/bin/env python3
            from trl import DPOTrainer, DPOConfig
            from datasets import load_dataset

            dataset = {load_call}

            training_args = DPOConfig(
                learning_rate={lr:g},
                num_train_epochs={epochs},
                per_device_train_batch_size={batch_size},
                max_length={max_length},
                bf16=True,
                output_dir={output_dir!r},
                push_to_hub=True,
                hub_model_id={hub_model_id!r},
            )

            trainer = DPOTrainer(
                model={model!r},
                args=training_args,
                train_dataset=dataset,
            )

            trainer.train()
            trainer.push_to_hub()
            '''
    else:  # LoRA
        # The LoRA template uses 10x the selected learning rate; `:g` keeps
        # float noise (e.g. 0.00030000000000000003) out of the output.
        template = f'''\
            #!/usr/bin/env python3
            from trl import SFTTrainer, SFTConfig
            from peft import LoraConfig
            from datasets import load_dataset

            dataset = {load_call}

            peft_config = LoraConfig(
                r=16,
                lora_alpha=32,
                lora_dropout=0.05,
                task_type='CAUSAL_LM',
            )

            training_args = SFTConfig(
                learning_rate={lr * 10:g},
                num_train_epochs={epochs},
                per_device_train_batch_size={batch_size},
                max_seq_length={max_length},
                bf16=True,
                output_dir={output_dir!r},
                push_to_hub=True,
                hub_model_id={hub_model_id!r},
            )

            trainer = SFTTrainer(
                model={model!r},
                args=training_args,
                train_dataset=dataset,
                peft_config=peft_config,
            )

            trainer.train()
            trainer.push_to_hub()
            '''
    # Interpolated values never contain newlines (repr escapes them), so
    # dedenting after formatting is safe.
    return textwrap.dedent(template)


col1, col2 = st.columns(2)

with col1:
    model = st.selectbox('Model', MODELS)
    method = st.selectbox('Training Method', METHODS)
    dataset = st.selectbox('Dataset', DATASETS)
    output_name = st.text_input('Output Model Name', DEFAULT_OUTPUT_NAME)

with col2:
    epochs = st.slider('Epochs', 1, 10, 3)
    lr = st.number_input('Learning Rate', value=2e-5, format='%.0e')
    batch_size = st.slider('Batch Size', 1, 16, 4)
    max_length = st.slider('Max Sequence Length', 256, 8192, 4096, 256)

if st.button('🔧 Generate Training Script', type='primary'):
    # An empty name would yield an invalid hub id ("YOUR_USERNAME/"), so
    # fall back to the default shown in the text box.
    clean_name = output_name.strip() or DEFAULT_OUTPUT_NAME

    script = _build_script(
        method=method,
        model=model,
        dataset=dataset,
        output_name=clean_name,
        epochs=epochs,
        lr=lr,
        batch_size=batch_size,
        max_length=max_length,
    )
    st.code(script, language='python')

    # Hardware info
    hw = _recommend_hardware(model)
    st.info(f'**Recommended:** {hw}')

st.markdown('---')
st.markdown('### 📋 How to Use')
st.markdown('''
1. Configure parameters above
2. Click **Generate Training Script**
3. Copy the script to `train.py`
4. Install: `pip install transformers trl torch datasets accelerate peft`
5. Run: `python train.py`

**Note:** Need Hugging Face Pro or compute credits for cloud training.
''')