rajkr committed on
Commit
868ce8f
·
verified ·
1 Parent(s): 8499b98

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +210 -0
app.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import gradio as gr
3
+ import os
4
+
5
# Model options offered in the "Model" dropdown, in display order.
# The tier comments below are informal groupings only; nothing in this
# list encodes a size, and hardware advice is derived from the model id
# string elsewhere in the file.
MODELS = [
    # Small (fast; the sizes visible in these ids are all below 3B)
    'Qwen/Qwen2.5-0.5B-Instruct',
    'Qwen/Qwen2.5-1.5B-Instruct',
    # NOTE(review): this id carries no parameter count - confirm it belongs
    # in the small tier.
    'microsoft/Phi-3-mini-128k-instruct',
    'google/gemma-2b-it',
    'meta-llama/Llama-3.2-1B-Instruct',
    # Medium (the ids in this group name 3B and 7B models)
    'Qwen/Qwen2.5-7B-Instruct',
    'meta-llama/Llama-3.2-3B-Instruct',
    'mistralai/Mistral-7B-Instruct-v0.3',
    # Large (needs more GPU)
    'Qwen/Qwen2.5-14B-Instruct',
    'mistralai/Mixtral-8x7B-Instruct-v0.1',
]

# Training methods offered in the "Training Method" dropdown.
METHODS = ['SFT', 'DPO', 'GRPO', 'LoRA']

# Public datasets offered in the "Dataset" dropdown (Hugging Face Hub ids).
# NOTE(review): the generators in this file hard-code the split name they
# load; verify that each dataset listed here actually provides that split.
DATASETS = [
    'HuggingFaceH4/ultrachat_200k',
    'openai/gsm8k',
    'meta-math/MATH',
    'anthropic/hh-rlhf',
    'stanfordnlp/SHP',
]
33
+
34
def generate_training_script(model, method, dataset, epochs, lr, batch_size, max_length, output_name):
    """Return the source of a standalone TRL supervised fine-tuning script.

    Args:
        model: Hub id of the base model passed to ``SFTTrainer``.
        method: Training method label; only written into the header comment.
            The emitted script is always an SFT script.
        dataset: Hub id of the dataset to load.
        epochs: Number of training epochs.
        lr: Learning rate.
        batch_size: Per-device training batch size.
        max_length: Maximum tokenized sequence length.
        output_name: Name used for the output directory and the Hub repo.

    Returns:
        The generated script as a string.

    Raises:
        TypeError, ValueError: If a numeric argument cannot be converted.

    NOTE: the script always loads the ``train_sft`` split; datasets other
    than the ultrachat-style default may not provide it.
    """
    # Everything that lands inside generated *code* is rendered as a Python
    # literal via repr(), so quotes or backslashes in user-supplied text can
    # neither break the script nor inject statements into it.
    model_literal = repr(str(model))
    dataset_literal = repr(str(dataset))
    output_dir_literal = repr(f'./{output_name}-output')
    hub_id_literal = repr(f'YOUR_USERNAME/{output_name}')

    # Header comments must stay on one line each, otherwise the remainder of
    # the value would become live code in the generated file.
    model_label = ' '.join(str(model).splitlines())
    method_label = ' '.join(str(method).splitlines())
    dataset_label = ' '.join(str(dataset).splitlines())

    # Coerce numerics so only real numbers are ever written out (UI sliders
    # may hand over floats such as 4.0 where the trainer expects an int).
    learning_rate = float(lr)
    num_epochs = float(epochs)
    per_device_batch = int(batch_size)
    sequence_length = int(max_length)

    template = f'''#!/usr/bin/env python3
# Auto-generated training script
# Model: {model_label}
# Method: {method_label}
# Dataset: {dataset_label}

from trl import SFTTrainer, SFTConfig
from datasets import load_dataset

dataset = load_dataset({dataset_literal}, split='train_sft')

training_args = SFTConfig(
    learning_rate={learning_rate!r},
    num_train_epochs={num_epochs:g},
    per_device_train_batch_size={per_device_batch},
    gradient_accumulation_steps=4,
    max_length={sequence_length},
    gradient_checkpointing=True,
    bf16=True,
    output_dir={output_dir_literal},
    push_to_hub=True,
    hub_model_id={hub_id_literal},
    logging_steps=10,
    disable_tqdm=True,
)

trainer = SFTTrainer(
    model={model_literal},
    args=training_args,
    train_dataset=dataset,
)

trainer.train()
trainer.push_to_hub()
'''
    return template
71
+
72
def generate_dpo_script(model, dataset, epochs, lr, batch_size, output_name, max_length=512):
    """Return the source of a standalone TRL DPO training script.

    Args:
        model: Hub id of the policy model passed to ``DPOTrainer``.
        dataset: Hub id of the preference dataset; its ``train`` split is used.
        epochs: Number of training epochs.
        lr: Learning rate.
        batch_size: Per-device training batch size.
        output_name: Name used for the output directory and the Hub repo.
        max_length: Maximum sequence length written to ``DPOConfig``.
            Defaults to 512, the value that used to be hard-coded.

    Returns:
        The generated script as a string.

    Raises:
        TypeError, ValueError: If a numeric argument cannot be converted.
    """
    # Render user-supplied text as Python literals so a quote or backslash
    # cannot break the generated script or inject code into it.
    model_literal = repr(str(model))
    dataset_literal = repr(str(dataset))
    output_dir_literal = repr(f'./{output_name}-output')
    hub_id_literal = repr(f'YOUR_USERNAME/{output_name}')

    # Keep header comments on a single line each.
    model_label = ' '.join(str(model).splitlines())
    dataset_label = ' '.join(str(dataset).splitlines())

    learning_rate = float(lr)
    num_epochs = float(epochs)
    per_device_batch = int(batch_size)
    sequence_length = int(max_length)

    # The length cap is emitted as `max_length`, the keyword DPOConfig
    # accepts, instead of the previous `max_seq_length`.
    template = f'''#!/usr/bin/env python3
# DPO Training Script
# Model: {model_label}
# Dataset: {dataset_label}

from trl import DPOTrainer, DPOConfig
from datasets import load_dataset

dataset = load_dataset({dataset_literal}, split='train')

training_args = DPOConfig(
    learning_rate={learning_rate!r},
    num_train_epochs={num_epochs:g},
    per_device_train_batch_size={per_device_batch},
    max_length={sequence_length},
    bf16=True,
    output_dir={output_dir_literal},
    push_to_hub=True,
    hub_model_id={hub_id_literal},
)

trainer = DPOTrainer(
    model={model_literal},
    args=training_args,
    train_dataset=dataset,
)

trainer.train()
trainer.push_to_hub()
'''
    return template
104
+
105
def generate_lora_script(model, dataset, epochs, lr, batch_size, output_name, max_length=2048):
    """Return the source of a standalone TRL + PEFT LoRA fine-tuning script.

    Args:
        model: Hub id of the base model passed to ``SFTTrainer``.
        dataset: Hub id of the dataset; its ``train_sft`` split is used.
        epochs: Number of training epochs.
        lr: Base learning rate. The script uses ten times this value, since
            adapter training is configured here with a higher rate.
        batch_size: Per-device training batch size.
        output_name: Name used for the output directory and the Hub repo.
        max_length: Maximum tokenized sequence length. Defaults to 2048, the
            value that used to be hard-coded.

    Returns:
        The generated script as a string.

    Raises:
        TypeError, ValueError: If a numeric argument cannot be converted.
    """
    # Render user-supplied text as Python literals so a quote or backslash
    # cannot break the generated script or inject code into it.
    model_literal = repr(str(model))
    dataset_literal = repr(str(dataset))
    output_dir_literal = repr(f'./{output_name}-output')
    hub_id_literal = repr(f'YOUR_USERNAME/{output_name}')

    # Keep header comments on a single line each.
    model_label = ' '.join(str(model).splitlines())
    dataset_label = ' '.join(str(dataset).splitlines())

    # Scaling a binary float by 10 can yield artefacts such as
    # 0.00020000000000000004; six significant digits is ample for an LR.
    lora_learning_rate = float(lr) * 10
    num_epochs = float(epochs)
    per_device_batch = int(batch_size)
    sequence_length = int(max_length)

    template = f'''#!/usr/bin/env python3
# LoRA Fine-tuning Script
# Model: {model_label}
# Dataset: {dataset_label}

from trl import SFTTrainer, SFTConfig
from peft import LoraConfig
from datasets import load_dataset

dataset = load_dataset({dataset_literal}, split='train_sft')

peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type='CAUSAL_LM',
)

training_args = SFTConfig(
    learning_rate={lora_learning_rate:.6g},  # LoRA needs higher LR
    num_train_epochs={num_epochs:g},
    per_device_train_batch_size={per_device_batch},
    max_length={sequence_length},
    bf16=True,
    output_dir={output_dir_literal},
    push_to_hub=True,
    hub_model_id={hub_id_literal},
)

trainer = SFTTrainer(
    model={model_literal},
    args=training_args,
    train_dataset=dataset,
    peft_config=peft_config,
)

trainer.train()
trainer.push_to_hub()
'''
    return template
146
+
147
def generate_script(model, method, dataset, epochs, lr, batch_size, max_length, output_name):
    """Build the training script for the selected method.

    'DPO' and 'LoRA' are routed to their dedicated generators, which take
    neither ``method`` nor ``max_length``. Every other method value (the UI
    offers 'SFT' and 'GRPO') is handled by the generic training-script
    generator.

    Returns:
        The generated script source as a string.
    """
    if method in ('DPO', 'LoRA'):
        # Both specialised generators share one signature, so choose the
        # callable first and invoke it once.
        specialised = generate_dpo_script if method == 'DPO' else generate_lora_script
        return specialised(model, dataset, epochs, lr, batch_size, output_name)

    # SFT, GRPO
    return generate_training_script(
        model, method, dataset, epochs, lr, batch_size, max_length, output_name
    )
154
+
155
def _parse_param_count_billions(model):
    """Return the parameter count in billions named in a model id, or None.

    Recognises size tokens such as ``0.5B``, ``2b`` or ``14B`` and the
    mixture-of-experts form ``8x7B`` (treated as 8 * 7 = 56, an upper bound).
    """
    import re  # local import keeps this block self-contained

    # The token must not be glued to a preceding letter, digit or dot (so the
    # "2.5" in "Qwen2.5" is skipped) nor be followed by a letter (so "4bit"
    # is not read as a size).
    match = re.search(
        r'(?<![a-z0-9.])(?:(\d+)x)?(\d+(?:\.\d+)?)b(?![a-z])',
        str(model).lower(),
    )
    if match is None:
        return None
    experts, size = match.groups()
    return float(size) * (int(experts) if experts else 1)


def get_hardware_requirement(model):
    """Return a recommended GPU tier for fine-tuning ``model``.

    The tier is derived from the parameter count embedded in the model id:
    below 3B gets the small tier, up to 8B the medium tier, anything larger
    the large tier. Ids that do not name a size fall back to the large tier,
    the conservative choice.

    Args:
        model: Hub model id, e.g. ``'Qwen/Qwen2.5-1.5B-Instruct'``.

    Returns:
        A human-readable hardware recommendation string.
    """
    size_b = _parse_param_count_billions(model)
    if size_b is None:
        return 'a100-large (80GB VRAM)'
    if size_b < 3:
        return 'a10g-small (24GB VRAM)'
    if size_b <= 8:
        return 'a10g-large or a100-small (24-80GB VRAM)'
    return 'a100-large (80GB VRAM)'
162
+
163
# Gradio UI: a form of training parameters on top, the generated script and a
# hardware hint below, followed by static usage instructions.
with gr.Blocks(title='ML Model Trainer', theme=gr.themes.Soft()) as demo:
    gr.Markdown('# 🤖 ML Model Trainer')
    gr.Markdown('Generate training scripts for SFT, DPO, LoRA fine-tuning')

    with gr.Row():
        # Left column: what to train and where the result goes.
        with gr.Column():
            model = gr.Dropdown(MODELS, label='Model', value='Qwen/Qwen2.5-0.5B-Instruct')
            method = gr.Dropdown(METHODS, label='Training Method', value='SFT')
            dataset = gr.Dropdown(DATASETS, label='Dataset', value='HuggingFaceH4/ultrachat_200k')
            output_name = gr.Textbox(label='Output Model Name', value='my-finetuned-model')

        # Right column: numeric hyperparameters.
        with gr.Column():
            epochs = gr.Slider(1, 10, value=3, step=1, label='Epochs')
            lr = gr.Number(label='Learning Rate', value=2e-5)
            batch_size = gr.Slider(1, 16, value=4, step=1, label='Batch Size')
            max_length = gr.Slider(256, 8192, value=4096, step=256, label='Max Sequence Length')

    generate_btn = gr.Button('🔧 Generate Training Script', variant='primary')

    output_code = gr.Code(label='Training Script', language='python', lines=20)

    hardware_info = gr.Markdown('')

    def on_generate(model, method, dataset, epochs, lr, batch_size, max_length, output_name):
        """Click handler: return the generated script and a hardware/timeout hint."""
        # An emptied Number field is assumed to arrive as None; report it in
        # the UI instead of failing later with an unhandled TypeError.
        if lr is None:
            raise gr.Error('Please enter a learning rate.')
        script = generate_script(model, method, dataset, epochs, lr, batch_size, max_length, output_name)
        hw = get_hardware_requirement(model)
        # The timeout is a rough rule of thumb: two hours per epoch.
        return script, f'**Recommended Hardware:** {hw} | **Timeout:** ~{int(epochs * 2)}h'

    generate_btn.click(
        on_generate,
        inputs=[model, method, dataset, epochs, lr, batch_size, max_length, output_name],
        outputs=[output_code, hardware_info]
    )

    gr.Markdown('---')
    gr.Markdown('### 📋 How to Use')
    gr.Markdown('''
1. Configure your training parameters above
2. Click **Generate Training Script**
3. Copy the script to a file (e.g., `train.py`)
4. Install dependencies: `pip install transformers trl torch datasets accelerate peft`
5. Run: `python train.py`

**Note:** You'll need Hugging Face Pro or compute credits for cloud training.
''')

# Start the server only when run as a script, so importing this module
# (e.g. to reuse the generators) does not launch the app.
if __name__ == '__main__':
    demo.launch()