Andikaasaputraa committed on
Commit
dac161a
·
verified ·
1 Parent(s): 8d9f343

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. __pycache__/app.cpython-312.pyc +0 -0
  2. app.py +29 -12
  3. requirements.txt +1 -7
__pycache__/app.cpython-312.pyc ADDED
Binary file (7.31 kB). View file
 
app.py CHANGED
@@ -1,23 +1,41 @@
1
  import os
2
  import gradio as gr
3
- import torch
4
  import subprocess
5
  from huggingface_hub import login
 
6
 
7
  def run_finetuning(hf_token):
8
  if not hf_token:
9
- return "ERROR: HF Token is required!"
 
10
 
11
  yield "Logging in to Hugging Face..."
12
  try:
13
  login(token=hf_token)
14
- yield "Login successful! Installing dependencies (unsloth etc)..."
15
  except Exception as e:
16
- return f"Login failed: {e}"
 
17
 
18
- # To avoid blocking the Gradio UI and memory issues, we can run the actual training as a subprocess.
19
- # But since we are generating the logic directly, let's write the training script and run it.
 
 
 
 
 
20
 
 
 
 
 
 
 
 
 
 
 
 
21
  train_script = """
22
  import os
23
  import torch
@@ -98,7 +116,7 @@ for row in ds_njirlah:
98
  {'role': 'assistant', 'content': str(out)},
99
  ]
100
  all_texts.append(tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False))
101
- elif 'text' in row and '<|im_start|>' in row['text']: # If already formatted in chatml
102
  all_texts.append(row['text'])
103
 
104
  merged_dataset = Dataset.from_dict({'text': all_texts})
@@ -146,11 +164,10 @@ print('ALL DONE!')
146
  with open("train.py", "w") as f:
147
  f.write(train_script)
148
 
149
- yield "Training script generated! Starting subprocess..."
150
- process = subprocess.Popen(["python", "train.py"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
151
 
152
  for line in process.stdout:
153
- yield f"Logs: {line.strip()}"
154
 
155
  process.wait()
156
  if process.returncode == 0:
@@ -160,13 +177,13 @@ print('ALL DONE!')
160
 
161
  with gr.Blocks(title="NJIRLAH-OSS-1 Mega Finetune", theme=gr.themes.Monochrome()) as app:
162
  gr.Markdown("# 🚀 NJIRLAH-OSS-1 Mega Finetune Engine (Hugging Face Spaces)")
163
- gr.Markdown("Tool ini akan menjalankan seluruh logic training dari Kaggle sebelumnya secara otomatis menggunakan GPU di Hugging Face Spaces.")
164
 
165
  with gr.Row():
166
  hf_token_input = gr.Textbox(label="Hugging Face Token", type="password", placeholder="hf_...")
167
  start_btn = gr.Button("Mulai Finetune & Push ke Hub!", variant="primary")
168
 
169
- output_logs = gr.Textbox(label="Training Logs", lines=20, max_lines=30)
170
 
171
  start_btn.click(fn=run_finetuning, inputs=hf_token_input, outputs=output_logs)
172
 
 
1
  import os
2
  import gradio as gr
 
3
  import subprocess
4
  from huggingface_hub import login
5
+ import sys
6
 
7
  def run_finetuning(hf_token):
8
  if not hf_token:
9
+ yield "ERROR: HF Token is required!"
10
+ return
11
 
12
  yield "Logging in to Hugging Face..."
13
  try:
14
  login(token=hf_token)
15
+ yield "Login successful!"
16
  except Exception as e:
17
+ yield f"Login failed: {e}"
18
+ return
19
 
20
+ yield "Installing GPU dependencies dynamically (Unsloth, xformers, trl). Please wait ~3 minutes..."
21
+
22
+ # Run pip install dynamically at runtime (when GPU is attached)
23
+ pip_cmd = [
24
+ sys.executable, "-m", "pip", "install",
25
+ "unsloth", "xformers", "trl", "peft", "accelerate", "bitsandbytes"
26
+ ]
27
 
28
+ p_pip = subprocess.Popen(pip_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
29
+ for line in p_pip.stdout:
30
+ yield f"[PIP] {line.strip()}"
31
+ p_pip.wait()
32
+
33
+ if p_pip.returncode != 0:
34
+ yield "ERROR: Failed to install dependencies."
35
+ return
36
+
37
+ yield "Dependencies installed successfully! Starting Training Script..."
38
+
39
  train_script = """
40
  import os
41
  import torch
 
116
  {'role': 'assistant', 'content': str(out)},
117
  ]
118
  all_texts.append(tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False))
119
+ elif 'text' in row and '<|im_start|>' in row['text']:
120
  all_texts.append(row['text'])
121
 
122
  merged_dataset = Dataset.from_dict({'text': all_texts})
 
164
  with open("train.py", "w") as f:
165
  f.write(train_script)
166
 
167
+ process = subprocess.Popen([sys.executable, "train.py"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
 
168
 
169
  for line in process.stdout:
170
+ yield f"[TRAIN] {line.strip()}"
171
 
172
  process.wait()
173
  if process.returncode == 0:
 
177
 
178
  with gr.Blocks(title="NJIRLAH-OSS-1 Mega Finetune", theme=gr.themes.Monochrome()) as app:
179
  gr.Markdown("# 🚀 NJIRLAH-OSS-1 Mega Finetune Engine (Hugging Face Spaces)")
180
+ gr.Markdown("Tool ini akan menjalankan seluruh logic training dari Kaggle sebelumnya secara otomatis menggunakan GPU di Hugging Face Spaces. **Pastikan Anda sudah mengaktifkan GPU A10G atau L4 di Settings Space ini!**")
181
 
182
  with gr.Row():
183
  hf_token_input = gr.Textbox(label="Hugging Face Token", type="password", placeholder="hf_...")
184
  start_btn = gr.Button("Mulai Finetune & Push ke Hub!", variant="primary")
185
 
186
+ output_logs = gr.Textbox(label="Live Logs", lines=20, max_lines=30)
187
 
188
  start_btn.click(fn=run_finetuning, inputs=hf_token_input, outputs=output_logs)
189
 
requirements.txt CHANGED
@@ -1,9 +1,3 @@
1
  gradio
2
- torch
3
- transformers
4
  datasets
5
- trl
6
- peft
7
- accelerate
8
- bitsandbytes
9
- unsloth[cu121-ampere] @ git+https://github.com/unslothai/unsloth.git
 
1
  gradio
2
+ huggingface_hub
 
3
  datasets