anbench committed on
Commit
165ee16
·
verified ·
1 Parent(s): 8a2113a

Upload folder using huggingface_hub

Browse files
Dockerfile ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+
2
# CUDA 12.1 devel image: provides nvcc, needed to build Unsloth/bitsandbytes kernels.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# Install Python tooling; clean apt lists in the same layer so they do not
# bloat the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends python3-pip git && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy and install requirements first so this layer stays cached across
# changes to the training script.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# The launcher is renamed to train.py inside the container.
COPY launch_nemotron_opus_distillation.py train.py

CMD ["python3", "train.py"]
launch_nemotron_opus_distillation.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # unsloth-finetune/launch_nemotron_opus_distillation.py
2
+
3
+ import os
4
+ import subprocess
5
+ from unsloth import FastLanguageModel
6
+ from unsloth.chat_templates import get_chat_template, train_on_responses_only
7
+ import torch
8
+ from trl import SFTTrainer
9
+ from transformers import TrainingArguments
10
+ from datasets import load_dataset
11
+
12
# ==========================================
# Phase 1: Hugging Face Storage Integration
# ==========================================
# Create a dedicated bucket for training artifacts via the HF CLI. This
# avoids Git LFS bottlenecks and uses Xet deduplication for fast checkpoints.
# NOTE(review): assumes the installed huggingface_hub CLI provides an
# "hf buckets create" subcommand — confirm against the CLI documentation.

HF_BUCKET_NAME = "nemotron-opus-distill-runs"
print(f"Ensuring HF Storage Bucket '{HF_BUCKET_NAME}' exists...")
try:
    # check=False: a pre-existing bucket makes the CLI exit non-zero, and we
    # do not want to raise for that — but we still inspect the return code
    # instead of unconditionally declaring success.
    result = subprocess.run(
        ["hf", "buckets", "create", HF_BUCKET_NAME],
        check=False,
        capture_output=True,
    )
    if result.returncode == 0:
        print("HF Storage Bucket ready!")
    else:
        # Most likely the bucket already exists; report and keep going.
        print("HF Storage Bucket create returned non-zero (it may already exist).")
except FileNotFoundError:
    print("WARNING: 'hf' CLI not found. Make sure to install it: pip install -U huggingface_hub[cli]")
    print("Falling back to local storage only for now.")
27
# ==========================================
# Phase 2: Unsloth Model Loading
# ==========================================
max_seq_length = 4096   # context window used for training samples
dtype = None            # None lets Unsloth pick (bf16 on supported GPUs, else fp16)
load_in_4bit = True     # 4-bit allows this 30B model to easily fit on a 24GB or 40GB GPU

print("\nLoading NVIDIA Nemotron-3-Nano-30B-A3B via Unsloth...")
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Nemotron-3-Nano-30B-A3B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Attach LoRA adapters. Beyond the standard attention/MLP projections, the
# "in_proj"/"out_proj" entries target Mamba-style projection layers too.
print("Applying Hybrid LoRA Adapters...")
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
        "in_proj", "out_proj",
    ],
    lora_alpha=32,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)
57
+
58
# ==========================================
# Phase 3: Claude 4.6 Opus Reasoning Distillation
# ==========================================
# Swap in the ChatML template so conversations render as <|im_start|> blocks.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
)

print("\nStreaming Opus-4.6-Reasoning Dataset from HF Hub...")
dataset = load_dataset("nohurry/Opus-4.6-Reasoning-3000x-filtered", split="train")


def format_reasoning_prompts(examples):
    """Render each (problem, thinking, solution) row as one ChatML string.

    The assistant turn embeds the reasoning inside a <think> block so the
    model learns to produce its chain of thought before the final answer.
    Batched: `examples` maps column name -> list of values; returns a dict
    with a single "text" column.
    """
    rendered = []
    rows = zip(examples["problem"], examples["thinking"], examples["solution"])
    for problem, thinking, solution in rows:
        conversation = [
            {"role": "user", "content": problem},
            {"role": "assistant", "content": f"<think>\n{thinking}\n</think>\n{solution}"},
        ]
        rendered.append(
            tokenizer.apply_chat_template(
                conversation, tokenize=False, add_generation_prompt=False
            )
        )
    return {"text": rendered}


dataset = dataset.map(format_reasoning_prompts, batched=True)
89
+
90
# ==========================================
# Phase 4: Training & Xet Deduplication Checkpointing
# ==========================================
print("\nSetting up Trainer...")
local_output_dir = "nemotron_outputs"

# Effective batch size = 2 (per device) * 8 (accumulation) = 16 per step.
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    warmup_steps=10,
    max_steps=500,              # long enough for a true distillation run
    learning_rate=1e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=5,
    save_steps=50,              # checkpoint every 50 steps
    optim="adamw_8bit",
    weight_decay=0.05,
    lr_scheduler_type="cosine",
    seed=3407,
    output_dir=local_output_dir,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

# Mask the loss on user turns so gradients flow only through assistant tokens.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|im_start|>user\n",
    response_part="<|im_start|>assistant\n",
)
127
+
128
# Start a background process to sync checkpoints to the HF Storage Bucket.
# With Xet deduplication only the changed chunks of each checkpoint upload.
# NOTE(review): assumes the installed CLI provides "hf sync" and accepts
# hf://buckets/ URIs — confirm against the huggingface_hub CLI docs.
print(f"Starting background HF Bucket sync: local '{local_output_dir}' -> bucket '{HF_BUCKET_NAME}'")
try:
    sync_process = subprocess.Popen(
        ["hf", "sync", local_output_dir, f"hf://buckets/{HF_BUCKET_NAME}"],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
except FileNotFoundError:
    # No CLI available: train anyway; checkpoints stay local-only.
    sync_process = None

print("Starting Reasoning Distillation Fine-tuning!")
try:
    trainer_stats = trainer.train()
finally:
    # Always stop the sync child — even if training raises — so it does not
    # outlive this script; wait() reaps it and avoids a zombie process.
    if sync_process:
        sync_process.terminate()
        sync_process.wait()
142
+
143
# ==========================================
# Phase 5: GGUF Export to Bucket
# ==========================================
print("\nTraining Complete! Exporting to GGUF and pushing directly to Storage Bucket...")
# Unsloth's native GGUF exporter, targeting the high-speed HF bucket instead
# of a standard model repo.
try:
    model.push_to_hub_gguf(
        f"hf://buckets/{HF_BUCKET_NAME}/Nemotron-3-Super-Opus-Reasoning-GGUF",
        tokenizer,
        quantization_method="q4_k_m",
    )
except Exception as e:
    print(f"Failed to push GGUF (check HF Token). Error: {e}")
else:
    # Only reached when the push raised nothing.
    print("GGUF successfully uploaded to HF Storage Bucket!")

print("All tasks completed.")
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+
2
torch
unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
huggingface_hub[cli]
trl<0.9.0
peft
accelerate
bitsandbytes
datasets
transformers