Spaces:

anbench
/

nemotron-opus-distillation

Runtime error

App Files Files Community

anbench commited on Mar 12

Commit

165ee16

verified ·

1 Parent(s): 8a2113a

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

Dockerfile +7 -0
launch_nemotron_opus_distillation.py +158 -0
requirements.txt +10 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,7 @@

+FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+RUN apt-get update && apt-get install -y python3-pip git
+COPY requirements.txt .
+RUN pip3 install -r requirements.txt
+COPY launch_nemotron_opus_distillation.py train.py
+CMD ["python3", "train.py"]

launch_nemotron_opus_distillation.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# unsloth-finetune/launch_nemotron_opus_distillation.py
+import os
+import subprocess
+from unsloth import FastLanguageModel
+from unsloth.chat_templates import get_chat_template, train_on_responses_only
+import torch
+from trl import SFTTrainer
+from transformers import TrainingArguments
+from datasets import load_dataset
+# ==========================================
+# Phase 1: Hugging Face Storage Integration
+# ==========================================
+# We use HF CLI to create a dedicated bucket for our training artifacts.
+# This prevents Git LFS bottlenecks and uses Xet deduplication for fast checkpoints.
+HF_BUCKET_NAME = "nemotron-opus-distill-runs"
+print(f"Ensuring HF Storage Bucket '{HF_BUCKET_NAME}' exists...")
+try:
+    subprocess.run(["hf", "buckets", "create", HF_BUCKET_NAME], check=False, capture_output=True)
+    print("HF Storage Bucket ready!")
+except FileNotFoundError:
+    print("WARNING: 'hf' CLI not found. Make sure to install it: pip install -U huggingface_hub[cli]")
+    print("Falling back to local storage only for now.")
+# ==========================================
+# Phase 2: Unsloth Model Loading
+# ==========================================
+max_seq_length = 4096
+dtype = None
+load_in_4bit = True # 4-bit allows this 30B model to easily fit on a 24GB or 40GB GPU
+print("\nLoading NVIDIA Nemotron-3-Nano-30B-A3B via Unsloth...")
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "unsloth/Nemotron-3-Nano-30B-A3B",
+    max_seq_length = max_seq_length,
+    dtype = dtype,
+    load_in_4bit = load_in_4bit,
+)
+# Apply Hybrid LatentMoE/Mamba LoRA Adapters
+print("Applying Hybrid LoRA Adapters...")
+model = FastLanguageModel.get_peft_model(
+    model,
+    r = 16,
+    # Target standard layers + Mamba projections for deep reasoning logic capture
+    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                      "gate_proj", "up_proj", "down_proj",
+                      "in_proj", "out_proj"],
+    lora_alpha = 32,
+    lora_dropout = 0,
+    bias = "none",
+    use_gradient_checkpointing = "unsloth",
+    random_state = 3407,
+)
+# ==========================================
+# Phase 3: Claude 4.6 Opus Reasoning Distillation
+# ==========================================
+tokenizer = get_chat_template(
+    tokenizer,
+    chat_template = "chatml",
+)
+print("\nStreaming Opus-4.6-Reasoning Dataset from HF Hub...")
+# Note: HF's dataset library natively streams from their CDN
+dataset = load_dataset("nohurry/Opus-4.6-Reasoning-3000x-filtered", split = "train")
+# We format the dataset based on the exact columns in nohurry/Opus-4.6-Reasoning-3000x-filtered
+# The columns are: problem, thinking, solution
+def format_reasoning_prompts(examples):
+    problems  = examples["problem"]
+    thinkings = examples["thinking"]
+    solutions = examples["solution"]
+    texts = []
+    for problem, thinking, solution in zip(problems, thinkings, solutions):
+        # Force the model to generate <think> blocks before answering
+        convo = [
+            {"role": "user", "content": problem},
+            {"role": "assistant", "content": f"<think>\n{thinking}\n</think>\n{solution}"}
+        ]
+        text = tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
+        texts.append(text)
+    return { "text" : texts }
+dataset = dataset.map(format_reasoning_prompts, batched = True)
+# ==========================================
+# Phase 4: Training & Xet Deduplication Checkpointing
+# ==========================================
+print("\nSetting up Trainer...")
+local_output_dir = "nemotron_outputs"
+trainer = SFTTrainer(
+    model = model,
+    tokenizer = tokenizer,
+    train_dataset = dataset,
+    dataset_text_field = "text",
+    max_seq_length = max_seq_length,
+    dataset_num_proc = 2,
+    packing = False,
+    args = TrainingArguments(
+        per_device_train_batch_size = 2,
+        gradient_accumulation_steps = 8,
+        warmup_steps = 10,
+        max_steps = 500, # Increased steps for a true distillation run
+        learning_rate = 1e-4,
+        fp16 = not torch.cuda.is_bf16_supported(),
+        bf16 = torch.cuda.is_bf16_supported(),
+        logging_steps = 5,
+        save_steps = 50, # Save checkpoints every 50 steps
+        optim = "adamw_8bit",
+        weight_decay = 0.05,
+        lr_scheduler_type = "cosine",
+        seed = 3407,
+        output_dir = local_output_dir,
+    ),
+)
+trainer = train_on_responses_only(
+    trainer,
+    instruction_part = "<|im_start|>user\n",
+    response_part = "<|im_start|>assistant\n",
+)
+# Start a background process to sync checkpoints to HF Storage Bucket using Xet Deduplication
+print(f"Starting background HF Bucket sync: local '{local_output_dir}' -> bucket '{HF_BUCKET_NAME}'")
+try:
+    # Use hf sync to continuously push changes. Because of Xet, it only uploads the tiny diffs!
+    sync_process = subprocess.Popen(["hf", "sync", local_output_dir, f"hf://buckets/{HF_BUCKET_NAME}"],
+                                    stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+except FileNotFoundError:
+    sync_process = None
+print("Starting Reasoning Distillation Fine-tuning!")
+trainer_stats = trainer.train()
+if sync_process:
+    sync_process.terminate()
+# ==========================================
+# Phase 5: GGUF Export to Bucket
+# ==========================================
+print("\nTraining Complete! Exporting to GGUF and pushing directly to Storage Bucket...")
+# We use Unsloth's native GGUF exporter, but target our high-speed HF Bucket instead of a standard repo
+try:
+    model.push_to_hub_gguf(
+        f"hf://buckets/{HF_BUCKET_NAME}/Nemotron-3-Super-Opus-Reasoning-GGUF",
+        tokenizer,
+        quantization_method="q4_k_m"
+    )
+    print("GGUF successfully uploaded to HF Storage Bucket!")
+except Exception as e:
+    print(f"Failed to push GGUF (check HF Token). Error: {e}")
+print("All tasks completed.")

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+torch
+unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
+huggingface_hub[cli]
+trl<0.9.0
+peft
+accelerate
+bitsandbytes
+datasets
+transformers