Spaces:

Eeppa
/

Llama-3.2-1B-Codex

Configuration error

App Files Files Community

Eeppa committed 7 days ago

Commit

9dbd0eb

verified ·

1 Parent(s): 8ae90bd

Delete finetune.py

Browse files

Files changed (1) hide show

finetune.py +0 -67

finetune.py DELETED Viewed

@@ -1,67 +0,0 @@
-# finetune.py - Run this on a GPU machine (Colab, RunPod, etc.)
-from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
-from peft import LoraConfig, get_peft_model
-from datasets import load_dataset
-import torch
-# Configuration: base checkpoint to load, Hub repo name for the result, and a dataset id
-MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
-OUTPUT_NAME = "Llama-3.2-1B-Codex"  # Must start with "Llama"; used as hub_model_id in the training arguments
-DATASET_NAME = "bigcode/the-stack-dedup"  # Or use your own dataset. NOTE(review): not referenced anywhere in the visible script
-# Load the tokenizer and the base model from MODEL_NAME
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-tokenizer.pad_token = tokenizer.eos_token  # reuse the EOS token as the padding token
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=torch.bfloat16,  # weights are requested in bfloat16
-    device_map="auto"
-)
-# LoRA config: r=16, alpha=32, dropout=0.1, applied to the modules named in target_modules
-lora_config = LoraConfig(
-    r=16,
-    lora_alpha=32,
-    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],  # presumably the attention projection layers — verify against the model
-    lora_dropout=0.1,
-    task_type="CAUSAL_LM"
-)
-model = get_peft_model(model, lora_config)  # rebinds `model` to the value returned by get_peft_model
-# Load and prepare dataset: read a local JSON file, then map each record to a chat-formatted "text" field
-dataset = load_dataset("json", data_files="your_code_data.json")  # Replace with your data
-def format_code_example(example):  # reads example["instruction"] and example["code"]; returns {"text": ...}
-    messages = [
-        {"role": "system", "content": "You are an expert programmer."},
-        {"role": "user", "content": example["instruction"]},
-        {"role": "assistant", "content": example["code"]}
-    ]
-    return {"text": tokenizer.apply_chat_template(messages, tokenize=False)}  # tokenize=False: the template is applied without tokenizing
-dataset = dataset.map(format_code_example)  # applies format_code_example to the dataset and rebinds `dataset`
-# Train: 3 epochs, per-device batch size 1 with 4 gradient-accumulation steps, save each epoch, push to the Hub
-training_args = TrainingArguments(
-    output_dir="./codex-finetuned",
-    num_train_epochs=3,
-    per_device_train_batch_size=1,
-    gradient_accumulation_steps=4,
-    learning_rate=2e-4,
-    fp16=True,  # NOTE(review): the model is loaded with torch_dtype=torch.bfloat16 — confirm fp16 (rather than bf16) is intended
-    logging_steps=10,
-    save_strategy="epoch",
-    push_to_hub=True,
-    hub_model_id=OUTPUT_NAME,
-)
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    train_dataset=dataset["train"],  # NOTE(review): the map step only adds a "text" field; no tokenization step or data collator is visible — confirm Trainer can consume this
-)
-trainer.train()
-trainer.push_to_hub()  # explicit push after training, in addition to push_to_hub=True in training_args