Eeppa committed on
Commit
9dbd0eb
·
verified ·
1 Parent(s): 8ae90bd

Delete finetune.py

Browse files
Files changed (1) hide show
  1. finetune.py +0 -67
finetune.py DELETED
@@ -1,67 +0,0 @@
# finetune.py - Run this on a GPU machine (Colab, RunPod, etc.)
#
# LoRA fine-tuning of an instruction-tuned Llama checkpoint on
# instruction -> code pairs, followed by an upload of the result to the Hub.
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
import torch

# Configuration
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
OUTPUT_NAME = "Llama-3.2-1B-Codex"  # Must start with "Llama"
# NOTE: not read anywhere below; the script trains from the local JSON file.
DATASET_NAME = "bigcode/the-stack-dedup"  # Or use your own dataset
# Upper bound on tokens per example so one long sample cannot exhaust memory.
MAX_SEQ_LENGTH = 2048

# Load model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Reuse EOS as the padding token so batches can be padded.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# LoRA config
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)

# Load and prepare dataset
dataset = load_dataset("json", data_files="your_code_data.json")  # Replace with your data


def format_code_example(example):
    """Render one record as a chat-formatted training string.

    Reads the ``instruction`` and ``code`` fields of ``example`` and returns
    ``{"text": ...}`` holding the system/user/assistant conversation rendered
    by the tokenizer's chat template (untokenized).
    """
    messages = [
        {"role": "system", "content": "You are an expert programmer."},
        {"role": "user", "content": example["instruction"]},
        {"role": "assistant", "content": example["code"]},
    ]
    return {"text": tokenizer.apply_chat_template(messages, tokenize=False)}


def tokenize_example(example):
    """Convert the rendered ``text`` field into model inputs with labels.

    Returns the tokenizer output (``input_ids``, ``attention_mask``) plus a
    ``labels`` list equal to ``input_ids``, truncated to ``MAX_SEQ_LENGTH``.
    """
    encoded = tokenizer(
        example["text"],
        # The chat template is expected to have emitted the special tokens
        # already; adding them again would duplicate the BOS token.
        add_special_tokens=False,
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
    )
    # Causal-LM objective: the model shifts the labels internally.
    encoded["labels"] = list(encoded["input_ids"])
    return encoded


dataset = dataset.map(format_code_example)
# Trainer needs token ids, not raw strings; drop every original column so
# only input_ids / attention_mask / labels reach the model.
dataset = dataset.map(
    tokenize_example,
    remove_columns=dataset["train"].column_names,
)

# Pads inputs with the pad token and labels with -100. Padding the labels
# separately keeps genuine end-of-turn tokens in the loss even though the
# pad token is the same id as EOS.
data_collator = DataCollatorForSeq2Seq(
    tokenizer,
    padding=True,
    label_pad_token_id=-100,
)

# Train
training_args = TrainingArguments(
    output_dir="./codex-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    # Must match the bfloat16 weights loaded above; fp16 mixed precision
    # cannot unscale gradients of bf16 parameters.
    bf16=True,
    logging_steps=10,
    save_strategy="epoch",
    push_to_hub=True,
    hub_model_id=OUTPUT_NAME,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    data_collator=data_collator,
)

trainer.train()
trainer.push_to_hub()