| import gradio as gr |
| from transformers import pipeline, Trainer, TrainingArguments, AutoModelForCausalLM, AutoTokenizer |
| import torch |
| import pandas as pd |
|
|
| |
# Module-level model/tokenizer load: runs once at import time, before the UI
# starts, so the first request doesn't pay the download/load cost.
# NOTE(review): "huggingface/transformer_model" does not look like a real Hub
# repo id — confirm and replace with an actual causal-LM checkpoint (e.g. "gpt2").
model_name = "huggingface/transformer_model"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
| |
def upload_and_finetune(file):
    """Fine-tune the module-level causal LM on an uploaded CSV of texts.

    Parameters
    ----------
    file:
        Either an uploaded-file object exposing a ``.name`` path (Gradio's
        classic ``type="file"`` return) or a plain filesystem path string
        (Gradio 4's ``type="filepath"`` return).

    Returns
    -------
    str
        A status message naming the processed file.

    Raises
    ------
    ValueError
        If the CSV lacks a ``text`` column.
    """
    # Accept both Gradio return shapes: object-with-.name, or a bare path str.
    file_path = getattr(file, "name", file)
    data = pd.read_csv(file_path)

    if "text" not in data.columns:
        raise ValueError("CSV must contain a 'text' column.")
    texts = data["text"].astype(str).tolist()

    # Causal LMs (GPT-2 family) often ship without a pad token; padding=True
    # would raise without one, so fall back to the EOS token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Keep encodings as plain Python lists (no return_tensors="pt"): the
    # dataset tensorizes per-item, avoiding torch.tensor()-on-tensor copies.
    encodings = tokenizer(texts, truncation=True, padding=True)

    class CustomDataset(torch.utils.data.Dataset):
        # Wraps tokenizer output for Trainer; labels mirror input_ids so the
        # model computes a language-modeling loss (Trainer requires a loss).

        def __init__(self, encodings):
            self.encodings = encodings

        def __len__(self):
            return len(self.encodings["input_ids"])

        def __getitem__(self, idx):
            item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
            # Without labels the model returns no loss and trainer.train() fails.
            item["labels"] = item["input_ids"].clone()
            return item

    train_dataset = CustomDataset(encodings)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=4,
        logging_dir='./logs',
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )

    trainer.train()

    # Persist the tokenizer too — without it the fine-tuned checkpoint
    # cannot be reloaded for inference.
    model.save_pretrained('./fine_tuned_model')
    tokenizer.save_pretrained('./fine_tuned_model')

    return f"File {file_path} uploaded and model fine-tuned successfully!"
|
|
| |
# Single-input Gradio UI: one file-upload widget wired to the fine-tuning
# callback; the returned status string is rendered as plain text.
# NOTE(review): gr.File(type="file") is Gradio 3.x syntax — Gradio 4 removed
# it (valid values: "filepath"/"binary"). Confirm the installed Gradio version;
# switching to "filepath" also changes what the callback receives (a path str
# instead of a file object with .name).
interface = gr.Interface(
    fn=upload_and_finetune,
    inputs=[gr.File(label="Upload Dataset for Fine-Tuning", file_count="single", type="file")],
    outputs="text"
)
|
|
| if __name__ == "__main__": |
| interface.launch() |
|
|