import pandas as pd
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from transformers import BertTokenizerFast
from transformers import DataCollatorWithPadding


# Toy text-classification corpus: four Spanish questions, one per class.
# The position of each question in this tuple is its integer label (0-3).
_QUESTIONS = (
    "¿Qué hamburguesas tienen?",
    "¿Qué combos con papas tienen?",
    "¿Qué tipos de refrescos tienen?",
    "¿Cómo hago una orden?",
)
data = [
    {"text": question, "label": class_id}
    for class_id, question in enumerate(_QUESTIONS)
]

# Route the records through pandas, then wrap them as a `datasets.Dataset`.
df = pd.DataFrame(data)
dataset = Dataset.from_pandas(df)

# Fast BERT tokenizer loaded from the same identifier the model is loaded from.
tokenizer = BertTokenizerFast.from_pretrained("Sebastian2903/SMARTORDERIA")


def preprocess_function(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Args:
        examples: Batch that exposes the raw strings under the ``"text"``
            key (this function is handed to ``Dataset.map`` with
            ``batched=True``).

    Returns:
        The result of calling the module-level ``tokenizer`` on those
        strings with truncation and padding enabled.
    """
    texts = examples["text"]
    # NOTE(review): padding=True pads to the longest text within this call
    # only, so separate map() batches can come out with different widths --
    # confirm that downstream batching/collation tolerates that.
    encoded = tokenizer(texts, truncation=True, padding=True)
    return encoded


# Tokenize the whole dataset; batched=True hands preprocess_function a batch
# of rows per call instead of a single row.
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# Hold out 10% of the rows for evaluation. The split shuffles the rows, so a
# fixed seed is passed to keep the same rows in the eval set on every run;
# without it, evaluation results are not comparable between runs.
train_test_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]


# Sequence-classification model with four output classes, matching the label
# ids 0-3 used in the training records.
model = AutoModelForSequenceClassification.from_pretrained(
    "Sebastian2903/SMARTORDERIA",
    num_labels=4,
)

# Hyperparameters for the run; checkpoints and logs go under ./results.
training_args = TrainingArguments(
    output_dir="./results",
    # NOTE(review): recent transformers releases rename this argument to
    # `eval_strategy` -- confirm against the installed version.
    evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
)

# Pad every training/eval batch to its own longest sequence at collation
# time. The tokenized columns were padded per map() batch, so rows coming
# from different map() batches may differ in length; without a padding
# collator such rows cannot be stacked into one tensor.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
)


# Run fine-tuning with the trainer configured earlier in this script.
trainer.train()

# Persist the fine-tuned model first, then the tokenizer, to the same target.
# NOTE(review): save_pretrained is given the same string that was used to
# load the model; presumably it is treated as a local directory path here --
# confirm whether uploading to the hub was intended instead.
save_dir = "Sebastian2903/SMARTORDERIA"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)