| import torch |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments |
|
|
# Path to the newline-delimited question corpus.
# NOTE(review): hard-coded absolute Windows path — consider making this
# configurable (CLI arg or environment variable) before sharing the script.
questions_file = 'C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained\\New folder (3)\\questions.txt'

# Read one question per line. An explicit encoding avoids depending on the
# platform default (cp1252 on Windows vs utf-8 on POSIX), which can silently
# corrupt non-ASCII text; splitlines() drops the trailing newlines.
with open(questions_file, 'r', encoding='utf-8') as f:
    questions = f.read().splitlines()
|
|
| |
def custom_tokenizer(text):
    """Tokenize *text* by splitting on runs of whitespace.

    A minimal stand-in tokenizer: no lowercasing, punctuation handling,
    or subword splitting is performed. Returns a list of tokens; an
    empty or all-whitespace string yields an empty list.
    """
    tokens = text.split()
    return tokens
|
|
| |
| tokenized_questions = [custom_tokenizer(question) for question in questions] |
|
|
| |
| model = AutoModelForSeq2SeqLM.from_pretrained('C:\\Users\\money\\OneDrive\\Pictures\\Blank Model\\untrained model.pt') |
|
|
| |
# Hyperparameters for the fine-tuning run.
training_args = TrainingArguments(
    output_dir='./results',          # checkpoints and logs are written here
    # The Trainer below is constructed without an eval_dataset, so the
    # original per-epoch evaluation ('epoch') would fail once evaluation is
    # triggered — disable evaluation entirely.
    evaluation_strategy='no',
    learning_rate=2e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,   # unused while evaluation is disabled
    num_train_epochs=1,
    weight_decay=0.01,
)
|
|
| |
# Build the Trainer and run fine-tuning.
# NOTE(review): train_dataset here is a list of lists of token *strings*
# (from the whitespace tokenizer), not encoded features — the Trainer's
# default data collator cannot batch this. It almost certainly needs the
# model's real tokenizer output (input_ids / attention_mask / labels);
# the AutoTokenizer import at the top of the file is currently unused,
# which suggests that step was skipped. TODO confirm and fix upstream.
# NOTE(review): training_args requests per-epoch evaluation but no
# eval_dataset is supplied — verify the Trainer does not raise for this.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_questions,
)
trainer.train()
|
|
| |
# Directory to persist the fine-tuned model into.
model_path = './trained_model'
# Writes config + weights in the save_pretrained layout so the model can be
# reloaded later via from_pretrained(model_path).
model.save_pretrained(model_path)
|
|