import tensorflow as tf
from transformers import TFAutoModelForCausalLM, AutoTokenizer
import datasets
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import numpy as np


class VedaTrainer:
    """Fine-tuning pipeline that adapts a pretrained causal LM into VEDA.

    The trainer loads the tokenizer for ``base_model``, registers the VEDA
    special tokens, tokenizes a Hugging Face dataset, and fine-tunes a
    TensorFlow causal language model through the Keras ``fit`` API.
    """

    # Label value that the Transformers built-in language-modeling loss skips.
    IGNORE_INDEX = -100

    def __init__(self, base_model="gpt2"):
        """Load the tokenizer and register the VEDA special tokens.

        Args:
            base_model: Hub name or local path of the pretrained checkpoint.
        """
        self.base_model = base_model
        self.tokenizer = AutoTokenizer.from_pretrained(base_model)
        self.model = None

        # A dedicated pad token is always registered here, so checkpoints that
        # ship without one need no separate eos-as-pad fallback.
        special_tokens = {
            "pad_token": "[VEDA_PAD]",
            "bos_token": "[VEDA_START]",
            "eos_token": "[VEDA_END]",
            "unk_token": "[VEDA_UNK]",
        }
        self.tokenizer.add_special_tokens(special_tokens)

    @staticmethod
    def _mask_padding_labels(input_ids, attention_mask, ignore_index=-100):
        """Copy ``input_ids`` into labels, replacing padded positions.

        Args:
            input_ids: Batch of token-id sequences (list of lists).
            attention_mask: Parallel batch of 0/1 masks; 0 marks padding.
            ignore_index: Value written at padded positions.

        Returns:
            A new list of lists shaped like ``input_ids``.
        """
        return [
            [token if keep else ignore_index for token, keep in zip(ids, mask)]
            for ids, mask in zip(input_ids, attention_mask)
        ]

    def prepare_veda_dataset(
        self,
        dataset_name="wikitext",
        dataset_config="wikitext-2-raw-v1",
        max_length=256,
    ):
        """Load a dataset and tokenize it for causal-LM training.

        Args:
            dataset_name: Name passed to ``datasets.load_dataset``.
            dataset_config: Configuration passed to ``datasets.load_dataset``.
            max_length: Fixed sequence length; longer texts are truncated and
                shorter ones are padded up to it.

        Returns:
            The mapped dataset with ``input_ids``, ``attention_mask`` and
            ``labels`` columns added to every split.
        """
        print("📚 Loading dataset for VEDA training...")
        dataset = datasets.load_dataset(dataset_name, dataset_config)

        bos = self.tokenizer.bos_token
        eos = self.tokenizer.eos_token

        def tokenize_function(examples):
            texts = [f"{bos} {text} {eos}" for text in examples["text"]]
            # Pad to a fixed length: padding to the longest item of each map
            # batch would leave rows of different lengths across the dataset.
            encoded = self.tokenizer(
                texts,
                truncation=True,
                padding="max_length",
                max_length=max_length,
            )
            # Causal-LM labels are the inputs themselves; padded positions are
            # masked so they do not contribute to the loss.
            encoded["labels"] = self._mask_padding_labels(
                encoded["input_ids"],
                encoded["attention_mask"],
                self.IGNORE_INDEX,
            )
            return encoded

        tokenized_dataset = dataset.map(tokenize_function, batched=True)
        return tokenized_dataset

    def create_veda_model(self):
        """Load the base model, resize its embeddings, and compile it.

        Returns:
            The compiled model, which is also stored on ``self.model``.
        """
        print("🏗️ Building VEDA model...")

        self.model = TFAutoModelForCausalLM.from_pretrained(self.base_model)
        # The tokenizer gained new special tokens, so the embedding matrix
        # must grow to match the new vocabulary size.
        self.model.resize_token_embeddings(len(self.tokenizer))

        # Keep the model config aligned with the replaced special tokens so
        # the saved checkpoint carries the matching ids.
        self.model.config.pad_token_id = self.tokenizer.pad_token_id
        self.model.config.bos_token_id = self.tokenizer.bos_token_id
        self.model.config.eos_token_id = self.tokenizer.eos_token_id

        optimizer = Adam(
            learning_rate=3e-5,
            beta_1=0.9,
            beta_2=0.95,
            epsilon=1e-9,
        )

        # No ``loss`` argument: the model then uses its built-in causal-LM
        # loss, which shifts labels by one position and skips IGNORE_INDEX.
        # A plain sparse cross-entropy (and an accuracy metric) would compare
        # each position with its own token instead of the next one.
        self.model.compile(optimizer=optimizer)

        return self.model

    def train_veda(self, dataset, epochs=3, batch_size=4):
        """Fine-tune a freshly created VEDA model.

        Args:
            dataset: Tokenized dataset as returned by
                ``prepare_veda_dataset``; must contain a ``"train"`` split.
            epochs: Number of passes over the training split.
            batch_size: Batch size for training and validation.

        Returns:
            A ``(model, history)`` tuple.
        """
        model = self.create_veda_model()

        print("🎯 Starting VEDA training...")

        columns = ["input_ids", "attention_mask", "labels"]

        # Keras rejects ``validation_split`` for tf.data inputs, so validation
        # data is an explicit split: the dataset's own one when present,
        # otherwise 10% held out from the training split.
        if "validation" in dataset:
            train_split = dataset["train"]
            val_split = dataset["validation"]
        else:
            parts = dataset["train"].train_test_split(test_size=0.1)
            train_split = parts["train"]
            val_split = parts["test"]

        train_data = train_split.to_tf_dataset(
            columns=columns,
            shuffle=True,
            batch_size=batch_size,
        )
        val_data = val_split.to_tf_dataset(
            columns=columns,
            shuffle=False,
            batch_size=batch_size,
        )

        history = model.fit(
            train_data,
            validation_data=val_data,
            epochs=epochs,
        )

        print("✅ VEDA training completed!")

        return model, history

    def save_veda_model(self, model, path="./veda_model"):
        """Save the model and the tokenizer into the same directory.

        Args:
            model: Trained model exposing ``save_pretrained``.
            path: Target directory.
        """
        print(f"💾 Saving VEDA model to {path}...")
        model.save_pretrained(path)
        self.tokenizer.save_pretrained(path)
        print("✅ VEDA model saved!")


def main():
    """Run the full VEDA pipeline: prepare data, fine-tune, then save."""
    veda = VedaTrainer()
    corpus = veda.prepare_veda_dataset()
    # The training history is returned but not needed by the script itself.
    trained_model, _history = veda.train_veda(corpus)
    veda.save_veda_model(trained_model)


if __name__ == "__main__":
    main()