| # Standard library
| import pickle
|
| # Third-party
| import matplotlib.pyplot as plt
| import nltk
| import numpy as np
| import pandas as pd
| import tensorflow as tf
| from nltk.tokenize import word_tokenize
| from sklearn.model_selection import train_test_split
| from tensorflow.keras.layers import Dense
| from tensorflow.keras.models import Sequential
| from tensorflow.keras.optimizers import Adam
| from tensorflow.keras.preprocessing.sequence import pad_sequences
| from tensorflow.keras.preprocessing.text import Tokenizer
|
| # Download the NLTK sentence/word tokenizer models (no-op if already present).
| nltk.download('punkt_tab')
|
|
|
| DatasetLocation = r"datset.csv"
|
| dataset = pd.read_csv(DatasetLocation)
|
| print("data loaded")
|
|
|
|
|
| x = dataset["text"]
|
| y = dataset["output"]
|
|
|
|
|
|
|
| Newy = y + 1
|
| Newy = Newy / 2
|
|
|
|
|
|
|
| y = Newy
|
| for i in range(len(y)):
|
| if np.isnan(y[i]):
|
| y[i] = 0
|
| print(y)
|
|
|
| tokenizer = Tokenizer()
|
|
|
|
|
| tokenizer.fit_on_texts(x)
|
|
|
| TokenX = tokenizer.texts_to_sequences(x)
|
|
|
|
|
| with open("tokenizer.pkl","wb") as handle:
|
| pickle.dump(tokenizer,handle,protocol=pickle.HIGHEST_PROTOCOL)
|
|
|
| print(TokenX)
|
|
|
|
|
|
|
| max_length = 100
|
| X_Padded = pad_sequences(TokenX,maxlen= max_length)
|
|
|
| print("data padded correctly")
|
|
|
|
|
| X_train, X_val, y_train, y_val = train_test_split(X_Padded, y, test_size=0.2, random_state=42)
|
|
|
|
|
|
|
| model = Sequential([
|
| Dense(256, activation='relu'),
|
| Dense(128, activation='relu'),
|
| Dense(1, activation='sigmoid')
|
| ])
|
| from tensorflow.keras.optimizers import Adam
|
|
|
| model.compile(optimizer=Adam(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
|
| print("model defined correctly")
|
| print(np.isnan(y).sum())
|
|
|
| epochs = 3
|
| i = 0
|
| TrainLoss= []
|
| ValLoss= []
|
| Num = []
|
| while i < epochs:
|
| history = model.fit(X_Padded, y, epochs=100, verbose=2)
|
| Train_loss = history.history['loss'][-1]
|
| Train_accuracy = history.history['accuracy'][-1]
|
| Val_loss, Val_accuracy = model.evaluate(X_val, y_val)
|
| ValLoss.append(Val_loss)
|
| TrainLoss.append(Train_loss)
|
| Num.append(i)
|
| i += 1
|
|
|
| model.save("model.h5")
|
|
|
| plt.figure(figsize=(10, 6))
|
| plt.plot(Num, ValLoss, label='Validation Loss', color='orange')
|
| plt.plot(Num, TrainLoss, label='Training Loss', color='blue')
|
| plt.title('Training and Validation Loss')
|
| plt.xlabel('Epochs')
|
| plt.ylabel('Loss')
|
| plt.legend()
|
| plt.grid()
|
| plt.show() |