| from sentence_transformers import SentenceTransformer |
| from sklearn.linear_model import LogisticRegression |
| import pickle |
| from sklearn.model_selection import train_test_split |
| import joblib |
|
|
| import pandas as pd |
|
|
# Encoders already constructed in this process, keyed by model name, so the
# model is loaded at most once per name instead of on every call.
_ENCODER_CACHE = {}


def _get_encoder(model_name):
    """Return the SentenceTransformer for *model_name*, building it on first use."""
    encoder = _ENCODER_CACHE.get(model_name)
    if encoder is None:
        # NOTE(review): trust_remote_code=True allows the model repository to
        # execute its own Python code at load time; only pass model names
        # from sources you trust.
        encoder = SentenceTransformer(model_name, trust_remote_code=True)
        _ENCODER_CACHE[model_name] = encoder
    return encoder


def get_embedding(text, model_name='Alibaba-NLP/gte-base-en-v1.5'):
    """Encode *text* with a sentence-transformer model.

    Args:
        text: Value handed unchanged to ``SentenceTransformer.encode``
            (``train_model`` in this file passes a list of message strings).
        model_name: Name of the model to load. Defaults to the model this
            function has always used, so existing callers are unaffected.

    Returns:
        Whatever ``SentenceTransformer.encode`` returns for *text*.
    """
    return _get_encoder(model_name).encode(text)
|
|
def train_model(data_path="sms_process_data_main.xlsx",
                model_path='log_reg_model.pkl'):
    """Train a logistic-regression classifier on message embeddings and save it.

    Reads the spreadsheet at *data_path*, drops rows that lack a
    ``MessageText`` or ``label`` value, splits the remainder 80/20 with a
    fixed seed, embeds the training messages via ``get_embedding``, fits a
    ``LogisticRegression`` on those embeddings and pickles it to *model_path*
    via ``save_model``.

    Args:
        data_path: Excel file to read. Defaults to the file name this
            function has always used.
        model_path: Where the fitted model is written. Defaults to the file
            name this function has always used.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    frame = pd.read_excel(data_path)
    # Rows without a message or a label cannot be used for training.
    frame = frame.dropna(subset=['MessageText', 'label'])
    messages = frame['MessageText']
    labels = frame['label']

    # The split (and its seed) is kept so the training rows stay the same as
    # before; the held-out 20% is not evaluated inside this function.
    X_train, _X_test, y_train, _y_test = train_test_split(
        messages, labels, test_size=0.2, random_state=42
    )

    X_train_embeddings = get_embedding(X_train.tolist())
    log_reg_model = LogisticRegression(max_iter=1000)
    log_reg_model.fit(X_train_embeddings, y_train)

    save_model(log_reg_model, model_path)
    return log_reg_model
|
|
def save_model(model, filename):
    """Pickle *model* to the file at *filename* and print a confirmation.

    Args:
        model: Object to serialize with ``pickle.dump``.
        filename: Path of the file to write; it is opened in binary write
            mode, so an existing file is overwritten.
    """
    with open(filename, 'wb') as sink:
        pickle.dump(model, sink)
    print(f"Model saved to {filename}")
|
|
|
|
def load_model(filename):
    """Unpickle and return the object stored at *filename*.

    Prints a confirmation message once the object has been read.

    Args:
        filename: Path of the pickle file to read (opened in binary mode).

    Returns:
        The object produced by ``pickle.load``.

    Note:
        Unpickling can execute arbitrary code, so only load files that come
        from a trusted source.
    """
    with open(filename, 'rb') as source:
        restored = pickle.load(source)
    print(f"Model loaded from {filename}")
    return restored