| import pandas as pd |
| import numpy as np |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.model_selection import train_test_split |
| from sklearn.svm import SVC |
| from sklearn.metrics import classification_report |
| from sklearn.pipeline import Pipeline |
| from sklearn.compose import ColumnTransformer |
| from sklearn.preprocessing import StandardScaler |
| import joblib |
|
|
| |
# Load the sarcasm dataset from the Hugging Face Hub into a pandas DataFrame.
# The import sits directly above its first use: it previously appeared *after*
# the load_dataset(...) call, which raised NameError when the script ran.
from datasets import load_dataset

# load_dataset() returns Hugging Face dataset objects, not a CSV path or file
# handle, so its result cannot be passed to pd.read_csv(). Request a single
# split and convert it with Dataset.to_pandas() instead.
# NOTE(review): assumes the rows live in the default "train" split -- confirm
# against the dataset card.
data = load_dataset("nikesh66/Sarcasm-dataset", split="train").to_pandas()

# Replace missing 'user_feature' values with 0.
# NOTE(review): assumes the dataset exposes a 'user_feature' column -- a
# KeyError here means the column name needs to be checked against the data.
data['user_feature'] = data['user_feature'].fillna(0)
|
|
|
|
|
|
| |
# Feature preprocessing: each input column gets its own transformer, and the
# ColumnTransformer combines their outputs into one feature matrix.

# Standardizes the numeric 'user_feature' column.
scaler = StandardScaler()

# TF-IDF over the 'text' column, capped at 5000 features, with the built-in
# English stop-word list.
text_vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)

# (name, transformer, column selector) triples. The text column is selected
# with a bare string and the numeric column with a one-element list.
_feature_steps = [
    ('text', text_vectorizer, 'text'),
    ('user_features', scaler, ['user_feature']),
]

preprocessor = ColumnTransformer(transformers=_feature_steps)
|
|
| |
|
|
|
|