{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix, roc_curve\n", "from imblearn.combine import SMOTEENN\n", "from sklearn.preprocessing import MinMaxScaler\n", "import joblib\n", "import matplotlib.pyplot as plt\n", "from yellowbrick.classifier import ConfusionMatrix" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: yellowbrick in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (1.5)\n", "Requirement already satisfied: matplotlib!=3.0.0,>=2.0.2 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from yellowbrick) (3.10.0)\n", "Requirement already satisfied: scipy>=1.0.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from yellowbrick) (1.15.1)\n", "Requirement already satisfied: scikit-learn>=1.0.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from yellowbrick) (1.6.1)\n", "Requirement already satisfied: numpy>=1.16.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from yellowbrick) (2.2.2)\n", "Requirement already satisfied: cycler>=0.10.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from yellowbrick) (0.12.1)\n", "Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (1.3.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (4.55.4)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (1.4.8)\n", "Requirement already satisfied: packaging>=20.0 in c:\\users\\user\\appdata\\roaming\\python\\python313\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (24.2)\n", "Requirement already satisfied: pillow>=8 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (11.1.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (3.2.1)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\user\\appdata\\roaming\\python\\python313\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (2.9.0.post0)\n", "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn>=1.0.0->yellowbrick) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn>=1.0.0->yellowbrick) (3.5.0)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\user\\appdata\\roaming\\python\\python313\\site-packages (from python-dateutil>=2.7->matplotlib!=3.0.0,>=2.0.2->yellowbrick) (1.17.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "[notice] A new release of pip is available: 24.3.1 -> 25.0.1\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" ] } ], "source": [ "import sys\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "plt.style.use(\"ggplot\")\n", "plt.rcParams['figure.figsize'] = (12, 8)\n", "import seaborn as sns\n", "sns.set(style='whitegrid', color_codes=True)\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "from sklearn.feature_selection import chi2,f_classif, mutual_info_classif, SelectKBest\n", "from sklearn.model_selection import train_test_split, cross_val_score\n", "from sklearn.model_selection import RepeatedStratifiedKFold\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.metrics import confusion_matrix\n", "\n", "%pip install yellowbrick\n", "from yellowbrick.model_selection import CVScores" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from yellowbrick.classifier import confusion_matrix\n", "\n", "def vis_conf(model, X_test, y_test):\n", " plt.figure(figsize=(6, 5))\n", " visualizer = confusion_matrix(\n", " model,\n", " X_test, y_test,\n", " is_fitted=True,\n", " classes=['Negative', 'Positive']\n", " )\n", " visualizer.show();" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def test_results(model, X_test, y_test):\n", " from sklearn.metrics import confusion_matrix\n", " y_pred = model.predict(X_test)\n", " tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n", "\n", " accuracy = (tp + tn) / (tp + fp + tn + fn)\n", " print(\"Accuracy: \", '{:.2f}'.format(accuracy * 100))\n", " print(\"True Negative:\", tn)\n", " print(\"True Positive:\", tp)\n", " print(\"False Positive:\", fp)\n", " print(\"False Negative:\", fn)\n", " print(\"\\n-------------------------------------------------------\")\n", "\n", " \n", " print(\"Negative Class Results (No PCOS)\")\n", " precision_n = tn / (tn + fp) \n", " recall_n = tn / (tn + fp) \n", " f1_score_n = 2 * (precision_n * recall_n) / (precision_n + recall_n) if (precision_n + recall_n) > 0 else 0\n", " print(\"Precision (N): \", '{:.2f}'.format(precision_n * 100))\n", " print(\"Recall (N): \", '{:.2f}'.format(recall_n * 100))\n", " print(\"F1 Score (N): \", '{:.2f}'.format(f1_score_n * 100))\n", " print(\"\\n-------------------------------------------------------\")\n", "\n", " \n", " print(\"Positive Class Results (PCOS)\")\n", " precision_p = tp / (tp + fp) \n", " recall_p = tp / (tp + fn) \n", " f1_score_p = 2 * (precision_p * recall_p) / (precision_p + recall_p) if (precision_p + recall_p) > 0 else 0\n", " print(\"Precision (P): \", '{:.2f}'.format(precision_p * 100))\n", " print(\"Recall (P): \", '{:.2f}'.format(recall_p * 100))\n", " print(\"F1 Score (P): \", '{:.2f}'.format(f1_score_p * 100))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "pcos_data = pd.read_csv(\"new_pcos_dataset.csv\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "non_clinical_data = pcos_data[\n", " [\n", " \"PCOS (Y/N)\", # Target\n", " \"Skin darkening (Y/N)\",\n", " \"hair growth(Y/N)\",\n", " \"Weight gain(Y/N)\",\n", " \"Fast food (Y/N)\",\n", " \"Pimples(Y/N)\",\n", " \"Age (yrs)\",\n", " \"BMI\",\n", " ]\n", "]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# --- Drop Missing Values ---\n", "non_clinical_data = non_clinical_data.dropna()\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "X = non_clinical_data.drop(columns=[\"PCOS (Y/N)\"])\n", "y = non_clinical_data[\"PCOS (Y/N)\"]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "\n", "smoteenn = SMOTEENN(random_state=42)\n", "X_resampled, y_resampled = smoteenn.fit_resample(X, y)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "scaler = MinMaxScaler()\n", "X_train_scaled = scaler.fit_transform(X_train)\n", "X_test_scaled = scaler.transform(X_test)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
LogisticRegression(max_iter=1000, random_state=0)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression(max_iter=1000, random_state=0)
GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=42, shuffle=True),\n",
" error_score='raise',\n",
" estimator=LogisticRegression(max_iter=1000, random_state=42),\n",
" n_jobs=-1,\n",
" param_grid=[{'C': [0.1, 1, 10], 'class_weight': ['balanced'],\n",
" 'penalty': ['l1', 'l2'], 'solver': ['liblinear']},\n",
" {'C': [0.1, 1, 10], 'class_weight': ['balanced'],\n",
" 'l1_ratio': [0.5],\n",
" 'penalty': ['l1', 'l2', 'elasticnet'],\n",
" 'solver': ['saga']}],\n",
" scoring='f1_weighted')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=42, shuffle=True),\n",
" error_score='raise',\n",
" estimator=LogisticRegression(max_iter=1000, random_state=42),\n",
" n_jobs=-1,\n",
" param_grid=[{'C': [0.1, 1, 10], 'class_weight': ['balanced'],\n",
" 'penalty': ['l1', 'l2'], 'solver': ['liblinear']},\n",
" {'C': [0.1, 1, 10], 'class_weight': ['balanced'],\n",
" 'l1_ratio': [0.5],\n",
" 'penalty': ['l1', 'l2', 'elasticnet'],\n",
" 'solver': ['saga']}],\n",
" scoring='f1_weighted')LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,\n",
" random_state=42, solver='liblinear')LogisticRegression(C=0.1, class_weight='balanced', max_iter=1000,\n",
" random_state=42, solver='liblinear')