def vis_conf(model, X_test, y_test):
    """Draw a yellowbrick confusion-matrix plot for an already-fitted classifier.

    Parameters
    ----------
    model : estimator
        Classifier handed to yellowbrick with ``is_fitted=True``, i.e. it is
        treated as already trained.
    X_test, y_test :
        Features and true labels forwarded positionally to yellowbrick's
        ``confusion_matrix`` quick method.

    Returns
    -------
    None
        The figure is rendered through ``visualizer.show()``.

    Notes
    -----
    The class names are hard-coded to ``['Negative', 'Positive']``, so the
    helper is meant for a two-class problem.
    """
    # Import under a private alias at call time. Another cell of this notebook
    # runs `from sklearn.metrics import confusion_matrix`, which rebinds the
    # notebook-global name `confusion_matrix`; relying on that global would
    # make this helper call sklearn's function (different signature) and fail.
    from yellowbrick.classifier import confusion_matrix as yb_confusion_matrix

    plt.figure(figsize=(6, 5))
    visualizer = yb_confusion_matrix(
        model,
        X_test, y_test,
        is_fitted=True,
        classes=['Negative', 'Positive']
    )
    visualizer.show()
c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (1.3.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (4.55.4)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (1.4.8)\n", "Requirement already satisfied: packaging>=20.0 in c:\\users\\user\\appdata\\roaming\\python\\python313\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (24.2)\n", "Requirement already satisfied: pillow>=8 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (11.1.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (3.2.1)\n", "Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\user\\appdata\\roaming\\python\\python313\\site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (2.9.0.post0)\n", "Requirement already satisfied: joblib>=1.2.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn>=1.0.0->yellowbrick) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in c:\\users\\user\\appdata\\local\\programs\\python\\python313\\lib\\site-packages (from scikit-learn>=1.0.0->yellowbrick) (3.5.0)\n", "Requirement already satisfied: six>=1.5 in c:\\users\\user\\appdata\\roaming\\python\\python313\\site-packages (from python-dateutil>=2.7->matplotlib!=3.0.0,>=2.0.2->yellowbrick) (1.17.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n", "[notice] A new release of pip is 
available: 24.3.1 -> 25.0.1\n", "[notice] To update, run: python.exe -m pip install --upgrade pip\n" ] } ], "source": [ "import sys\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "plt.style.use(\"ggplot\")\n", "plt.rcParams['figure.figsize'] = (12, 8)\n", "import seaborn as sns\n", "sns.set(style='whitegrid', color_codes=True)\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "from sklearn.feature_selection import chi2,f_classif, mutual_info_classif, SelectKBest\n", "from sklearn.model_selection import train_test_split, cross_val_score\n", "from sklearn.model_selection import RepeatedStratifiedKFold\n", "from sklearn.model_selection import StratifiedKFold\n", "from sklearn.metrics import confusion_matrix\n", "\n", "%pip install yellowbrick\n", "from yellowbrick.model_selection import CVScores" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [], "source": [ "def test_results(model, X_test, y_test):\n", " from sklearn.metrics import confusion_matrix\n", " y_pred = model.predict(X_test)\n", " tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()\n", "\n", " accuracy = (tp + tn) / (tp + fp + tn + fn)\n", " print(\"Accuracy: \", '{:.2f}'.format(accuracy * 100))\n", " print(\"True Negative:\", tn)\n", " print(\"True Positive:\", tp)\n", " print(\"False Positive:\", fp)\n", " print(\"False Negative:\", fn)\n", " print(\"\\n-------------------------------------------------------\")\n", "\n", " # Negative Class (No PCOS)\n", " print(\"Negative Class Results (No PCOS)\")\n", " precision_n = tn / (tn + fp) # Corrected formula for Precision (N)\n", " recall_n = tn / (tn + fp) # Correct formula for Recall (N)\n", " f1_score_n = 2 * (precision_n * recall_n) / (precision_n + recall_n) if (precision_n + recall_n) > 0 else 0\n", " print(\"Precision (N): \", '{:.2f}'.format(precision_n * 100))\n", " print(\"Recall (N): \", '{:.2f}'.format(recall_n * 100))\n", " 
print(\"F1 Score (N): \", '{:.2f}'.format(f1_score_n * 100))\n", " print(\"\\n-------------------------------------------------------\")\n", "\n", " # Positive Class (PCOS)\n", " print(\"Positive Class Results (PCOS)\")\n", " precision_p = tp / (tp + fp) # Correct formula for Precision (P)\n", " recall_p = tp / (tp + fn) # Correct formula for Recall (P)\n", " f1_score_p = 2 * (precision_p * recall_p) / (precision_p + recall_p) if (precision_p + recall_p) > 0 else 0\n", " print(\"Precision (P): \", '{:.2f}'.format(precision_p * 100))\n", " print(\"Recall (P): \", '{:.2f}'.format(recall_p * 100))\n", " print(\"F1 Score (P): \", '{:.2f}'.format(f1_score_p * 100))" ] }, { "cell_type": "code", "execution_count": 202, "metadata": {}, "outputs": [], "source": [ "pcos_data = pd.read_csv(\"new_pcos_dataset.csv\")\n" ] }, { "cell_type": "code", "execution_count": 203, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | PCOS (Y/N) | \n", "Follicle No. (R) | \n", "Follicle No. (L) | \n", "Skin darkening (Y/N) | \n", "hair growth(Y/N) | \n", "Weight gain(Y/N) | \n", "Cycle length(days) | \n", "AMH(ng/mL) | \n", "Fast food (Y/N) | \n", "Cycle(R/I) | \n", "FSH/LH | \n", "PRL(ng/mL) | \n", "Pimples(Y/N) | \n", "Age (yrs) | \n", "BMI | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "3 | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "5 | \n", "2.07 | \n", "1.0 | \n", "0 | \n", "2.160326 | \n", "45.16 | \n", "0 | \n", "28 | \n", "19.3 | \n", "
| 1 | \n", "0 | \n", "5 | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "5 | \n", "1.53 | \n", "0.0 | \n", "0 | \n", "6.174312 | \n", "20.09 | \n", "0 | \n", "36 | \n", "24.9 | \n", "
| 2 | \n", "1 | \n", "15 | \n", "13 | \n", "0 | \n", "0 | \n", "0 | \n", "5 | \n", "6.63 | \n", "1.0 | \n", "0 | \n", "6.295455 | \n", "10.52 | \n", "1 | \n", "33 | \n", "25.3 | \n", "
| 3 | \n", "0 | \n", "2 | \n", "2 | \n", "0 | \n", "0 | \n", "0 | \n", "5 | \n", "1.22 | \n", "0.0 | \n", "0 | \n", "3.415254 | \n", "36.90 | \n", "0 | \n", "37 | \n", "29.7 | \n", "
| 4 | \n", "0 | \n", "4 | \n", "3 | \n", "0 | \n", "0 | \n", "0 | \n", "5 | \n", "2.26 | \n", "0.0 | \n", "0 | \n", "4.422222 | \n", "30.09 | \n", "0 | \n", "25 | \n", "20.1 | \n", "
GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=0, shuffle=True),\n",
" estimator=LogisticRegression(random_state=0), n_jobs=-1,\n",
" param_grid={'C': [0.01, 0.1, 1.0, 10.0],\n",
" 'class_weight': ['balanced'],\n",
" 'max_iter': [100, 200, 300],\n",
" 'penalty': ['l1', 'l2', 'elasticnet'],\n",
" 'solver': ['liblinear', 'saga']},\n",
" scoring='f1_weighted')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=0, shuffle=True),\n",
" estimator=LogisticRegression(random_state=0), n_jobs=-1,\n",
" param_grid={'C': [0.01, 0.1, 1.0, 10.0],\n",
" 'class_weight': ['balanced'],\n",
" 'max_iter': [100, 200, 300],\n",
" 'penalty': ['l1', 'l2', 'elasticnet'],\n",
" 'solver': ['liblinear', 'saga']},\n",
" scoring='f1_weighted')LogisticRegression(C=0.1, class_weight='balanced', random_state=0,\n",
" solver='saga')LogisticRegression(C=0.1, class_weight='balanced', random_state=0,\n",
" solver='saga')LogisticRegression(C=0.1, class_weight='balanced', random_state=0,\n",
" solver='saga')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. LogisticRegression(C=0.1, class_weight='balanced', random_state=0,\n",
" solver='saga')