"""diarc.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1Jyccp5Aeml-7oZABbACY2VTE9iQJg9Pe

# Bismillahir Rahmaanir Raheem
# Almadadh Ya Gause RadiAllahu Ta'alah Anh - Ameen

# <font color=grey>DIabetes-related Amputation Risk Calculator (DIARC)</font>
<b>_by Zakia Salod_</b>
"""
|
|
# NOTE: "!pip install pycaret" is an IPython/Colab shell magic and is not
# valid Python syntax in a plain .py file. Install from a notebook cell or
# a shell instead:
#   pip install pycaret

# Report the installed PyCaret version (sanity check after install).
from pycaret.utils import version
version()

# Enable PyCaret's interactive display helpers for the Google Colab
# environment (required for progress bars / widgets to render there).
from pycaret.utils import enable_colab
enable_colab()
|
|
# Core scientific stack: arrays, dataframes, and plotting.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Fix NumPy's global RNG so any NumPy-backed sampling below is reproducible.
np.random.seed(1234)
|
|
# Load the raw amputation dataset from the local Excel workbook.
dataset = pd.read_excel('amputation_dataset.xlsx')

# Inspect the class balance of the target variable before cleaning.
print(dataset['AMPUTATION'].value_counts())
ax = sns.countplot(x="AMPUTATION", data=dataset)

# Count exact duplicate rows (the first occurrence of each is not counted).
dataset.duplicated(keep='first').sum()

# Drop the duplicate rows, keeping the first occurrence of each.
dataset = dataset.drop_duplicates(keep='first')

# Re-inspect the class balance after de-duplication.
print(dataset['AMPUTATION'].value_counts())
ax = sns.countplot(x="AMPUTATION", data=dataset)

# Peek at the first few rows of the cleaned data.
dataset.head()
|
|
| |
| |
# Shuffle the de-duplicated data (fixed random_state for reproducibility)
# so the undersampling below is not order-dependent.
shuffled_dataset = dataset.sample(frac=1, random_state=4)

# Split by class: all positive (amputation) rows...
amputation_dataset = shuffled_dataset.loc[shuffled_dataset['AMPUTATION'] == 1]

# ...and a random undersample of 105 negative rows (random undersampling
# to balance the classes).
# NOTE(review): the hard-coded n=105 presumably matches the positive-class
# count after de-duplication — confirm against the data.
non_amputation_dataset = shuffled_dataset.loc[shuffled_dataset['AMPUTATION'] == 0].sample(n=105, random_state=42)

# Recombine into a balanced dataset and re-inspect the class counts.
dataset = pd.concat([amputation_dataset, non_amputation_dataset])
print(dataset['AMPUTATION'].value_counts())
ax = sns.countplot(x="AMPUTATION", data=dataset)

# Persist the cleaned, balanced dataset for reuse (also reloaded below for
# the profiling report).
dataset.to_excel('amputation_removed_duplicates_and_balanced.xlsx')
|
|
# PyCaret's classification API (setup, create_model, compare_models, ...).
from pycaret.classification import *

# Initialise the PyCaret experiment: target column AMPUTATION, fixed
# session_id so the train/test split is reproducible.
clf = setup(data=dataset, target='AMPUTATION', session_id=42)

# Inspect the training split produced by setup().
get_config('X_train')
get_config('y_train')

# Rank all candidate classifiers by cross-validated AUC.
best_model = compare_models(sort='AUC')

# Build five base learners and a bagged ensemble of each:
# Naive Bayes, Logistic Regression, Linear Discriminant Analysis,
# Random Forest, and AdaBoost.
nb = create_model('nb')
bagged_nb = ensemble_model(nb, method='Bagging')
lr = create_model('lr')
bagged_lr = ensemble_model(lr, method='Bagging')
lda = create_model('lda')
bagged_lda = ensemble_model(lda, method='Bagging')
rf = create_model('rf')
bagged_rf = ensemble_model(rf, method='Bagging')
ada = create_model('ada')
bagged_ada = ensemble_model(ada, method='Bagging')

# Blend (vote) the five bagged models into a single estimator.
blend_specific = blend_models(estimator_list=[bagged_nb, bagged_lr, bagged_lda, bagged_rf, bagged_ada])

# Visualise the blended model's performance.
plot_model(blend_specific)

# Hyperparameter-tune the blended model, then evaluate it interactively.
tuned_blend_specific = tune_model(blend_specific)
evaluate_model(tuned_blend_specific)

# Score the hold-out set with the tuned blend.
tuned_blend_specific_predictions = predict_model(tuned_blend_specific)

# Refit on the full dataset (train + hold-out) for deployment.
final_tuned_blend_specific = finalize_model(tuned_blend_specific)

# NOTE(review): the artefact saved here is the *tuned* (not finalized)
# blend; if the finalized model is the intended deliverable, save
# final_tuned_blend_specific instead — confirm.
save_model(tuned_blend_specific, "tuned_blend_specific_model_19112021", verbose=True)

# Inspect the hold-out split.
get_config('X_test')
get_config('y_test')
|
|
# Reload the balanced dataset written earlier in the pipeline.
dataset2 = pd.read_excel('amputation_removed_duplicates_and_balanced.xlsx')

# NOTE: "!pip install pandas-profiling" is an IPython/Colab shell magic and
# is not valid Python syntax in a plain .py file. Install from a notebook
# cell or a shell instead:
#   pip install pandas-profiling

from pandas_profiling import ProfileReport

# Generate an exploratory-data-analysis report of the balanced dataset and
# write it out as a standalone HTML file.
profile = ProfileReport(dataset2, title="Pandas Profiling Report")
profile.to_file("amputation_removed_duplicates_and_balanced_report.html")