| |
| |
|
|
| from sklearn.ensemble import RandomForestClassifier |
| from sklearn.metrics import roc_auc_score |
|
|
| |
|
|
|
|
def recursive_feature_elimination_rf(X_train,y_train,X_test,y_test,
                                     tol=0.001,max_depth=None,
                                     class_weight=None,
                                     top_n=15,n_estimators=50,random_state=0):
    """Greedy backward feature elimination driven by test-set ROC AUC.

    Fits a random forest on all columns of ``X_train`` to establish a
    baseline AUC, then walks the columns in order.  For each column it
    refits the forest with that column (and every previously removed
    column) dropped; if the AUC falls by at least ``tol`` the column is
    kept, otherwise it is marked for removal and the baseline AUC is
    updated to the new score.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature matrices; columns are evaluated in ``X_train.columns`` order.
    y_train, y_test : array-like
        Binary targets for fitting and AUC evaluation respectively.
    tol : float
        Minimum drop in ROC AUC required to keep a feature.
    max_depth, class_weight, n_estimators, random_state
        Forwarded to ``RandomForestClassifier``.
    top_n : int
        Unused; retained for interface compatibility.

    Returns
    -------
    list
        Column names that survived elimination.
    """
    def _make_forest():
        # One fresh, identically-configured forest per evaluation so
        # every comparison is apples-to-apples.
        return RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=random_state,
            class_weight=class_weight,
            n_jobs=-1,
        )

    def _test_auc(fitted_model, X_eval):
        # Probability of the positive class, scored against y_test.
        positive_probs = fitted_model.predict_proba(X_eval)[:, 1]
        return roc_auc_score(y_test, positive_probs)

    removed = []

    # Baseline: forest trained on the full feature set.
    baseline_model = _make_forest()
    baseline_model.fit(X_train, y_train)
    auc_baseline = _test_auc(baseline_model, X_test)

    total_features = len(X_train.columns)
    for position, feature in enumerate(X_train.columns, start=1):
        print()
        print('testing feature: ', feature, ' which is feature ', position,
              ' out of ', total_features)

        # Drop everything already removed plus the current candidate.
        candidate_drop = removed + [feature]
        trial_model = _make_forest()
        trial_model.fit(X_train.drop(candidate_drop, axis=1), y_train)
        auc_without = _test_auc(trial_model, X_test.drop(candidate_drop, axis=1))
        print('New Test ROC AUC={}'.format((auc_without)))

        print('All features Test ROC AUC={}'.format((auc_baseline)))

        drop_in_auc = auc_baseline - auc_without
        if drop_in_auc >= tol:
            # Removing this feature hurts: keep it.
            print('Drop in ROC AUC={}'.format(drop_in_auc))
            print('keep: ', feature)
        else:
            # Negligible (or no) loss: remove it and move the baseline.
            print('Drop in ROC AUC={}'.format(drop_in_auc))
            print('remove: ', feature)
            auc_baseline = auc_without
            removed.append(feature)

    print('DONE!!')
    print('total features to remove: ', len(removed))
    survivors = [col for col in X_train.columns if col not in removed]
    print('total features to keep: ', len(survivors))

    return survivors
|
|
|
|
def recursive_feature_addition_rf(X_train,y_train,X_test,y_test,
                                  tol=0.001,max_depth=None,
                                  class_weight=None,
                                  top_n=15,n_estimators=50,random_state=0):
    """Greedy forward feature addition driven by test-set ROC AUC.

    Seeds the selection with the first column of ``X_train``, scoring a
    random forest trained on it alone.  Each remaining column is then
    tried in order: the forest is refitted on the kept columns plus the
    candidate, and the candidate is kept only when the ROC AUC improves
    by at least ``tol``, in which case the baseline AUC is updated.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature matrices; columns are evaluated in ``X_train.columns`` order.
    y_train, y_test : array-like
        Binary targets for fitting and AUC evaluation respectively.
    tol : float
        Minimum gain in ROC AUC required to keep a feature.
    max_depth, class_weight, n_estimators, random_state
        Forwarded to ``RandomForestClassifier``.
    top_n : int
        Unused; retained for interface compatibility.

    Returns
    -------
    list
        Column names selected, starting with the first column.
    """
    def _make_forest():
        # Identical configuration for every trial so scores are comparable.
        return RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=random_state,
            class_weight=class_weight,
            n_jobs=-1,
        )

    def _test_auc(fitted_model, X_eval):
        positive_probs = fitted_model.predict_proba(X_eval)[:, 1]
        return roc_auc_score(y_test, positive_probs)

    seed_column = X_train.columns[0]
    selected = [seed_column]

    # Baseline: forest trained on the seed column alone.
    seed_model = _make_forest()
    seed_model.fit(X_train[[seed_column]], y_train)
    auc_baseline = _test_auc(seed_model, X_test[[seed_column]])

    total_features = len(X_train.columns)
    for position, feature in enumerate(X_train.columns[1:], start=1):
        print()
        print('testing feature: ', feature, ' which is feature ', position,
              ' out of ', total_features)

        candidate_set = selected + [feature]
        trial_model = _make_forest()
        trial_model.fit(X_train[candidate_set], y_train)
        auc_with = _test_auc(trial_model, X_test[candidate_set])
        print('New Test ROC AUC={}'.format((auc_with)))

        print('All features Test ROC AUC={}'.format((auc_baseline)))

        gain_in_auc = auc_with - auc_baseline
        if gain_in_auc >= tol:
            # Meaningful improvement: adopt the feature and the new baseline.
            print('Increase in ROC AUC={}'.format(gain_in_auc))
            print('keep: ', feature)
            auc_baseline = auc_with
            selected.append(feature)
        else:
            print('Increase in ROC AUC={}'.format(gain_in_auc))
            print('remove: ', feature)

    print('DONE!!')
    print('total features to keep: ', len(selected))

    return selected