File size: 947 Bytes
e93c178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from sklearn.preprocessing import LabelEncoder

def feature_engineering(df):
    """Return a new frame with ``Loan_ID`` removed and income features added.

    Three derived columns are appended:

    * ``Total_Income``   = ``ApplicantIncome + CoapplicantIncome``
    * ``EMI``            = ``(LoanAmount * 1000) / Loan_Amount_Term``
    * ``Balance_Income`` = ``Total_Income - EMI``

    The caller's frame is never modified, regardless of whether ``Loan_ID``
    is present.

    Args:
        df: DataFrame holding ``ApplicantIncome``, ``CoapplicantIncome``,
            ``LoanAmount`` and ``Loan_Amount_Term``; ``Loan_ID`` is optional.

    Returns:
        A new DataFrame containing the original columns (minus ``Loan_ID``)
        plus the three derived columns.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Always work on a fresh frame. Previously the input was only copied as a
    # side effect of drop(), so a frame without Loan_ID was mutated in place
    # while one with Loan_ID was not.
    if "Loan_ID" in df.columns:
        # Loan_ID is just an identifier, so we remove it
        result = df.drop("Loan_ID", axis=1)
    else:
        result = df.copy()

    # create some useful new features
    result["Total_Income"] = result["ApplicantIncome"] + result["CoapplicantIncome"]
    # NOTE(review): the * 1000 suggests LoanAmount is stored in thousands —
    # confirm against the dataset. A Loan_Amount_Term of 0 is not guarded
    # here and is passed straight to the division.
    result["EMI"] = (result["LoanAmount"] * 1000) / result["Loan_Amount_Term"]
    result["Balance_Income"] = result["Total_Income"] - result["EMI"]

    return result


def encode_data(df):
    """Label-encode the target and the categorical feature columns of ``df``.

    ``df`` is modified in place: ``Loan_Status`` and each categorical column
    are overwritten with the output of a freshly fitted ``LabelEncoder``.

    Args:
        df: DataFrame containing ``Loan_Status`` plus the columns ``Gender``,
            ``Married``, ``Dependents``, ``Education``, ``Self_Employed`` and
            ``Property_Area``.

    Returns:
        A 3-tuple ``(df, encoders, target_encoder)`` where ``df`` is the same
        (now encoded) frame, ``encoders`` maps each categorical column name
        to its fitted ``LabelEncoder``, and ``target_encoder`` is the encoder
        fitted on ``Loan_Status``.
    """
    # Target (Y/N) first; it is fitted on the raw values, without a str cast.
    target_encoder = LabelEncoder()
    encoded_target = target_encoder.fit_transform(df["Loan_Status"])
    df["Loan_Status"] = encoded_target

    categorical_columns = (
        "Gender",
        "Married",
        "Dependents",
        "Education",
        "Self_Employed",
        "Property_Area",
    )

    # One independent encoder per column, kept so callers can reuse or
    # invert the mapping later.
    encoders = {name: LabelEncoder() for name in categorical_columns}
    for name, encoder in encoders.items():
        # Values are cast to str before fitting, so every entry is encoded
        # by its string form.
        as_text = df[name].astype(str)
        df[name] = encoder.fit_transform(as_text)

    return df, encoders, target_encoder