# Spaces: Sleeping | File size: 947 Bytes | revision e93c178
from sklearn.preprocessing import LabelEncoder
def feature_engineering(df):
    """Return a new frame with the identifier dropped and derived features added.

    The input frame is never modified; all changes are made on a separate
    frame, regardless of whether ``Loan_ID`` is present.

    Args:
        df: DataFrame containing the columns ``ApplicantIncome``,
            ``CoapplicantIncome``, ``LoanAmount`` and ``Loan_Amount_Term``.
            A ``Loan_ID`` column is optional and is removed when present.

    Returns:
        A DataFrame with the additional columns ``Total_Income``, ``EMI``
        and ``Balance_Income``.

    Raises:
        KeyError: If any of the required columns is missing.
    """
    # Loan_ID is just an identifier, so we remove it. Both branches yield a
    # frame distinct from the caller's, so the assignments below never leak
    # back into the input.
    if "Loan_ID" in df.columns:
        result = df.drop("Loan_ID", axis=1)
    else:
        result = df.copy()

    # Create some useful new features.
    result["Total_Income"] = result["ApplicantIncome"] + result["CoapplicantIncome"]
    # NOTE(review): the * 1000 presumably converts LoanAmount from thousands
    # to currency units -- confirm against the dataset description.
    # NOTE(review): a zero or missing Loan_Amount_Term is not guarded here;
    # the result then follows pandas' arithmetic rules (inf / NaN).
    result["EMI"] = (result["LoanAmount"] * 1000) / result["Loan_Amount_Term"]
    result["Balance_Income"] = result["Total_Income"] - result["EMI"]
    return result
def encode_data(df):
    """Label-encode the target and the categorical columns of ``df``.

    The columns are overwritten on the frame that is passed in, and that
    same frame is returned.

    Args:
        df: DataFrame holding ``Loan_Status`` plus the categorical columns
            ``Gender``, ``Married``, ``Dependents``, ``Education``,
            ``Self_Employed`` and ``Property_Area``.

    Returns:
        A tuple ``(df, encoders, target_encoder)`` where ``encoders`` maps
        each categorical column name to its fitted ``LabelEncoder`` and
        ``target_encoder`` is the encoder fitted on ``Loan_Status``.
    """
    # Convert the target (Y/N) into numeric labels.
    target_encoder = LabelEncoder()
    df["Loan_Status"] = target_encoder.fit_transform(df["Loan_Status"])

    # One independent encoder per categorical column.
    categorical_columns = (
        "Gender",
        "Married",
        "Dependents",
        "Education",
        "Self_Employed",
        "Property_Area",
    )
    encoders = {name: LabelEncoder() for name in categorical_columns}
    for name, encoder in encoders.items():
        # Values are cast to str before fitting (presumably so missing
        # values are encoded as an ordinary category -- verify).
        df[name] = encoder.fit_transform(df[name].astype(str))

    return df, encoders, target_encoder