Spaces:
Sleeping
Sleeping
File size: 1,568 Bytes
63289b7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 | import os
import pandas as pd
import sys,os
# sys.path.append(os.path.abspath("src"))
project_root = os.path.dirname(os.path.dirname(__file__))
sys.path.append(project_root)
from src.data.load_data import load_data
from src.data.preprocess import preprocess_data
from src.features.build_features import build_features
from src.utils.dataframe_validator import validate_dataframe
# Input CSV handed to load_data() in main().
# NOTE(review): this is an absolute, machine-specific Windows path; it has to be
# edited before the script can run on any other machine.
data_path = "C:\\Users\\Wind\\Downloads\\archive (10)\\WA_Fn-UseC_-Telco-Customer-Churn.csv"
# Name of the label column; forwarded as target_col to preprocess_data() and
# build_features() in main().
target_col = "Churn"
def main():
    """Run phase one of the pipeline: load, validate, preprocess, featurize, save.

    Steps, in order:
      1. Load the CSV at the module-level ``data_path`` via ``load_data``.
      2. Coerce ``TotalCharges`` and ``MonthlyCharges`` to numeric and drop
         rows where either is missing.
      3. Run ``validate_dataframe`` and print at most the first five errors.
         Validation is report-only: processing continues regardless.
      4. Run ``preprocess_data`` and ``build_features`` with ``target_col``.
      5. Write the result to ``data/processed/train_processed.csv`` (relative
         to the current working directory), creating the directory if needed.
    """
    print("Loading Data...\n")
    df = load_data(data_path)
    print(df.head(3))

    # errors="coerce" turns unparseable entries into NaN so that the dropna
    # below can remove them instead of the conversion raising.
    for col in ("TotalCharges", "MonthlyCharges"):
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Drop rows with missing numeric values
    df = df.dropna(subset=["TotalCharges", "MonthlyCharges"])

    print("Validating Data\n")
    # The first element of the returned pair is not used by this script.
    _valid, errors = validate_dataframe(df)
    if errors:
        print("Printing first 5 errors...")
        # Each entry unpacks as an (index, message) pair.
        for idx, err in errors[:5]:
            print(f"Row: {idx}, Error: {err}")

    print("Preprocessing Data...\n")
    df_clean = preprocess_data(df, target_col=target_col)
    print(df_clean.head(3))

    print("Building Features\n")
    df_features = build_features(df_clean, target_col=target_col)
    print(df_features.head(3))
    print("Phase one done !")

    output_path = "data/processed/train_processed.csv"
    # DataFrame.to_csv does not create missing parent directories and raises
    # OSError if they are absent, so make sure the target directory exists.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df_features.to_csv(output_path, index=False)
    print(f"Saved processed data to {output_path}")


if __name__ == "__main__":
    main()
|