Sairamr46
/

price-increase-churn-model

Model card Files Files and versions

price-increase-churn-model / data_loader.py

Sairamr46's picture

Upload data_loader.py with huggingface_hub

7585b30 verified 1 day ago

history blame contribute delete

1.37 kB

	"""Load and prepare dataset for price increase churn modeling."""
	import pandas as pd
	import numpy as np
	from datasets import load_dataset
	from config import DATASET_NAME, TARGET_COL, CUSTOMER_ID_COL


	def load_data():
	    """Fetch the configured dataset and return all of its splits as one frame.

	    Calls ``load_dataset(DATASET_NAME)``, converts the 'train', 'validation'
	    and 'test' splits to pandas DataFrames (in that order) and stacks them
	    row-wise with a fresh 0..n-1 index.

	    Returns:
	        A single pandas DataFrame containing the rows of all three splits.
	    """
	    splits = load_dataset(DATASET_NAME)
	    # The dataset is expected to expose exactly these three named splits.
	    frames = [pd.DataFrame(splits[name]) for name in ('train', 'validation', 'test')]
	    return pd.concat(frames, ignore_index=True)


	def clean_data(df):
	    """Return a cleaned copy of *df* with known numeric columns type-cast.

	    The input frame is left untouched. 'Total Charges' is parsed with
	    ``pd.to_numeric(errors='coerce')`` and every missing or unparseable
	    value (e.g. blank strings) is replaced with 0. The other known numeric
	    columns are coerced the same way, but their unparseable values stay NaN.
	    Any of these columns that is absent from *df* is skipped.

	    Args:
	        df: pandas DataFrame of customer records.

	    Returns:
	        A new DataFrame with the same columns as *df*.
	    """
	    df = df.copy()

	    # Total Charges may have spaces/empty strings; those become 0.
	    # Guarded like every other column so a frame lacking it does not
	    # raise KeyError.
	    if 'Total Charges' in df.columns:
	        total_charges = pd.to_numeric(df['Total Charges'], errors='coerce')
	        df['Total Charges'] = total_charges.fillna(0)

	    # Ensure numeric types (unparseable entries are left as NaN here).
	    numeric_cols = [
	        'Monthly Charge', 'Tenure in Months', 'Age', 'Number of Dependents',
	        'Number of Referrals', 'Avg Monthly GB Download',
	        'Avg Monthly Long Distance Charges', 'Total Long Distance Charges',
	        'Total Extra Data Charges', 'Total Refunds', 'Total Revenue', 'CLTV',
	        'Churn Score', 'Satisfaction Score', 'Population',
	    ]
	    for col in numeric_cols:
	        if col in df.columns:
	            df[col] = pd.to_numeric(df[col], errors='coerce')

	    return df