"""Business analysis and insights from the churn model."""
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import seaborn as sns
from data_loader import load_data, clean_data
from features import SubscriptionFeatureEngineer
from model import PriceIncreaseChurnModel
import joblib


def analyze_risk_segments(df, y_prob, y_true, output_dir='/app/price_increase_model'):
    """Segment customers into churn-risk tiers, print a summary and plot them.

    Works on a copy of ``df``; the caller's frame is not modified.

    Args:
        df: Customer frame. Must contain the columns aggregated below:
            ``rmr``, ``cltv``, ``tenure``, ``num_services`` and
            ``satisfaction_score``.
        y_prob: Predicted churn probability, one value per row of ``df``.
        y_true: Observed churn label, one value per row of ``df``. A pandas
            object is unwrapped via ``.values`` (so its index is ignored);
            anything else (list, ndarray) is assigned as-is.
        output_dir: Directory that receives ``risk_segments.png``. Created
            if it does not exist.

    Returns:
        The copy of ``df`` with ``churn_prob``, ``actual_churn``,
        ``risk_tier`` and ``monthly_revenue_at_risk`` columns added.
    """
    import os  # function-scope import keeps this block self-contained

    tier_labels = ['Low', 'Medium', 'High']
    tier_colors = ['#2ecc71', '#f39c12', '#e74c3c']  # green / orange / red

    df = df.copy()
    df['churn_prob'] = y_prob
    df['actual_churn'] = getattr(y_true, 'values', y_true)

    # Risk tiers. include_lowest=True closes the first bin on the left so a
    # probability of exactly 0.0 lands in 'Low' instead of becoming NaN.
    df['risk_tier'] = pd.cut(df['churn_prob'],
                             bins=[0, 0.4, 0.7, 1.0],
                             labels=tier_labels,
                             include_lowest=True)

    print("\n" + "="*60)
    print("RISK SEGMENT ANALYSIS")
    print("="*60)

    # observed=False keeps every tier in the summary (even empty ones) and
    # makes the categorical-groupby behaviour explicit on newer pandas.
    segment_summary = df.groupby('risk_tier', observed=False).agg({
        'churn_prob': ['count', 'mean'],
        'actual_churn': 'mean',
        'rmr': 'mean',
        'cltv': 'mean',
        'tenure': 'mean',
        'num_services': 'mean',
        'satisfaction_score': 'mean'
    }).round(3)
    print("\nSegment Summary:")
    print(segment_summary)

    # Revenue at risk: each customer's rmr weighted by their churn probability.
    df['monthly_revenue_at_risk'] = df['rmr'] * df['churn_prob']
    total_revenue_at_risk = df['monthly_revenue_at_risk'].sum()
    total_revenue = df['rmr'].sum()
    # Avoid a divide-by-zero warning when there is no revenue at all.
    if total_revenue:
        pct_at_risk = total_revenue_at_risk / total_revenue * 100
    else:
        pct_at_risk = float('nan')

    print(f"\nTotal Monthly Revenue: ${total_revenue:,.2f}")
    print(f"Monthly Revenue at Risk: ${total_revenue_at_risk:,.2f}")
    print(f"Revenue at Risk %: {pct_at_risk:.1f}%")

    # Plot risk distribution
    plt.figure(figsize=(10, 6))

    plt.subplot(1, 2, 1)
    # value_counts() orders by frequency; reindex to the fixed tier order so
    # each wedge gets the colour that belongs to its tier.
    tier_counts = df['risk_tier'].value_counts().reindex(tier_labels, fill_value=0)
    plt.pie(tier_counts, labels=tier_counts.index, autopct='%1.1f%%',
            colors=tier_colors)
    plt.title('Customer Distribution by Risk Tier')

    plt.subplot(1, 2, 2)
    sns.boxplot(data=df, x='risk_tier', y='rmr', order=tier_labels,
                palette=tier_colors)
    plt.title('Monthly Revenue by Risk Tier')
    plt.ylabel('Monthly Revenue ($)')

    plt.tight_layout()
    os.makedirs(output_dir, exist_ok=True)  # savefig does not create directories
    plt.savefig(f'{output_dir}/risk_segments.png', dpi=150)
    plt.close()
    print(f"Risk segment plot saved to {output_dir}/risk_segments.png")

    return df


def analyze_feature_effects(df, model, output_dir='/app/price_increase_model'):
    """Rank the fitted classifier's features by importance and print the top 15.

    Args:
        df: Not used by this function.
        model: Object exposing ``pipeline.named_steps`` with a ``'classifier'``
            step (providing ``feature_importances_``) and a ``'preprocessor'``
            step whose ``'cat'`` transformer has an ``'onehot'`` step, plus
            ``numeric_features`` and ``categorical_features`` attributes.
        output_dir: Not used by this function.

    Returns:
        DataFrame with ``feature`` and ``importance`` columns, sorted by
        importance in descending order.
    """
    steps = model.pipeline.named_steps
    estimator = steps['classifier']
    onehot = steps['preprocessor'].named_transformers_['cat'].named_steps['onehot']

    # Column order: numeric features first, then the expanded one-hot names.
    encoded_names = list(onehot.get_feature_names_out(model.categorical_features))
    all_names = model.numeric_features + encoded_names

    ranking = pd.DataFrame(
        {'feature': all_names, 'importance': estimator.feature_importances_}
    )
    ranking = ranking.sort_values('importance', ascending=False)

    banner = "=" * 60
    print("\n" + banner)
    print("TOP 15 FEATURE IMPORTANCES")
    print(banner)
    top = ranking.head(15)
    for name, score in zip(top['feature'], top['importance']):
        print(f"{name:35s} {score:.4f}")

    return ranking


def price_sensitivity_analysis(df, model, output_dir='/app/price_increase_model',
                               *, price_increases=None, sample_size=500,
                               high_risk_threshold=0.7):
    """Estimate churn risk under a range of price-increase scenarios.

    For each scenario a ``SubscriptionFeatureEngineer`` is built with that
    ``price_increase_pct``, applied to a fixed random sample of ``df``, and
    the result is scored with ``model.pipeline.predict_proba``.

    Args:
        df: Customer frame accepted by ``SubscriptionFeatureEngineer.transform``.
        model: Object exposing a fitted ``pipeline`` with ``predict_proba``.
        output_dir: Directory that receives ``price_sensitivity.png``.
            Created if it does not exist.
        price_increases: Iterable of fractional increases (0.10 means 10%).
            Defaults to 5%..30% in 5-point steps.
        sample_size: Maximum number of rows scored per scenario (kept small
            for speed); capped at ``len(df)``.
        high_risk_threshold: Churn probability at or above which a customer
            counts as high risk.

    Returns:
        DataFrame with one row per scenario and the columns
        ``price_increase_pct`` (in percent), ``avg_churn_prob`` and
        ``high_risk_pct`` (in percent).
    """
    import os  # function-scope import keeps this block self-contained

    print("\n" + "="*60)
    print("PRICE SENSITIVITY ANALYSIS")
    print("="*60)

    if price_increases is None:
        price_increases = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]

    # Fixed seed: every scenario is evaluated on the same customers.
    sample_df = df.sample(n=min(sample_size, len(df)), random_state=42)

    results = []
    for pct in price_increases:
        engineer = SubscriptionFeatureEngineer(price_increase_pct=pct)
        # Hand transform() a fresh copy: whether it mutates its input is not
        # visible here, and scenarios must not build on one another.
        engineered = engineer.transform(sample_df.copy())
        # Score features only; never pass the target column to the pipeline.
        features = engineered.drop(columns=['Churn'], errors='ignore')
        churn_probs = model.pipeline.predict_proba(features)[:, 1]
        avg_churn = churn_probs.mean()
        high_risk_pct = (churn_probs >= high_risk_threshold).mean() * 100

        results.append({
            'price_increase_pct': pct * 100,
            'avg_churn_prob': avg_churn,
            'high_risk_pct': high_risk_pct
        })
        print(f"Price Increase {pct*100:.0f}%: Avg Churn Prob = {avg_churn:.3f}, High Risk = {high_risk_pct:.1f}%")

    results_df = pd.DataFrame(results)

    # Plot
    plt.figure(figsize=(10, 5))

    plt.subplot(1, 2, 1)
    plt.plot(results_df['price_increase_pct'], results_df['avg_churn_prob'], 'o-', linewidth=2)
    plt.xlabel('Price Increase (%)')
    plt.ylabel('Average Churn Probability')
    plt.title('Churn Risk vs Price Increase')
    plt.grid(True, alpha=0.3)

    plt.subplot(1, 2, 2)
    plt.plot(results_df['price_increase_pct'], results_df['high_risk_pct'], 'o-',
             color='red', linewidth=2)
    plt.xlabel('Price Increase (%)')
    plt.ylabel('High Risk Customers (%)')
    plt.title('High Risk % vs Price Increase')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    os.makedirs(output_dir, exist_ok=True)  # savefig does not create directories
    plt.savefig(f'{output_dir}/price_sensitivity.png', dpi=150)
    plt.close()
    print(f"Price sensitivity plot saved to {output_dir}/price_sensitivity.png")

    return results_df


def main(model_path='/app/price_increase_model/price_increase_churn_model.pkl',
         output_dir='/app/price_increase_model',
         price_increase_pct=0.15):
    """Run the full business-impact analysis and return its three results.

    Loads and cleans the data, engineers features for the given price
    increase, loads the saved model, scores every customer and then runs the
    risk-segment, feature-importance and price-sensitivity analyses.

    Args:
        model_path: Location of the saved model passed to ``model.load``.
        output_dir: Directory handed to each analysis for its output.
        price_increase_pct: Fractional price increase used for the baseline
            feature engineering (0.15 means 15%).

    Returns:
        Tuple ``(risk_df, fi_df, sensitivity)`` as returned by
        ``analyze_risk_segments``, ``analyze_feature_effects`` and
        ``price_sensitivity_analysis`` respectively.
    """
    print("="*60)
    print("BUSINESS IMPACT ANALYSIS")
    print("="*60)

    # Load and prepare data
    df = load_data()
    df = clean_data(df)
    engineer = SubscriptionFeatureEngineer(price_increase_pct=price_increase_pct)
    df = engineer.transform(df)

    # Load model
    model = PriceIncreaseChurnModel()
    model.load(model_path)

    # Drop the target once and reuse the frame for both the feature lists
    # and the predictions (each drop copies the whole frame).
    features_df = df.drop(columns=['Churn'])
    y_true = df['Churn']

    # Re-extract feature lists from data
    model.numeric_features, model.categorical_features = model._get_feature_lists(features_df)

    # Predict on full dataset
    y_prob = model.predict_proba(features_df)

    # Run analyses
    risk_df = analyze_risk_segments(df, y_prob, y_true, output_dir=output_dir)
    fi_df = analyze_feature_effects(df, model, output_dir=output_dir)
    # NOTE(review): df is already feature-engineered here and is engineered
    # again per scenario inside price_sensitivity_analysis -- confirm that
    # SubscriptionFeatureEngineer.transform is safe to re-apply.
    sensitivity = price_sensitivity_analysis(df, model, output_dir=output_dir)

    print("\n" + "="*60)
    print("ANALYSIS COMPLETE")
    print("="*60)

    return risk_df, fi_df, sensitivity


if __name__ == '__main__':
    # Script entry point: run the analysis and bind the three results to
    # module-level names.
    risk_df, fi_df, sensitivity = main()