import streamlit as st
import pandas as pd
import os
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns

# Page-level setup: wide layout and the main heading.
st.set_page_config(layout="wide")
st.title("🩺 Diabetic Retinopathy Project")

# One tab per section of the app; the labels appear in this order.
TAB_LABELS = [
    "📂 Dataset Info",
    "📊 Training Visualization",
    "🤖 Algorithm Used",
]
tab1, tab2, tab3 = st.tabs(TAB_LABELS)

# =============================
# Tab 1: Dataset Information
# =============================
with tab1:
    # Static Markdown describing the dataset and the train/test split.
    # The two trailing spaces after "deleted." are a deliberate Markdown line break.
    dataset_overview_md = """
    ### 🧾 Dataset Overview

    **Dataset Description:**

    The DDR dataset contains **13,673 fundus images** from **147 hospitals** across **23 provinces in China**. The images are labeled into 5 classes based on DR severity:

    - **No_DR**
    - **Mild**
    - **Moderate**
    - **Severe**
    - **Proliferative_DR**

    Poor-quality images were removed, and black backgrounds were deleted.  
    [📎 Dataset source](https://www.kaggle.com/datasets/mariaherrerot/ddrdataset)

    ### 🧪 Data Preparation & Splitting

    - All images resized to **224x224**
    - **80% Training**, **20% Testing** (stratified by class)
    """
    st.markdown(dataset_overview_md)

# =============================
# Tab 2: Training Visualization
# =============================
def _plot_class_distribution(class_counts: pd.DataFrame) -> None:
    """Render a bar chart of the number of rows per class.

    Args:
        class_counts: Frame with a 'Class' column (label name) and a
            'Count' column (number of CSV rows carrying that label).
    """
    st.subheader("1️⃣ Class Distribution")
    fig, ax = plt.subplots()
    sns.barplot(data=class_counts, x='Class', y='Count', palette='rocket', ax=ax)
    ax.set_title("Class Distribution")
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; the script is
    # re-executed on each interaction, so close it to avoid piling figures up.
    plt.close(fig)


def _show_sample_images(df: pd.DataFrame, class_counts: pd.DataFrame, img_folder: str) -> None:
    """Show the first image of each class side by side, one column per class.

    Args:
        df: Label frame with 'id_code' and 'label' columns.
        class_counts: Frame whose 'Class' column lists the labels to show.
        img_folder: Directory that contains the image files.
    """
    st.subheader("2️⃣ Sample Images Per Class")

    columns = st.columns(len(class_counts))
    for column, label in zip(columns, class_counts['Class']):
        sample_row = df[df['label'] == label].iloc[0]  # First image of this class
        # id_code is joined as-is (no extension is appended), so it must
        # already be the file name. str() guards against pandas having parsed
        # an all-numeric id_code column as integers, which os.path.join rejects.
        file_name = str(sample_row['id_code'])
        img_path = os.path.join(img_folder, file_name)
        if not os.path.exists(img_path):
            column.write(f"Image not found: {file_name}")
            continue
        try:
            # The context manager releases the file handle once rendered.
            with Image.open(img_path) as image:
                column.image(image, caption=label, use_container_width=True)
        except OSError as e:
            # Unreadable/corrupt file: report it in place instead of
            # aborting the whole page.
            column.warning(f"Error loading image {file_name}: {e}")


def _plot_image_sizes(
    df: pd.DataFrame,
    class_counts: pd.DataFrame,
    img_folder: str,
    per_class: int = 5,
) -> None:
    """Plot width and height histograms for a small sample of images.

    Only the first ``per_class`` images of every class are opened, to keep
    the page fast. Files that are missing are skipped; files that fail to
    open are reported with a warning and skipped.

    Args:
        df: Label frame with 'id_code' and 'label' columns.
        class_counts: Frame whose 'Class' column lists the labels to sample.
        img_folder: Directory that contains the image files.
        per_class: Maximum number of images inspected per class.
    """
    st.subheader("3️⃣ Image Size Distribution")

    image_sizes = []
    for label in class_counts['Class']:
        sample_codes = df.loc[df['label'] == label, 'id_code'].head(per_class)
        for img_code in sample_codes:
            # id_code is used as the file name as-is (no extension appended).
            img_path = os.path.join(img_folder, str(img_code))
            if not os.path.exists(img_path):
                continue
            try:
                with Image.open(img_path) as img:
                    image_sizes.append(img.size)
            except Exception as e:
                # Best effort: one bad file must not prevent the histogram.
                st.warning(f"Error loading image {img_code}: {e}")

    if not image_sizes:
        st.info("No image size data available. Check your paths.")
        return

    widths, heights = zip(*image_sizes)
    fig, ax = plt.subplots()
    # Draw on this figure's axes explicitly rather than relying on pyplot's
    # global "current axes".
    sns.histplot(widths, kde=True, label="Width", color="blue", ax=ax)
    sns.histplot(heights, kde=True, label="Height", color="green", ax=ax)
    ax.legend()
    ax.set_title("Image Size Distribution")
    st.pyplot(fig)
    plt.close(fig)


with tab2:
    st.markdown("### 📊 Training Data Class Distribution")

    # CSV path and image folder path (adjust as needed)
    CSV_PATH = r"D:\\DR_Classification\\dataset\\DR_grading.csv"
    IMG_FOLDER = r"D:\\DR_Classification\\dataset\\images"  # Folder where all images are stored

    # Numeric 'diagnosis' grade (0 to 4) -> human-readable class name
    label_map = {
        0: "No_DR",
        1: "Mild",
        2: "Moderate",
        3: "Severe",
        4: "Proliferative_DR",
    }

    try:
        df = pd.read_csv(CSV_PATH)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: pandas' empty-data and
        # parser errors. Report inside this tab so the other tabs still render.
        st.error(f"Could not load the label CSV '{CSV_PATH}': {e}")
    else:
        df['label'] = df['diagnosis'].map(label_map)

        # Rows whose diagnosis is not in label_map get a missing label and
        # are left out of the counts.
        class_counts = df['label'].value_counts().reset_index()
        class_counts.columns = ['Class', 'Count']

        if class_counts.empty:
            # st.columns() cannot be called with zero columns, and there is
            # nothing to plot either.
            st.info("No labelled rows found in the CSV. Check the 'diagnosis' column.")
        else:
            _plot_class_distribution(class_counts)
            _show_sample_images(df, class_counts, IMG_FOLDER)
            _plot_image_sizes(df, class_counts, IMG_FOLDER)

# =============================
# Tab 3: Algorithm Used
# =============================
with tab3:
    # Static Markdown summarising the model and training setup.
    algorithm_md = """
    ### 🤖 Model and Algorithm

    We used **Transfer Learning** with **ResNet50** for DR classification.

    #### 🏗️ Model Details:
    - Input Image Size: **224x224**
    - Pretrained on **ImageNet**
    - Optimizer: **Adam**
    - Loss Function: **Categorical Crossentropy**
    - Evaluation Metrics: **Accuracy**, **Precision**, **Recall**

    This architecture is ideal for medical image analysis due to its deep layers and robustness to overfitting.
    """
    st.markdown(algorithm_md)