"""Streamlit dashboard for the Diabetic Retinopathy (DDR) classification project.

The page has three tabs:

1. Dataset Info - static description of the DDR dataset.
2. Training Visualization - class distribution, one sample image per class
   and an image-size histogram, all computed from the grading CSV and the
   image folder on disk.
3. Algorithm Used - static description of the model.
"""
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st
from PIL import Image

st.set_page_config(layout="wide")
st.title("🩺 Diabetic Retinopathy Project")

# Dataset locations. The defaults are the original hard-coded paths (kept
# verbatim); set DR_CSV_PATH / DR_IMG_FOLDER in the environment to point the
# app at another copy of the dataset without editing this file.
CSV_PATH = os.environ.get(
    "DR_CSV_PATH", r"D:\\DR_Classification\\dataset\\DR_grading.csv"
)
IMG_FOLDER = os.environ.get(
    "DR_IMG_FOLDER", r"D:\\DR_Classification\\dataset\\images"
)

# Numeric values of the CSV's 'diagnosis' column -> human-readable class name.
LABEL_MAP = {
    0: "No_DR",
    1: "Mild",
    2: "Moderate",
    3: "Severe",
    4: "Proliferative_DR",
}

# Columns the visualisation tab reads from the CSV.
REQUIRED_COLUMNS = ("id_code", "diagnosis")

# How many images per class are opened to estimate the size distribution
# (a small sample keeps the page fast).
SIZE_SAMPLES_PER_CLASS = 5

# Markdown is kept at column 0 so that its rendering does not depend on how
# leading indentation inside the literal is treated.
DATASET_MARKDOWN = """
### 🧾 Dataset Overview

**Dataset Description:**
The DDR dataset contains **13,673 fundus images** from **147 hospitals** across **23 provinces in China**. The images are labeled into 5 classes based on DR severity:

- **No_DR**
- **Mild**
- **Moderate**
- **Severe**
- **Proliferative_DR**

Poor-quality images were removed, and black backgrounds were deleted.

[📎 Dataset source](https://www.kaggle.com/datasets/mariaherrerot/ddrdataset)

### 🧪 Data Preparation & Splitting
- All images resized to **224x224**
- **80% Training**, **20% Testing** (stratified by class)
"""

ALGORITHM_MARKDOWN = """
### 🤖 Model and Algorithm

We used **Transfer Learning** with **ResNet50** for DR classification.

#### 🏗️ Model Details:
- Input Image Size: **224x224**
- Pretrained on **ImageNet**
- Optimizer: **Adam**
- Loss Function: **Categorical Crossentropy**
- Evaluation Metrics: **Accuracy**, **Precision**, **Recall**

This architecture is ideal for medical image analysis due to its deep layers and robustness to overfitting.
"""


@st.cache_data(show_spinner=False)
def load_labels(csv_path):
    """Read the grading CSV; cached so Streamlit reruns do not re-parse it.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The CSV contents as a DataFrame, exactly as parsed by pandas.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If pandas cannot parse the file (pandas' parser and
            empty-data errors are ValueError subclasses).
    """
    return pd.read_csv(csv_path)


def _image_path(id_code):
    """Return the on-disk path for an ``id_code`` value from the CSV.

    ``id_code`` is used verbatim as the file name (no extension is appended),
    so the CSV value must already include it. ``str()`` guards against pandas
    having parsed the column as a numeric dtype.
    """
    return os.path.join(IMG_FOLDER, str(id_code))


def _plot_class_distribution(class_counts):
    """Draw a bar chart of the number of images per class."""
    st.subheader("1️⃣ Class Distribution")
    fig, ax = plt.subplots()
    # NOTE(review): seaborn >= 0.13 emits a FutureWarning for `palette`
    # without `hue`; left unchanged to stay compatible with older releases.
    sns.barplot(data=class_counts, x="Class", y="Count", palette="rocket", ax=ax)
    ax.set_title("Class Distribution")
    st.pyplot(fig)
    # pyplot keeps every figure alive until closed; without this each rerun
    # of the script would leak one figure.
    plt.close(fig)


def _show_sample_images(df, classes):
    """Show the first image of every class, one class per column."""
    st.subheader("2️⃣ Sample Images Per Class")
    columns = st.columns(len(classes))
    for column, label in zip(columns, classes):
        id_code = df.loc[df["label"] == label, "id_code"].iloc[0]
        img_path = _image_path(id_code)
        if not os.path.exists(img_path):
            column.write(f"Image not found: {id_code}")
            continue
        try:
            # The context manager releases the file handle once the image
            # has been handed to Streamlit.
            with Image.open(img_path) as image:
                column.image(image, caption=label, use_container_width=True)
        except Exception as exc:  # best-effort UI: report and carry on
            column.write(f"Error loading image {id_code}: {exc}")


def _plot_image_sizes(df, classes):
    """Plot width/height histograms from a few images of each class."""
    st.subheader("3️⃣ Image Size Distribution")
    image_sizes = []
    for label in classes:
        sample_codes = df.loc[df["label"] == label, "id_code"].head(
            SIZE_SAMPLES_PER_CLASS
        )
        for img_code in sample_codes:
            img_path = _image_path(img_code)
            if not os.path.exists(img_path):
                continue
            try:
                with Image.open(img_path) as img:
                    image_sizes.append(img.size)
            except Exception as exc:  # best-effort: surface it, keep going
                st.warning(f"Error loading image {img_code}: {exc}")

    if not image_sizes:
        st.info("No image size data available. Check your paths.")
        return

    widths, heights = zip(*image_sizes)
    fig, ax = plt.subplots()
    # Pass the axes explicitly instead of relying on pyplot's global
    # "current axes" state.
    sns.histplot(widths, kde=True, label="Width", color="blue", ax=ax)
    sns.histplot(heights, kde=True, label="Height", color="green", ax=ax)
    ax.legend()
    ax.set_title("Image Size Distribution")
    st.pyplot(fig)
    plt.close(fig)


def render_dataset_tab():
    """Render the static dataset description."""
    st.markdown(DATASET_MARKDOWN)


def render_training_tab():
    """Render the data-driven visualisations of the training set.

    Any problem with the CSV (missing file, unreadable content, missing
    columns, no recognised labels) is reported inside the tab and the
    function returns early, so the remaining tabs still render.
    """
    st.markdown("### 📊 Training Data Class Distribution")

    if not os.path.isfile(CSV_PATH):
        st.error(f"Label CSV not found: {CSV_PATH}")
        return
    try:
        df = load_labels(CSV_PATH)
    except (OSError, ValueError) as exc:
        st.error(f"Could not read label CSV {CSV_PATH}: {exc}")
        return

    missing = [name for name in REQUIRED_COLUMNS if name not in df.columns]
    if missing:
        st.error(f"Label CSV is missing required column(s): {', '.join(missing)}")
        return

    # Values outside LABEL_MAP become NaN and are ignored by value_counts().
    df["label"] = df["diagnosis"].map(LABEL_MAP)

    class_counts = df["label"].value_counts().reset_index()
    class_counts.columns = ["Class", "Count"]
    if class_counts.empty:
        # st.columns(0) would raise, and there is nothing to plot anyway.
        st.info("No rows with a recognised diagnosis value were found in the CSV.")
        return

    classes = list(class_counts["Class"])
    _plot_class_distribution(class_counts)
    _show_sample_images(df, classes)
    _plot_image_sizes(df, classes)


def render_algorithm_tab():
    """Render the static model/algorithm description."""
    st.markdown(ALGORITHM_MARKDOWN)


# Tabs
tab1, tab2, tab3 = st.tabs(
    ["📂 Dataset Info", "📊 Training Visualization", "🤖 Algorithm Used"]
)

with tab1:
    render_dataset_tab()

with tab2:
    render_training_tab()

with tab3:
    render_algorithm_tab()