# DR_Classification / pages / Dataset.py
# 3v324v23's picture
# Added Homepage
# 1b4d479
# raw
# history blame
# 4.54 kB
import streamlit as st
import pandas as pd
import os
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
# Page-level setup: wide layout plus the title shown above every tab.
st.set_page_config(layout="wide")
st.title("🩺 Diabetic Retinopathy Project")

# Tabs
TAB_TITLES = [
    "📂 Dataset Info",
    "📊 Training Visualization",
    "🤖 Algorithm Used",
]
tab1, tab2, tab3 = st.tabs(TAB_TITLES)
# =============================
# Tab 1: Dataset Information
# =============================
# Markdown shown in the "Dataset Info" tab.
# Blank lines separate the bullet list from the text that follows it; without
# them Markdown treats the following sentence and link as a continuation of
# the last bullet ("Proliferative_DR") instead of as their own paragraphs.
DATASET_INFO_MD = """
### 🧾 Dataset Overview

**Dataset Description:**
The DDR dataset contains **13,673 fundus images** from **147 hospitals** across **23 provinces in China**. The images are labeled into 5 classes based on DR severity:

- **No_DR**
- **Mild**
- **Moderate**
- **Severe**
- **Proliferative_DR**

Poor-quality images were removed, and black backgrounds were deleted.

[📎 Dataset source](https://www.kaggle.com/datasets/mariaherrerot/ddrdataset)

### 🧪 Data Preparation & Splitting

- All images resized to **224x224**
- **80% Training**, **20% Testing** (stratified by class)
"""

with tab1:
    st.markdown(DATASET_INFO_MD)
# =============================
# Tab 2: Training Visualization
# =============================
# Dataset locations. The defaults are the author's local Windows paths; set the
# DR_CSV_PATH / DR_IMG_FOLDER environment variables to point the page at a
# different copy of the data (for example on a host that has no D: drive).
# The previous literals combined an r-prefix with escaped backslashes, which
# put doubled separators into the paths; single separators are used here.
CSV_PATH = os.environ.get(
    "DR_CSV_PATH", r"D:\DR_Classification\dataset\DR_grading.csv"
)
IMG_FOLDER = os.environ.get(
    "DR_IMG_FOLDER", r"D:\DR_Classification\dataset\images"
)  # Folder where all images are stored

# Numeric 'diagnosis' grade -> readable class name used in the 'label' column.
label_map = {
    0: "No_DR",
    1: "Mild",
    2: "Moderate",
    3: "Severe",
    4: "Proliferative_DR",
}

# Failures treated as "this one image is unusable" rather than a page crash.
_IMAGE_ERRORS = (OSError, ValueError, Image.DecompressionBombError)


def _load_grading_table(csv_path):
    """Read the grading CSV and add a readable ``label`` column.

    Returns the DataFrame, or ``None`` after reporting the problem in the app
    when the file cannot be read or lacks the ``id_code``/``diagnosis`` columns.
    """
    try:
        table = pd.read_csv(csv_path)
    except (OSError, ValueError) as err:
        # pandas' EmptyDataError and ParserError are ValueError subclasses.
        st.error(f"Could not load dataset CSV '{csv_path}': {err}")
        return None

    missing = {"id_code", "diagnosis"} - set(table.columns)
    if missing:
        st.error(
            "Dataset CSV is missing required column(s): "
            + ", ".join(sorted(missing))
        )
        return None

    # Rows whose diagnosis is not a key of label_map get NaN and are ignored
    # by value_counts() below.
    table["label"] = table["diagnosis"].map(label_map)
    return table


def _show_class_distribution(class_counts):
    """Draw a bar chart of the number of rows per class."""
    fig, ax = plt.subplots()
    sns.barplot(data=class_counts, x='Class', y='Count', palette='rocket', ax=ax)
    ax.set_title("Class Distribution")
    st.pyplot(fig)
    # Release the figure; pyplot otherwise keeps it alive across script reruns.
    plt.close(fig)


def _show_sample_images(table, class_names, img_folder):
    """Show the first image of every class side by side, one column each."""
    columns = st.columns(len(class_names))
    for column, label in zip(columns, class_names):
        # id_code is joined to the folder as-is (no extension is appended);
        # str() guards against pandas having parsed the column as numbers.
        file_name = str(table.loc[table['label'] == label, 'id_code'].iloc[0])
        img_path = os.path.join(img_folder, file_name)
        if not os.path.exists(img_path):
            column.write(f"Image not found: {file_name}")
            continue
        try:
            # Context manager closes the file handle once Streamlit has
            # rendered the image.
            with Image.open(img_path) as image:
                column.image(image, caption=label, use_container_width=True)
        except _IMAGE_ERRORS as err:
            column.write(f"Error loading image {file_name}: {err}")


def _collect_image_sizes(table, class_names, img_folder, per_class=5):
    """Return ``(width, height)`` for up to ``per_class`` images of each class.

    Only a few images per class are inspected to keep the page fast. Missing
    files are skipped silently; unreadable ones produce a warning in the app.
    """
    sizes = []
    for label in class_names:
        codes = table.loc[table['label'] == label, 'id_code'].head(per_class)
        for img_code in codes:
            img_path = os.path.join(img_folder, str(img_code))
            if not os.path.exists(img_path):
                continue
            try:
                with Image.open(img_path) as img:
                    sizes.append(img.size)
            except _IMAGE_ERRORS as err:
                st.warning(f"Error loading image {img_code}: {err}")
    return sizes


def _show_image_sizes(sizes):
    """Plot overlaid histograms of the sampled image widths and heights."""
    if not sizes:
        st.info("No image size data available. Check your paths.")
        return

    widths, heights = zip(*sizes)
    fig, ax = plt.subplots()
    # A density estimate is undefined when every sampled value is identical,
    # so the KDE overlay is only requested when the values actually vary.
    sns.histplot(list(widths), kde=len(set(widths)) > 1,
                 label="Width", color="blue", ax=ax)
    sns.histplot(list(heights), kde=len(set(heights)) > 1,
                 label="Height", color="green", ax=ax)
    ax.legend()
    ax.set_title("Image Size Distribution")
    st.pyplot(fig)
    plt.close(fig)


with tab2:
    st.markdown("### 📊 Training Data Class Distribution")

    # Load CSV; on failure the error is shown here and the other tabs still render.
    df = _load_grading_table(CSV_PATH)

    if df is not None:
        # --- Metric 1: Class Distribution ---
        st.subheader("1️⃣ Class Distribution")
        class_counts = df['label'].value_counts().reset_index()
        class_counts.columns = ['Class', 'Count']

        if class_counts.empty:
            # st.columns() rejects a count of zero, so stop before the
            # per-class sections when nothing could be labelled.
            st.info("No labelled rows found in the dataset CSV.")
        else:
            _show_class_distribution(class_counts)
            class_names = list(class_counts['Class'])

            # --- Metric 2: Sample Images Per Class ---
            st.subheader("2️⃣ Sample Images Per Class")
            _show_sample_images(df, class_names, IMG_FOLDER)

            # --- Metric 3: Image Size Distribution ---
            st.subheader("3️⃣ Image Size Distribution")
            image_sizes = _collect_image_sizes(df, class_names, IMG_FOLDER)
            _show_image_sizes(image_sizes)
# =============================
# Tab 3: Algorithm Used
# =============================
# Markdown shown in the "Algorithm Used" tab.
# The blank line after the bullet list keeps the closing sentence a paragraph
# of its own; without it Markdown folds that sentence into the last bullet
# ("Evaluation Metrics").
ALGORITHM_MD = """
### 🤖 Model and Algorithm

We used **Transfer Learning** with **ResNet50** for DR classification.

#### 🏗️ Model Details:

- Input Image Size: **224x224**
- Pretrained on **ImageNet**
- Optimizer: **Adam**
- Loss Function: **Categorical Crossentropy**
- Evaluation Metrics: **Accuracy**, **Precision**, **Recall**

This architecture is ideal for medical image analysis due to its deep layers and robustness to overfitting.
"""

with tab3:
    st.markdown(ALGORITHM_MD)