Spaces:
Build error
Build error
Upload 6 files
Browse files- Dockerfile +38 -0
- README.md +39 -5
- app.py +201 -0
- best.pt +3 -0
- detections.parquet +3 -0
- requirements.txt +9 -0
Dockerfile
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use official Python runtime as base image
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies needed by OpenCV / the ML stack.
# FIX: python:3.10-slim is now based on Debian 12 (bookworm), where the
# libgl1-mesa-glx package was removed — installing it fails the build.
# libgl1 is its replacement. --no-install-recommends keeps the layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files
COPY app.py .
COPY best.pt .
COPY detections.parquet .

# Create directory for video (will be downloaded at runtime)
RUN mkdir -p /app/data

# Expose port for Gradio
EXPOSE 7860

# Set environment variables
ENV GRADIO_SERVER_NAME="0.0.0.0"
ENV GRADIO_SERVER_PORT=7860

# Run the application
CMD ["python", "app.py"]
|
README.md
CHANGED
|
@@ -1,11 +1,45 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
-
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Car Parts Image-to-Video Retrieval
|
| 3 |
+
emoji: 🚗
|
| 4 |
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Car Parts Image-to-Video Retrieval System
|
| 12 |
+
|
| 13 |
+
An intelligent system that detects car parts in images and retrieves matching video clips from an indexed automotive video.
|
| 14 |
+
|
| 15 |
+
## Features
|
| 16 |
+
|
| 17 |
+
- **YOLOv26s Detection**: Fine-tuned on car parts dataset
|
| 18 |
+
- **Semantic Matching**: Identifies doors, wheels, headlights, mirrors, bumpers, and more
|
| 19 |
+
- **Temporal Retrieval**: Returns precise video clip timestamps
|
| 20 |
+
- **Interactive Demo**: Upload any car image and find matching video segments
|
| 21 |
+
|
| 22 |
+
## How to Use
|
| 23 |
+
|
| 24 |
+
1. Upload an image containing car parts
|
| 25 |
+
2. The system detects all visible components
|
| 26 |
+
3. View matching video clips with timestamps
|
| 27 |
+
4. Each clip shows where that component appears in the source video
|
| 28 |
+
|
| 29 |
+
## Technical Details
|
| 30 |
+
|
| 31 |
+
- **Model**: YOLOv26s (small variant) fine-tuned for car part detection
|
| 32 |
+
- **Video Index**: Pre-computed detection index with bounding boxes and timestamps
|
| 33 |
+
- **Sampling Strategy**: Every 5th frame (4.8-6 FPS effective rate)
|
| 34 |
+
- **Clip Formation**: 3.0s gap threshold for temporal merging
|
| 35 |
+
|
| 36 |
+
## Assignment Context
|
| 37 |
+
|
| 38 |
+
This demo is part of **Assignment 2** for CS-UY 4613 Artificial Intelligence (Spring 2026).
|
| 39 |
+
|
| 40 |
+
**Student**: Hanze (James) Qiu
|
| 41 |
+
**Repository**: [github.com/JamesQiu2005/CS-UY_4613_Assignments](https://github.com/JamesQiu2005/CS-UY_4613_Assignments)
|
| 42 |
+
|
| 43 |
+
---
|
| 44 |
+
|
| 45 |
+
Built with Ultralytics YOLO, OpenCV, and Gradio.
|
app.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
import pandas as pd
from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image, ImageDraw
import os

# Load the fine-tuned detector and the pre-computed per-frame detection
# index once at import time so every Gradio request can reuse them.
print("Loading model and detection index...")
model = YOLO("best.pt")
detection_df = pd.read_parquet("detections.parquet")

# Source video used for clip extraction. It is NOT bundled with the image
# (the Dockerfile only creates data/); download_video_if_needed() checks
# for it at runtime.
VIDEO_PATH = "data/videoplayback.mp4"
# NOTE(review): placeholder — the app never actually downloads from this
# URL; replace with a real URL and wire up a download step, or remove.
VIDEO_URL = "YOUR_VIDEO_URL_HERE"  # Replace with actual video URL or YouTube link
|
| 17 |
+
|
| 18 |
+
def download_video_if_needed():
    """Return True when the source video exists at VIDEO_PATH, else False.

    The video is not shipped with the Space; when it is missing we only
    report that fact (a yt-dlp download step could be added here later).
    """
    if os.path.exists(VIDEO_PATH):
        return True
    print(f"Video not found at {VIDEO_PATH}")
    print("Please upload video or provide YouTube URL")
    # You can add yt-dlp here to download from YouTube
    return False
|
| 26 |
+
|
| 27 |
+
def merge_intervals(timestamps, gap_threshold=3.0):
    """Merge nearby timestamps into contiguous (start, end) clips.

    Timestamps are deduplicated and sorted; a new clip is started whenever
    the gap to the previous timestamp exceeds *gap_threshold* seconds.
    Returns a list of (start, end) tuples; an empty input yields [].
    """
    if not timestamps:
        return []

    ordered = sorted(set(timestamps))
    clips = []
    clip_start = ordered[0]
    last_seen = ordered[0]

    for current in ordered[1:]:
        if current - last_seen > gap_threshold:
            # Gap too large: close out the running clip, open a new one.
            clips.append((clip_start, last_seen))
            clip_start = current
        last_seen = current

    # Close the final (always open) clip.
    clips.append((clip_start, last_seen))
    return clips
|
| 45 |
+
|
| 46 |
+
def retrieve_clips(query_image):
    """Detect car parts in *query_image* and retrieve matching video clips.

    Runs the module-level YOLO `model` on the image, then looks up each
    detected class (confidence >= 0.5) in the pre-computed `detection_df`
    index and merges matching frame timestamps into clips.

    Returns a 3-tuple for the Gradio outputs:
        (info_text, annotated_image, clips_dataframe_or_None)
    """
    if query_image is None:
        return "Please upload an image", None, None

    # Gradio may hand us a numpy array; normalize to PIL for drawing.
    if isinstance(query_image, np.ndarray):
        query_image = Image.fromarray(query_image)

    # Detect components in query image (first result = first image).
    results = model(query_image, verbose=False)[0]

    if len(results.boxes) == 0:
        return "No car parts detected in the image", query_image, None

    # Draw boxes on a copy so the caller's image is untouched.
    query_draw = query_image.copy()
    draw = ImageDraw.Draw(query_draw)

    retrieval_info = []
    all_clips = []

    # Process each detected component.
    # NOTE(review): the "β" marker characters in the strings below look like
    # mis-encoded emoji (likely ❌/✅) from the original file — confirm.
    for box_idx in range(len(results.boxes)):
        cls_id = int(results.boxes.cls[box_idx])
        cls_name = model.names[cls_id]
        conf = float(results.boxes.conf[box_idx])
        bbox = results.boxes.xyxy[box_idx].tolist()

        # Skip low-confidence detections entirely (no box drawn, no lookup).
        if conf < 0.5:
            continue

        # Draw bounding box + label above it.
        x1, y1, x2, y2 = bbox
        draw.rectangle([x1, y1, x2, y2], outline='red', width=3)
        draw.text((x1, y1-20), f"{cls_name} ({conf:.2f})", fill='red')

        # Search detection index for frames containing the same class,
        # applying the same 0.5 confidence floor to the indexed detections.
        matches = detection_df[detection_df['class_label'] == cls_name]
        matches = matches[matches['confidence_score'] > 0.5]

        if len(matches) == 0:
            retrieval_info.append(f"β {cls_name}: No matches found")
            continue

        # Merge frame timestamps into contiguous clips (3s gap threshold).
        timestamps = matches['timestamp'].tolist()
        clips = merge_intervals(timestamps, gap_threshold=3.0)

        retrieval_info.append(
            f"β {cls_name} (conf: {conf:.2%}): {len(clips)} clips, {len(matches)} frames"
        )

        for start, end in clips[:3]:  # Limit to first 3 clips per component
            all_clips.append({
                'component': cls_name,
                'start': start,
                'end': end,
                'duration': end - start
            })

    info_text = "\n".join(retrieval_info)

    # Create clips table only when at least one component had matches.
    if all_clips:
        clips_df = pd.DataFrame(all_clips)
        return info_text, query_draw, clips_df
    else:
        return info_text, query_draw, None
|
| 115 |
+
|
| 116 |
+
def extract_frame(component, start_time):
    """Extract one RGB frame from the indexed video at *start_time* seconds.

    *component* appears unused — presumably kept for a UI callback
    signature (verify against the Gradio wiring). Returns a PIL image,
    or None when the video is absent or the seek/read fails.
    """
    if not download_video_if_needed():
        return None

    cap = cv2.VideoCapture(VIDEO_PATH)
    frames_per_second = cap.get(cv2.CAP_PROP_FPS)
    target_frame = int(start_time * frames_per_second)

    cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame)
    ok, bgr_frame = cap.read()
    cap.release()

    if not ok:
        return None
    # OpenCV decodes BGR; convert before handing to PIL.
    return Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))
|
| 133 |
+
|
| 134 |
+
# Create Gradio interface. Declared at module level so `demo` exists for
# the __main__ launch block below.
# NOTE(review): the "π" / "β₯" characters inside the markdown strings look
# like mis-encoded emoji / "≥" from the original file — confirm before
# editing, as they are user-visible text.
with gr.Blocks(title="Image-to-Video Retrieval Demo") as demo:
    gr.Markdown("""
    # π Car Parts Image-to-Video Retrieval System

    Upload an image of a car part, and this system will find matching video clips!

    **How it works:**
    1. Upload a car image (doors, wheels, headlights, etc.)
    2. YOLOv26s detects all car parts in your image
    3. System retrieves matching video clips from the indexed video
    4. View timestamps and sample frames

    **Supported Components:** Doors, wheels, headlights, mirrors, bumpers, and more!
    """)

    with gr.Row():
        # Left column: query input + trigger button.
        with gr.Column(scale=1):
            input_image = gr.Image(type="pil", label="Upload Query Image")
            search_btn = gr.Button("π Search Video", variant="primary")

        # Right column: annotated detections + per-component summary.
        with gr.Column(scale=1):
            output_image = gr.Image(type="pil", label="Detected Components")
            output_text = gr.Textbox(label="Retrieval Results", lines=8)

    with gr.Row():
        # Table of merged (start, end) clips returned by retrieve_clips.
        output_table = gr.Dataframe(
            label="Matching Video Clips",
            headers=["component", "start", "end", "duration"]
        )

    gr.Markdown("""
    ---
    ### π Technical Details
    - **Model:** YOLOv26s fine-tuned on car parts dataset
    - **Video Sampling:** Every 5th frame
    - **Matching:** Semantic component matching with confidence β₯ 0.5
    - **Clip Formation:** 3.0s gap threshold for temporal merging

    **Assignment 2 - CS-UY 4613 Artificial Intelligence**
    Hanze (James) Qiu | Spring 2026
    """)

    # Connect button: retrieve_clips returns (text, image, table) in the
    # same order as the outputs list.
    search_btn.click(
        fn=retrieve_clips,
        inputs=[input_image],
        outputs=[output_text, output_image, output_table]
    )

    # Example images (optional - add paths to example images)
    # NOTE(review): some Gradio versions reject an empty examples list at
    # startup — consider removing this call until real examples exist.
    gr.Examples(
        examples=[
            # Add paths to example images if you have them
            # ["examples/car1.jpg"],
            # ["examples/car2.jpg"],
        ],
        inputs=input_image,
        label="Example Query Images"
    )
|
| 194 |
+
|
| 195 |
+
if __name__ == "__main__":
    # Launch the app on all interfaces; host/port mirror the EXPOSE and
    # GRADIO_SERVER_* settings in the Dockerfile.
    print("Starting Gradio app...")
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)
|
best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:77c3e8dc01a08bd99cc5b1f301f4237bd09a79d991b9eb5ec4635065c3feb110
|
| 3 |
+
size 20343045
|
detections.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e82da2dcb9eada79ba0c3c94256e0f88fe0dfa151437e271e860695f23a500e4
|
| 3 |
+
size 1400297
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch>=2.0.0
|
| 2 |
+
ultralytics>=8.0.0
|
| 3 |
+
opencv-python-headless==4.8.1.78
|
| 4 |
+
pandas>=2.0.0
|
| 5 |
+
pyarrow>=14.0.0
|
| 6 |
+
datasets>=2.16.0
|
| 7 |
+
gradio>=4.0.0
|
| 8 |
+
Pillow>=10.0.0
|
| 9 |
+
numpy>=1.24.0
|