akhaliq HF Staff committed on
Commit
81cedc5
·
1 Parent(s): db93463

refactor: migrate Gradio demo to custom FastAPI server and add HTML interface

Browse files
Files changed (2) hide show
  1. app.py +62 -200
  2. index.html +656 -0
app.py CHANGED
@@ -1,6 +1,3 @@
1
- '''
2
- Gradio App Demo
3
- '''
4
  import os, sys, shutil
5
  import json
6
  import glob
@@ -14,6 +11,7 @@ import numpy as np
14
  import torch
15
  import tempfile
16
  import spaces
 
17
 
18
  # Temp file bug of gradio
19
  BASE_TMP_DIR = os.path.abspath("./gradio_tmp")
@@ -23,7 +21,9 @@ os.environ["TEMP"] = BASE_TMP_DIR
23
  os.environ["TMP"] = BASE_TMP_DIR
24
  os.environ["GRADIO_TEMP_DIR"] = BASE_TMP_DIR
25
  tempfile.tempdir = BASE_TMP_DIR
26
- import gradio as gr
 
 
27
 
28
 
29
  # Import your existing project code
@@ -69,11 +69,10 @@ model, model_args = load_model(checkpoint_path)
69
 
70
 
71
 
72
- ######################## Gallery Prepare ########################
73
 
74
  def escape_html(x):
75
  x = "" if x is None else str(x)
76
-
77
  return (
78
  x.replace("&", "&amp;")
79
  .replace("<", "&lt;")
@@ -83,15 +82,6 @@ def escape_html(x):
83
  )
84
 
85
 
86
- def prepare_gallery(page_paths: List[str]):
87
- gallery_items = []
88
-
89
- for page_idx, page_path in enumerate(page_paths):
90
- gallery_items.append((page_path, f"Page {page_idx}"))
91
-
92
- return gallery_items
93
-
94
-
95
  def prepare_result_table(
96
  pred_ranges: List[List[int]],
97
  pred_intra_labels: List[int],
@@ -156,212 +146,84 @@ def prepare_result_table(
156
  return html
157
 
158
 
159
- def list_sample_videos(asset_dir: str = "__assets__", max_samples: int = 8) -> List[List[str]]:
160
  script_dir = os.path.dirname(os.path.abspath(__file__))
161
  asset_dir = os.path.join(script_dir, asset_dir)
162
 
163
  if not os.path.isdir(asset_dir):
164
  return []
165
 
166
- mp4_paths = []
167
  for name in sorted(os.listdir(asset_dir)):
168
  path = os.path.join(asset_dir, name)
169
  if os.path.isfile(path) and name.lower().endswith(".mp4"):
170
- mp4_paths.append([path])
171
 
172
- print("We have", len(mp4_paths), "number of videos!")
173
- return mp4_paths[:max_samples]
174
 
175
- sample_videos = list_sample_videos("__assets__/", max_samples = 16)
176
 
 
 
 
177
 
 
178
 
 
 
 
 
179
 
 
180
  @spaces.GPU(duration=120)
181
- def run_demo(video_file):
182
-
183
- if video_file is None:
184
- raise gr.Error("Please upload a video first.")
185
-
186
- video_path = video_file if isinstance(video_file, str) else video_file.name
187
  if not os.path.exists(video_path):
188
- raise gr.Error(f"Video file does not exist: {video_path}")
189
-
190
- # Read the setting
191
- num_context_frames = DEFAULT_NUM_CONTEXT_FRAMES
192
- max_frames_per_img = DEFAULT_MAX_FRAMES_PER_IMG
193
-
194
 
195
- print("Start processing the video", video_path)
196
  pred_ranges, pred_intra_labels, pred_inter_labels, video_np_full, fps = single_video_inference(
197
- video_path = video_path,
198
- model = model,
199
- model_args = model_args,
200
- num_context_frames = int(num_context_frames),
201
- )
202
- print("Finish running the video")
203
 
204
- # Prepare the folder
205
- cur_VIS_DIR = VIS_DIR + "_" + str(time.time())
206
- if os.path.exists(cur_VIS_DIR):
207
- shutil.rmtree(cur_VIS_DIR)
208
- os.makedirs(cur_VIS_DIR)
209
 
210
- # Visualize and store (Must Do!)
211
  page_paths = visualize_concated_frames(
212
- frames = video_np_full,
213
- out_dir = cur_VIS_DIR,
214
- highlight_ranges_closed = pred_ranges,
215
- max_frames_per_img = int(max_frames_per_img),
216
- end_range_exclusive = True,
217
- fps = fps,
218
- start_index = 0,
219
- )
220
-
221
- gallery_paths = page_paths[:MAX_GALLERY_PAGES]
222
-
223
- result_table = prepare_result_table(
224
- pred_ranges = pred_ranges,
225
- pred_intra_labels = pred_intra_labels,
226
- pred_inter_labels = pred_inter_labels,
227
- fps = fps,
228
- )
229
-
230
- print("Visualization pages:", len(page_paths))
231
- print("Shown visualization pages:", len(gallery_paths))
232
- print("Predicted shots:", len(pred_ranges))
233
-
234
- return gr.update(value = prepare_gallery(gallery_paths)), gr.update(value = result_table)
235
-
236
-
237
- def clear_demo_outputs():
238
- return gr.update(value = []), gr.update(value = "")
239
-
240
-
241
-
242
- # -------------------------
243
- # UI Design
244
- # -------------------------
245
- custom_css = """
246
- #visual_gallery img {
247
- object-fit: contain !important;
248
- }
249
-
250
- #visual_gallery .thumbnail-item {
251
- object-fit: contain !important;
252
- }
253
-
254
- #visual_gallery .grid-wrap {
255
- align-items: start !important;
256
- }
257
-
258
- .result-table-wrap {
259
- width: 100%;
260
- max-height: 360px;
261
- overflow: auto;
262
- border: 1px solid #e5e7eb;
263
- border-radius: 10px;
264
- }
265
-
266
- .result-table {
267
- width: 100%;
268
- border-collapse: collapse;
269
- font-size: 14px;
270
- }
271
-
272
- .result-table th {
273
- position: sticky;
274
- top: 0;
275
- background: #f9fafb;
276
- border-bottom: 1px solid #e5e7eb;
277
- padding: 8px 10px;
278
- text-align: left;
279
- white-space: nowrap;
280
- }
281
-
282
- .result-table td {
283
- border-bottom: 1px solid #f1f5f9;
284
- padding: 8px 10px;
285
- white-space: nowrap;
286
- }
287
-
288
- .result-table tr:hover {
289
- background: #f9fafb;
290
- }
291
- """
292
-
293
-
294
-
295
- MARKDOWN = \
296
- """
297
- <div align="center">
298
-
299
- # OmniShotCut: Holistic Relational Shot Boundary Detection with Shot-Query Transformer
300
-
301
- <b>A sensitive and more informative SoTA shot boundary detection model.</b>
302
-
303
- <br>
304
-
305
- <a href="https://arxiv.org/abs/2604.24762">arXiv</a> ·
306
- <a href="https://uva-computer-vision-lab.github.io/OmniShotCut_website/">Project Page</a> ·
307
- <a href="https://github.com/UVA-Computer-Vision-Lab/OmniShotCut">Github</a> ·
308
- <a href="https://huggingface.co/uva-cv-lab/OmniShotCut">Model</a>
309
-
310
- </div>
311
-
312
- ---
313
-
314
- Upload a video and click <b>Run Inference</b>.
315
- """
316
-
317
-
318
- with gr.Blocks(title="OmniShotCut Demo", css = custom_css) as demo:
319
-
320
- # Head title
321
- gr.Markdown(MARKDOWN)
322
-
323
- with gr.Row():
324
- with gr.Column(scale=1):
325
- video_input = gr.Video(label = "Input Video", height = 480)
326
- run_button = gr.Button("Run Inference", variant="primary")
327
-
328
- with gr.Column(scale=1):
329
- gr.Markdown("## Visualization")
330
- gallery = gr.Gallery(
331
- label = None,
332
- columns = 1,
333
- height = 760,
334
- preview = True,
335
- elem_id = "visual_gallery",
336
- object_fit = "contain",
337
- )
338
-
339
- gr.Markdown("## Predicted Shot Results")
340
- result_table = gr.HTML(
341
- value = "",
342
- elem_id = "result_table",
343
- )
344
-
345
-
346
- gr.Markdown("## Sample Videos")
347
- gr.Examples(
348
- examples = sample_videos,
349
- inputs = [video_input],
350
- label = "Choose a sample video",
351
- )
352
-
353
-
354
- run_button.click(
355
- fn = clear_demo_outputs,
356
- inputs = [],
357
- outputs = [gallery, result_table],
358
- ).then(
359
- fn = run_demo,
360
- inputs =[video_input],
361
- outputs = [gallery, result_table],
362
- )
363
-
364
-
365
 
366
  if __name__ == "__main__":
367
- demo.launch(share=True)
 
 
 
 
 
1
  import os, sys, shutil
2
  import json
3
  import glob
 
11
  import torch
12
  import tempfile
13
  import spaces
14
+ from fastapi.responses import HTMLResponse
15
 
16
  # Temp file bug of gradio
17
  BASE_TMP_DIR = os.path.abspath("./gradio_tmp")
 
21
  os.environ["TMP"] = BASE_TMP_DIR
22
  os.environ["GRADIO_TEMP_DIR"] = BASE_TMP_DIR
23
  tempfile.tempdir = BASE_TMP_DIR
24
+
25
+ from gradio import Server
26
+ from gradio.data_classes import FileData
27
 
28
 
29
  # Import your existing project code
 
69
 
70
 
71
 
72
+ ######################## Utilities ########################
73
 
74
  def escape_html(x):
75
  x = "" if x is None else str(x)
 
76
  return (
77
  x.replace("&", "&amp;")
78
  .replace("<", "&lt;")
 
82
  )
83
 
84
 
 
 
 
 
 
 
 
 
 
85
  def prepare_result_table(
86
  pred_ranges: List[List[int]],
87
  pred_intra_labels: List[int],
 
146
  return html
147
 
148
 
149
+ def list_sample_videos(asset_dir: str = "__assets__", max_samples: int = 8) -> List[dict]:
150
  script_dir = os.path.dirname(os.path.abspath(__file__))
151
  asset_dir = os.path.join(script_dir, asset_dir)
152
 
153
  if not os.path.isdir(asset_dir):
154
  return []
155
 
156
+ samples = []
157
  for name in sorted(os.listdir(asset_dir)):
158
  path = os.path.join(asset_dir, name)
159
  if os.path.isfile(path) and name.lower().endswith(".mp4"):
160
+ samples.append({"path": path, "name": name})
161
 
162
+ return samples[:max_samples]
 
163
 
 
164
 
165
+ # -------------------------
166
+ # Server and API
167
+ # -------------------------
168
 
169
+ app = Server()
170
 
171
+ @app.api()
172
+ def get_examples() -> List[FileData]:
173
+ samples = list_sample_videos("__assets__/", max_samples=16)
174
+ return [FileData(path=s["path"], orig_name=s["name"]) for s in samples]
175
 
176
+ @app.api()
177
  @spaces.GPU(duration=120)
178
+ def run_inference(video_file: FileData) -> dict:
179
+ video_path = video_file.path
 
 
 
 
180
  if not os.path.exists(video_path):
181
+ return {"error": "Video file not found"}
 
 
 
 
 
182
 
183
+ print(f"Start processing: {video_path}")
184
  pred_ranges, pred_intra_labels, pred_inter_labels, video_np_full, fps = single_video_inference(
185
+ video_path=video_path,
186
+ model=model,
187
+ model_args=model_args,
188
+ num_context_frames=DEFAULT_NUM_CONTEXT_FRAMES,
189
+ )
190
+ print("Inference finished")
191
 
192
+ # Prepare visualization directory
193
+ cur_vis_dir = os.path.join(VIS_DIR, f"vis_{int(time.time())}")
194
+ os.makedirs(cur_vis_dir, exist_ok=True)
 
 
195
 
196
+ # Generate visualization frames
197
  page_paths = visualize_concated_frames(
198
+ frames=video_np_full,
199
+ out_dir=cur_vis_dir,
200
+ highlight_ranges_closed=pred_ranges,
201
+ max_frames_per_img=DEFAULT_MAX_FRAMES_PER_IMG,
202
+ end_range_exclusive=True,
203
+ fps=fps,
204
+ start_index=0,
205
+ )
206
+
207
+ gallery_files = [FileData(path=p) for p in page_paths[:MAX_GALLERY_PAGES]]
208
+ result_table_html = prepare_result_table(
209
+ pred_ranges=pred_ranges,
210
+ pred_intra_labels=pred_intra_labels,
211
+ pred_inter_labels=pred_inter_labels,
212
+ fps=fps,
213
+ )
214
+
215
+ return {
216
+ "gallery": gallery_files,
217
+ "table": result_table_html,
218
+ "shot_count": len(pred_ranges)
219
+ }
220
+
221
+ @app.get("/", response_class=HTMLResponse)
222
+ async def homepage():
223
+ html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
224
+ with open(html_path, "r", encoding="utf-8") as f:
225
+ return f.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  if __name__ == "__main__":
228
+ app.launch(show_error=True)
229
+
index.html ADDED
@@ -0,0 +1,656 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>OmniShotCut | AI Shot Boundary Detection</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com">
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
9
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
10
+ <style>
11
+ :root {
12
+ --primary: #8b5cf6;
13
+ --primary-hover: #7c3aed;
14
+ --bg: #09090b;
15
+ --card-bg: #18181b;
16
+ --border: #27272a;
17
+ --text: #fafafa;
18
+ --text-muted: #a1a1aa;
19
+ --accent: #d4d4d8;
20
+ --glass: rgba(24, 24, 27, 0.8);
21
+ --glass-border: rgba(255, 255, 255, 0.1);
22
+ }
23
+
24
+ * {
25
+ box-sizing: border-box;
26
+ margin: 0;
27
+ padding: 0;
28
+ }
29
+
30
+ body {
31
+ font-family: 'Inter', sans-serif;
32
+ background-color: var(--bg);
33
+ color: var(--text);
34
+ line-height: 1.5;
35
+ -webkit-font-smoothing: antialiased;
36
+ overflow-x: hidden;
37
+ }
38
+
39
+ .container {
40
+ max-width: 1200px;
41
+ margin: 0 auto;
42
+ padding: 2rem;
43
+ }
44
+
45
+ header {
46
+ display: flex;
47
+ justify-content: space-between;
48
+ align-items: center;
49
+ padding: 1.5rem 0;
50
+ border-bottom: 1px solid var(--border);
51
+ margin-bottom: 3rem;
52
+ position: sticky;
53
+ top: 0;
54
+ background: var(--glass);
55
+ backdrop-filter: blur(12px);
56
+ z-index: 100;
57
+ width: 100%;
58
+ }
59
+
60
+ .header-content {
61
+ max-width: 1200px;
62
+ margin: 0 auto;
63
+ width: 100%;
64
+ display: flex;
65
+ justify-content: space-between;
66
+ align-items: center;
67
+ padding: 0 2rem;
68
+ }
69
+
70
+ .logo {
71
+ font-size: 1.5rem;
72
+ font-weight: 700;
73
+ letter-spacing: -0.025em;
74
+ background: linear-gradient(to right, #a78bfa, #8b5cf6);
75
+ -webkit-background-clip: text;
76
+ -webkit-text-fill-color: transparent;
77
+ }
78
+
79
+ .nav-links a {
80
+ color: var(--text-muted);
81
+ text-decoration: none;
82
+ font-size: 0.875rem;
83
+ margin-left: 1.5rem;
84
+ transition: color 0.2s;
85
+ }
86
+
87
+ .nav-links a:hover {
88
+ color: var(--text);
89
+ }
90
+
91
+ .hero {
92
+ text-align: center;
93
+ margin-bottom: 4rem;
94
+ animation: fadeIn 0.8s ease-out;
95
+ }
96
+
97
+ .hero h1 {
98
+ font-size: 3.5rem;
99
+ font-weight: 800;
100
+ margin-bottom: 1rem;
101
+ letter-spacing: -0.05em;
102
+ }
103
+
104
+ .hero p {
105
+ font-size: 1.25rem;
106
+ color: var(--text-muted);
107
+ max-width: 700px;
108
+ margin: 0 auto;
109
+ }
110
+
111
+ @keyframes fadeIn {
112
+ from { opacity: 0; transform: translateY(20px); }
113
+ to { opacity: 1; transform: translateY(0); }
114
+ }
115
+
116
+ .main-grid {
117
+ display: grid;
118
+ grid-template-columns: 1fr 1fr;
119
+ gap: 2rem;
120
+ margin-bottom: 4rem;
121
+ }
122
+
123
+ @media (max-width: 1024px) {
124
+ .main-grid {
125
+ grid-template-columns: 1fr;
126
+ }
127
+ }
128
+
129
+ .card {
130
+ background: var(--card-bg);
131
+ border: 1px solid var(--border);
132
+ border-radius: 1rem;
133
+ padding: 1.5rem;
134
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
135
+ transition: transform 0.2s, box-shadow 0.2s;
136
+ }
137
+
138
+ .card:hover {
139
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
140
+ }
141
+
142
+ .upload-area {
143
+ border: 2px dashed var(--border);
144
+ border-radius: 0.75rem;
145
+ padding: 3rem;
146
+ text-align: center;
147
+ cursor: pointer;
148
+ transition: border-color 0.2s, background 0.2s;
149
+ position: relative;
150
+ }
151
+
152
+ .upload-area:hover, .upload-area.dragging {
153
+ border-color: var(--primary);
154
+ background: rgba(139, 92, 246, 0.05);
155
+ }
156
+
157
+ .upload-icon {
158
+ width: 48px;
159
+ height: 48px;
160
+ margin-bottom: 1rem;
161
+ color: var(--text-muted);
162
+ }
163
+
164
+ .upload-area p {
165
+ margin-bottom: 0.5rem;
166
+ }
167
+
168
+ .upload-area span {
169
+ font-size: 0.875rem;
170
+ color: var(--text-muted);
171
+ }
172
+
173
+ #video-input {
174
+ display: none;
175
+ }
176
+
177
+ .preview-video {
178
+ width: 100%;
179
+ border-radius: 0.5rem;
180
+ margin-top: 1rem;
181
+ display: none;
182
+ background: #000;
183
+ }
184
+
185
+ .btn {
186
+ display: inline-flex;
187
+ align-items: center;
188
+ justify-content: center;
189
+ padding: 0.75rem 1.5rem;
190
+ border-radius: 0.5rem;
191
+ font-weight: 600;
192
+ font-size: 0.875rem;
193
+ cursor: pointer;
194
+ transition: all 0.2s;
195
+ border: none;
196
+ width: 100%;
197
+ margin-top: 1rem;
198
+ }
199
+
200
+ .btn-primary {
201
+ background: var(--primary);
202
+ color: white;
203
+ }
204
+
205
+ .btn-primary:hover:not(:disabled) {
206
+ background: var(--primary-hover);
207
+ transform: translateY(-1px);
208
+ }
209
+
210
+ .btn-primary:disabled {
211
+ opacity: 0.5;
212
+ cursor: not-allowed;
213
+ }
214
+
215
+ .results-section {
216
+ display: none;
217
+ animation: fadeIn 0.5s ease-out;
218
+ }
219
+
220
+ .section-title {
221
+ font-size: 1.25rem;
222
+ font-weight: 700;
223
+ margin-bottom: 1.5rem;
224
+ display: flex;
225
+ align-items: center;
226
+ gap: 0.5rem;
227
+ }
228
+
229
+ .gallery-grid {
230
+ display: grid;
231
+ grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
232
+ gap: 1rem;
233
+ margin-bottom: 2rem;
234
+ }
235
+
236
+ .gallery-item {
237
+ position: relative;
238
+ aspect-ratio: 16/9;
239
+ border-radius: 0.5rem;
240
+ overflow: hidden;
241
+ border: 1px solid var(--border);
242
+ cursor: pointer;
243
+ transition: transform 0.2s;
244
+ }
245
+
246
+ .gallery-item:hover {
247
+ transform: scale(1.02);
248
+ border-color: var(--primary);
249
+ }
250
+
251
+ .gallery-item img {
252
+ width: 100%;
253
+ height: 100%;
254
+ object-fit: cover;
255
+ }
256
+
257
+ .table-container {
258
+ width: 100%;
259
+ overflow-x: auto;
260
+ border: 1px solid var(--border);
261
+ border-radius: 0.75rem;
262
+ background: var(--card-bg);
263
+ }
264
+
265
+ /* Result table styling override */
266
+ .result-table-wrap {
267
+ width: 100%;
268
+ max-height: 500px;
269
+ overflow: auto;
270
+ }
271
+
272
+ .result-table {
273
+ width: 100%;
274
+ border-collapse: collapse;
275
+ font-size: 14px;
276
+ color: var(--text);
277
+ }
278
+
279
+ .result-table th {
280
+ position: sticky;
281
+ top: 0;
282
+ background: #27272a;
283
+ border-bottom: 1px solid var(--border);
284
+ padding: 12px 16px;
285
+ text-align: left;
286
+ font-weight: 600;
287
+ color: var(--text-muted);
288
+ }
289
+
290
+ .result-table td {
291
+ border-bottom: 1px solid var(--border);
292
+ padding: 12px 16px;
293
+ }
294
+
295
+ .result-table tr:hover {
296
+ background: rgba(255, 255, 255, 0.02);
297
+ }
298
+
299
+ .loading-overlay {
300
+ position: fixed;
301
+ top: 0;
302
+ left: 0;
303
+ width: 100%;
304
+ height: 100%;
305
+ background: rgba(0, 0, 0, 0.8);
306
+ display: none;
307
+ flex-direction: column;
308
+ align-items: center;
309
+ justify-content: center;
310
+ z-index: 1000;
311
+ backdrop-filter: blur(4px);
312
+ }
313
+
314
+ .spinner {
315
+ width: 40px;
316
+ height: 40px;
317
+ border: 4px solid rgba(255, 255, 255, 0.1);
318
+ border-left-color: var(--primary);
319
+ border-radius: 50%;
320
+ animation: spin 1s linear infinite;
321
+ margin-bottom: 1rem;
322
+ }
323
+
324
+ @keyframes spin {
325
+ to { transform: rotate(360deg); }
326
+ }
327
+
328
+ .examples-section {
329
+ margin-top: 4rem;
330
+ padding-top: 2rem;
331
+ border-top: 1px solid var(--border);
332
+ }
333
+
334
+ .examples-grid {
335
+ display: grid;
336
+ grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
337
+ gap: 1.5rem;
338
+ margin-top: 1.5rem;
339
+ }
340
+
341
+ .example-item {
342
+ cursor: pointer;
343
+ border-radius: 0.75rem;
344
+ overflow: hidden;
345
+ border: 1px solid var(--border);
346
+ transition: all 0.2s;
347
+ position: relative;
348
+ }
349
+
350
+ .example-item:hover {
351
+ border-color: var(--primary);
352
+ transform: translateY(-4px);
353
+ }
354
+
355
+ .example-item video {
356
+ width: 100%;
357
+ display: block;
358
+ }
359
+
360
+ .example-label {
361
+ padding: 0.75rem;
362
+ font-size: 0.875rem;
363
+ font-weight: 500;
364
+ text-align: center;
365
+ background: var(--card-bg);
366
+ }
367
+
368
+ footer {
369
+ margin-top: 8rem;
370
+ padding: 4rem 0;
371
+ border-top: 1px solid var(--border);
372
+ text-align: center;
373
+ color: var(--text-muted);
374
+ font-size: 0.875rem;
375
+ }
376
+
377
+ /* Modal for full size visualization */
378
+ .modal {
379
+ display: none;
380
+ position: fixed;
381
+ top: 0;
382
+ left: 0;
383
+ width: 100%;
384
+ height: 100%;
385
+ background: rgba(0, 0, 0, 0.95);
386
+ z-index: 2000;
387
+ justify-content: center;
388
+ align-items: center;
389
+ padding: 2rem;
390
+ }
391
+
392
+ .modal-content {
393
+ max-width: 100%;
394
+ max-height: 100%;
395
+ object-fit: contain;
396
+ }
397
+
398
+ .close-modal {
399
+ position: absolute;
400
+ top: 2rem;
401
+ right: 2rem;
402
+ color: white;
403
+ font-size: 2rem;
404
+ cursor: pointer;
405
+ }
406
+ </style>
407
+ </head>
408
+ <body>
409
+
410
+ <header>
411
+ <div class="header-content">
412
+ <div class="logo">OmniShotCut</div>
413
+ <nav class="nav-links">
414
+ <a href="https://arxiv.org/abs/2604.24762" target="_blank">arXiv</a>
415
+ <a href="https://uva-computer-vision-lab.github.io/OmniShotCut_website/" target="_blank">Project</a>
416
+ <a href="https://github.com/UVA-Computer-Vision-Lab/OmniShotCut" target="_blank">GitHub</a>
417
+ </nav>
418
+ </div>
419
+ </header>
420
+
421
+ <div class="container">
422
+ <section class="hero">
423
+ <h1>Relational Shot Detection</h1>
424
+ <p>State-of-the-art shot boundary detection using Shot-Query Transformer. Analyze videos with holistic temporal understanding.</p>
425
+ </section>
426
+
427
+ <div class="main-grid">
428
+ <div class="card">
429
+ <div class="section-title">
430
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
431
+ Upload Video
432
+ </div>
433
+ <div class="upload-area" id="drop-zone">
434
+ <svg class="upload-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="2" y="2" width="20" height="20" rx="2.18" ry="2.18"/><line x1="7" y1="2" x2="7" y2="22"/><line x1="17" y1="2" x2="17" y2="22"/><line x1="2" y1="12" x2="22" y2="12"/><line x1="2" y1="7" x2="7" y2="7"/><line x1="2" y1="17" x2="7" y2="17"/><line x1="17" y1="17" x2="22" y2="17"/><line x1="17" y1="7" x2="22" y2="7"/></svg>
435
+ <p>Drag & drop or click to upload</p>
436
+ <span>Supports MP4, AVI, MOV</span>
437
+ <input type="file" id="video-input" accept="video/*">
438
+ </div>
439
+ <video id="preview-video" class="preview-video" controls></video>
440
+ <button id="run-btn" class="btn btn-primary" disabled>Run Inference</button>
441
+ </div>
442
+
443
+ <div class="card">
444
+ <div class="section-title">
445
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><circle cx="12" cy="12" r="10"/><line x1="12" y1="16" x2="12" y2="12"/><line x1="12" y1="8" x2="12.01" y2="8"/></svg>
446
+ How it works
447
+ </div>
448
+ <p style="color: var(--text-muted); font-size: 0.9rem; margin-bottom: 1rem;">
449
+ OmniShotCut uses a Transformer-based architecture to detect shot boundaries by considering the relationships between all frames in a segment, rather than just local changes.
450
+ </p>
451
+ <ul style="color: var(--text-muted); font-size: 0.9rem; margin-left: 1.25rem; display: flex; flex-direction: column; gap: 0.5rem;">
452
+ <li>High sensitivity to subtle transitions</li>
453
+ <li>Holistic relational modeling</li>
454
+ <li>Support for complex intra/inter label classification</li>
455
+ <li>Efficient ZeroGPU inference</li>
456
+ </ul>
457
+ </div>
458
+ </div>
459
+
460
+ <section id="results" class="results-section">
461
+ <div class="section-title">
462
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="3" width="18" height="18" rx="2"/><path d="M3 9h18"/><path d="M9 21V9"/></svg>
463
+ Inference Results <span id="shot-badge" style="background: var(--primary); padding: 2px 8px; border-radius: 99px; font-size: 0.75rem; margin-left: 0.5rem;">0 Shots</span>
464
+ </div>
465
+
466
+ <h3 style="margin-bottom: 1rem; font-size: 1rem; color: var(--text-muted);">Visualization</h3>
467
+ <div class="gallery-grid" id="gallery">
468
+ <!-- Gallery items will be injected here -->
469
+ </div>
470
+
471
+ <h3 style="margin-bottom: 1rem; font-size: 1rem; color: var(--text-muted);">Detailed Shot List</h3>
472
+ <div id="table-container" class="table-container">
473
+ <!-- Table will be injected here -->
474
+ </div>
475
+ </section>
476
+
477
+ <section class="examples-section">
478
+ <div class="section-title">Try Examples</div>
479
+ <div class="examples-grid" id="examples-grid">
480
+ <!-- Examples will be injected here -->
481
+ </div>
482
+ </section>
483
+
484
+ <footer>
485
+ <p>&copy; 2026 OmniShotCut. Built with Gradio Server & FastAPI.</p>
486
+ </footer>
487
+ </div>
488
+
489
+ <div class="loading-overlay" id="loader">
490
+ <div class="spinner"></div>
491
+ <p id="loading-text">Processing video...</p>
492
+ </div>
493
+
494
+ <div class="modal" id="modal">
495
+ <span class="close-modal">&times;</span>
496
+ <img class="modal-content" id="modal-img">
497
+ </div>
498
+
499
+ <script type="module">
500
+ import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
501
+
502
+ const dropZone = document.getElementById('drop-zone');
503
+ const videoInput = document.getElementById('video-input');
504
+ const previewVideo = document.getElementById('preview-video');
505
+ const runBtn = document.getElementById('run-btn');
506
+ const loader = document.getElementById('loader');
507
+ const resultsSection = document.getElementById('results');
508
+ const gallery = document.getElementById('gallery');
509
+ const tableContainer = document.getElementById('table-container');
510
+ const shotBadge = document.getElementById('shot-badge');
511
+ const modal = document.getElementById('modal');
512
+ const modalImg = document.getElementById('modal-img');
513
+ const examplesGrid = document.getElementById('examples-grid');
514
+
515
+ let selectedFile = null;
516
+ let client = null;
517
+
518
+ // Initialize Gradio Client
519
+ async function initClient() {
520
+ client = await Client.connect(window.location.origin);
521
+ console.log("Gradio Client Connected");
522
+ }
523
+ initClient();
524
+
525
+ // Handle File Selection
526
+ dropZone.onclick = () => videoInput.click();
527
+
528
+ videoInput.onchange = (e) => {
529
+ const file = e.target.files[0];
530
+ if (file) handleFile(file);
531
+ };
532
+
533
+ dropZone.ondragover = (e) => {
534
+ e.preventDefault();
535
+ dropZone.classList.add('dragging');
536
+ };
537
+
538
+ dropZone.ondragleave = () => {
539
+ dropZone.classList.remove('dragging');
540
+ };
541
+
542
+ dropZone.ondrop = (e) => {
543
+ e.preventDefault();
544
+ dropZone.classList.remove('dragging');
545
+ const file = e.dataTransfer.files[0];
546
+ if (file) handleFile(file);
547
+ };
548
+
549
+ function handleFile(file) {
550
+ selectedFile = file;
551
+ const url = URL.createObjectURL(file);
552
+ previewVideo.src = url;
553
+ previewVideo.style.display = 'block';
554
+ dropZone.style.display = 'none';
555
+ runBtn.disabled = false;
556
+ }
557
+
558
+ // Run Inference
559
+ runBtn.onclick = async () => {
560
+ if (!selectedFile || !client) return;
561
+
562
+ loader.style.display = 'flex';
563
+ resultsSection.style.display = 'none';
564
+
565
+ try {
566
+ const result = await client.predict("/run_inference", {
567
+ video_file: handle_file(selectedFile)
568
+ });
569
+
570
+ const data = result.data[0];
571
+ renderResults(data);
572
+ } catch (error) {
573
+ console.error("Inference failed:", error);
574
+ alert("Inference failed. Check console for details.");
575
+ } finally {
576
+ loader.style.display = 'none';
577
+ }
578
+ };
579
+
580
+ function renderResults(data) {
581
+ resultsSection.style.display = 'block';
582
+ shotBadge.innerText = `${data.shot_count} Shots`;
583
+
584
+ // Render Gallery
585
+ gallery.innerHTML = '';
586
+ data.gallery.forEach(file => {
587
+ const item = document.createElement('div');
588
+ item.className = 'gallery-item';
589
+ const img = document.createElement('img');
590
+ img.src = file.url;
591
+ item.appendChild(img);
592
+ item.onclick = () => {
593
+ modalImg.src = file.url;
594
+ modal.style.display = 'flex';
595
+ };
596
+ gallery.appendChild(item);
597
+ });
598
+
599
+ // Render Table
600
+ tableContainer.innerHTML = data.table;
601
+
602
+ // Scroll to results
603
+ resultsSection.scrollIntoView({ behavior: 'smooth' });
604
+ }
605
+
606
+ // Modal close
607
+ document.querySelector('.close-modal').onclick = () => {
608
+ modal.style.display = 'none';
609
+ };
610
+
611
+ // Fetch examples from backend
612
+ async function loadExamples() {
613
+ try {
614
+ // Sample videos are listed by the backend's /get_examples API
615
+ const result = await client.predict("/get_examples", {});
616
+ const examples = result.data[0];
617
+
618
+ examplesGrid.innerHTML = '';
619
+ examples.forEach(ex => {
620
+ const item = document.createElement('div');
621
+ item.className = 'example-item';
622
+
623
+ const video = document.createElement('video');
624
+ video.src = ex.url;
625
+ video.muted = true;
626
+ video.onmouseover = () => video.play();
627
+ video.onmouseout = () => { video.pause(); video.currentTime = 0; };
628
+
629
+ const label = document.createElement('div');
630
+ label.className = 'example-label';
631
+ label.innerText = ex.orig_name || 'Example';
632
+
633
+ item.appendChild(video);
634
+ item.appendChild(label);
635
+
636
+ item.onclick = async () => {
637
+ const response = await fetch(ex.url);
638
+ const blob = await response.blob();
639
+ const file = new File([blob], ex.orig_name || 'example.mp4', { type: 'video/mp4' });
640
+ handleFile(file);
641
+ window.scrollTo({ top: dropZone.offsetTop - 100, behavior: 'smooth' });
642
+ };
643
+
644
+ examplesGrid.appendChild(item);
645
+ });
646
+ } catch (e) {
647
+ console.log("No examples found or API missing");
648
+ }
649
+ }
650
+
651
+ // Wait a bit for client to connect then load examples
652
+ setTimeout(loadExamples, 1000);
653
+
654
+ </script>
655
+ </body>
656
+ </html>