Asnly committed on
Commit
7591256
·
verified ·
1 Parent(s): e20c454

Deploy Space app files

Browse files
Files changed (3) hide show
  1. app.py +506 -0
  2. requirements.txt +5 -0
  3. utils.py +166 -0
app.py ADDED
@@ -0,0 +1,506 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """HF Space entry point for 3DReflecNet dataset preview.
3
+
4
+ Loads the hybrid Hugging Face release using the `datasets` library:
5
+ 1. data/metadata/train.parquet for filtering and GLB paths
6
+ 2. data/preview/preview.parquet for the small image preview subset
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import atexit
11
+ import io
12
+ import os
13
+ import shutil
14
+ import tempfile
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ import gradio as gr
19
+ import pandas as pd
20
+ from datasets import load_dataset
21
+ from huggingface_hub import hf_hub_download
22
+ from PIL import Image
23
+
24
+ from utils import (
25
+ BOOL_FILTER_CHOICES,
26
+ FILTER_ALL,
27
+ aggregate_by_model,
28
+ filter_dataframe_advanced,
29
+ format_instance_choice,
30
+ format_model_choice,
31
+ get_distinct_text_choices,
32
+ logger,
33
+ parse_choice_index,
34
+ require_bool_columns,
35
+ require_columns,
36
+ require_text_columns,
37
+ setup_logging,
38
+ )
39
+
40
+ DATASET_REPO = os.environ.get("DATASET_REPO", "3DReflecNet/3DReflecNet")
41
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
42
+ MAX_RESULTS = 300
43
+ BOOL_COLUMNS = ["hasGlass", "isGenerated", "transparent", "near_light"]
44
+
45
+ _GLB_CACHE_DIR = Path(tempfile.mkdtemp(prefix="glb_cache_"))
46
+ atexit.register(shutil.rmtree, str(_GLB_CACHE_DIR), True)
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Data loading
51
+ # ---------------------------------------------------------------------------
52
+
53
def load_metadata() -> pd.DataFrame:
    """Load lightweight metadata columns (one row per instance).

    Streams data/metadata/train.parquet from the Hub, deduplicates rows by
    instance_id (verifying duplicates are exact copies), and validates the
    text/boolean column contracts before returning the frame.
    """
    METADATA_COLS = [
        "instance_id", "ply_path", "glb_path",
        "main_category", "sub_category", "model_name",
        "material_name", "env_name",
        "hasGlass", "isGenerated", "transparent", "near_light",
    ]
    stream = load_dataset(
        DATASET_REPO,
        data_files="data/metadata/train.parquet",
        split="train",
        streaming=True,
        token=HF_TOKEN,
    ).select_columns(METADATA_COLS)

    # Deduplicate by instance_id while ensuring duplicates agree exactly;
    # dict preserves insertion order, so row order matches the stream.
    unique_rows: dict[str, dict[str, Any]] = {}
    for record in stream:
        entry = {name: record[name] for name in METADATA_COLS}
        instance_id = entry["instance_id"]
        if not isinstance(instance_id, str) or not instance_id.strip():
            raise ValueError(f"Invalid instance_id in metadata row: {instance_id!r}")
        known = unique_rows.get(instance_id)
        if known is not None:
            if known != entry:
                raise ValueError(f"Inconsistent metadata rows for instance_id {instance_id!r}.")
            continue
        unique_rows[instance_id] = entry

    df = pd.DataFrame(list(unique_rows.values()), columns=METADATA_COLS)
    require_text_columns(
        df,
        [
            "instance_id", "ply_path", "glb_path", "main_category", "sub_category",
            "model_name", "material_name", "env_name",
        ],
        "metadata parquet",
    )
    require_bool_columns(df, BOOL_COLUMNS, "metadata parquet")
    return df
92
+
93
+
94
def load_preview_dataframe() -> pd.DataFrame:
    """Load the small preview Parquet fully into memory.

    Validates the expected schema: text/boolean metadata columns, an integer
    frame_id, and non-empty encoded image bytes for the four image columns.
    """
    PREVIEW_COLS = [
        "instance_id", "split", "frame_id", "rgb", "mask",
        "depth_preview", "normal_preview",
        "main_category", "sub_category", "model_name",
        "material_name", "env_name",
        "hasGlass", "isGenerated", "transparent", "near_light",
    ]
    dataset = load_dataset(
        DATASET_REPO,
        data_files="data/preview/preview.parquet",
        split="train",
        streaming=False,
        token=HF_TOKEN,
    ).select_columns(PREVIEW_COLS)
    df = pd.DataFrame(list(dataset))

    require_columns(df, PREVIEW_COLS, "preview parquet")
    require_text_columns(
        df,
        [
            "instance_id", "split", "main_category", "sub_category",
            "model_name", "material_name", "env_name",
        ],
        "preview parquet",
    )
    require_bool_columns(df, BOOL_COLUMNS, "preview parquet")

    frame_ids = df["frame_id"]
    if frame_ids.isna().any() or not pd.api.types.is_integer_dtype(frame_ids):
        raise TypeError(f"Expected integer dtype for column 'frame_id' in preview parquet, got {df['frame_id'].dtype}.")

    # Every image column must hold non-empty encoded bytes for every row.
    for column in ["rgb", "mask", "depth_preview", "normal_preview"]:
        bad = df[column].map(lambda payload: not isinstance(payload, (bytes, bytearray)) or len(payload) == 0)
        if bad.any():
            raise TypeError(f"Expected non-empty binary values for column {column!r} in preview parquet.")
    return df
128
+
129
+
130
def load_instance_frames(
    preview_df: pd.DataFrame, instance_id: str, split: str = "train", max_frames: int = 50,
) -> list[dict[str, Any]]:
    """Load decoded preview images for one instance from the preview frame.

    Frames are ordered by frame_id and capped at max_frames. When the
    requested split is "train" and yields nothing, any split is accepted
    as a fallback. Raises TypeError on empty/non-bytes image payloads.
    """
    iid = str(instance_id)
    subset = preview_df[
        (preview_df["instance_id"].astype(str) == iid)
        & (preview_df["split"].astype(str) == split)
    ].copy()
    records = subset.sort_values("frame_id").head(max_frames).to_dict(orient="records")

    # Fallback: if the default "train" split has no rows, take frames
    # from whatever split this instance appears in.
    if not records and split == "train":
        subset = preview_df[
            preview_df["instance_id"].astype(str) == iid
        ].copy()
        records = subset.sort_values(["split", "frame_id"]).head(max_frames).to_dict(orient="records")

    frames: list[dict[str, Any]] = []
    for record in records:
        fid = int(record["frame_id"])
        item: dict[str, Any] = {"frame_id": fid}
        for key in ("rgb", "mask", "depth_preview", "normal_preview"):
            payload = record[key]
            if not isinstance(payload, (bytes, bytearray)) or not payload:
                raise TypeError(f"Expected non-empty image bytes for {key} frame {fid}.")
            # Copy so the PIL file handle can be closed immediately.
            with Image.open(io.BytesIO(payload)) as img:
                item[key] = img.copy()
        frames.append(item)
    return frames
160
+
161
+
162
def render_frame_gallery(frame_items: list[dict[str, Any]], frame_index: float) -> list[tuple[Any, str]]:
    """Build the (image, caption) gallery for one 1-based frame index.

    The index is rounded and clamped into range; captions combine the
    modality label with the zero-padded frame id.
    """
    if not frame_items:
        return []

    # Slider values are 1-based floats; clamp into the list's valid range.
    position = int(round(frame_index)) - 1
    position = min(max(position, 0), len(frame_items) - 1)
    frame = frame_items[position]
    fid = int(frame["frame_id"])

    modalities = (
        ("rgb", "RGB"),
        ("mask", "Mask"),
        ("depth_preview", "Depth"),
        ("normal_preview", "Normal"),
    )
    return [(frame[key], f"{caption} frame_{fid:05d}") for key, caption in modalities]
181
+
182
+
183
+ # ---------------------------------------------------------------------------
184
+ # Helpers
185
+ # ---------------------------------------------------------------------------
186
+
187
def download_glb(glb_path: str) -> str:
    """Download a pre-converted GLB file from the HF dataset repo.

    Args:
        glb_path: Repo-relative path of the GLB file inside DATASET_REPO.

    Returns:
        Local filesystem path to the cached GLB file.

    Raises:
        ValueError: If glb_path is empty.
    """
    if not glb_path:
        raise ValueError("GLB path is required.")
    # BUG FIX: cache under the full repo-relative path, not just the basename.
    # Caching by Path(glb_path).name alone made distinct repo files that share
    # a file name collide, silently serving the wrong cached model.
    local = _GLB_CACHE_DIR / glb_path
    if local.exists():
        return str(local)
    downloaded = hf_hub_download(
        repo_id=DATASET_REPO,
        filename=glb_path,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    local.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(downloaded, str(local))
    logger.info("GLB ready: %s", local)
    return str(local)
203
+
204
+
205
def build_stats_markdown(df: pd.DataFrame) -> str:
    """Render a one-paragraph Markdown overview of the dataset.

    Reports instance/model/category counts plus the top-10 main-category
    distribution from the given metadata frame.
    """
    categories = df["main_category"].dropna().astype(str)
    top_counts = categories.value_counts().head(10)
    distribution = " | ".join(f"**{name}**: {count}" for name, count in top_counts.items())
    header = (
        f"**Dataset Overview** — "
        f"**{len(df)}** instances, "
        f"**{df['model_name'].nunique()}** models, "
        f"**{categories.nunique()}** main categories\n\n"
    )
    return header + f"Distribution: {distribution}"
219
+
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # App builder
223
+ # ---------------------------------------------------------------------------
224
+
225
def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
    """Build the two-tab Gradio UI (3D Viewer + Image Viewer).

    Args:
        df: Per-instance metadata frame; includes ``glb_path`` needed by the
            3D tab's model aggregation.
        preview_df: Per-frame preview subset with embedded image bytes,
            consumed by ``load_instance_frames``.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    model_name_choices = get_distinct_text_choices(df, "model_name")
    material_name_choices = get_distinct_text_choices(df, "material_name")
    env_name_choices = get_distinct_text_choices(df, "env_name")

    model_display_cols = [
        "model_name",
        "material_name",
        "env_name",
        "hasGlass",
        "isGenerated",
        "transparent",
        "near_light",
        "instance_count",
        "instance_ids",
    ]
    instance_display_cols = [
        "instance_id",
        "model_name",
        "material_name",
        "env_name",
        "hasGlass",
        "isGenerated",
        "transparent",
        "near_light",
    ]
    model_extra_cols = [
        "material_name",
        "env_name",
        "hasGlass",
        "isGenerated",
        "transparent",
        "near_light",
        "glb_path",
    ]

    stats_md = build_stats_markdown(df)

    # ---- 3D Viewer callbacks ----

    def search_models(
        model_name: str,
        material_name: str,
        env_name: str,
        has_glass: str,
        is_generated: str,
        transparent: str,
        near_light: str,
    ):
        # BUG FIX: filter the metadata frame `df`, not `preview_df`.
        # aggregate_by_model is called with model_extra_cols containing
        # "glb_path", a column that exists only in the metadata parquet
        # (PREVIEW_COLS has no glb_path), so filtering preview_df raised
        # KeyError for every non-empty search.
        filtered = filter_dataframe_advanced(
            df,
            model_name=model_name,
            material_name=material_name,
            env_name=env_name,
            has_glass=has_glass,
            is_generated=is_generated,
            transparent=transparent,
            near_light=near_light,
        )
        aggregated = aggregate_by_model(filtered, extra_columns=model_extra_cols)
        shown = aggregated.head(MAX_RESULTS).copy()
        rows = shown.to_dict(orient="records")
        choices = [format_model_choice(i, r) for i, r in enumerate(rows)]
        selected = choices[0] if choices else None
        summary = (
            f"Matched **{len(aggregated)}** models, showing **{len(rows)}**. "
            f"Total instances: **{len(filtered)}**."
        )
        table = shown[model_display_cols] if not shown.empty else pd.DataFrame(columns=model_display_cols)
        meta = rows[0] if rows else {}
        return summary, table, gr.update(choices=choices, value=selected), rows, meta

    def on_model_select(choice: str, rows: list[dict[str, Any]]):
        # Show the metadata JSON for the picked model; {} when selection is stale.
        if not choice or not rows:
            return {}
        idx = parse_choice_index(choice, len(rows))
        if idx is None:
            return {}
        return rows[idx]

    def on_load_3d(rows: list[dict[str, Any]], choice: str):
        # Resolve the selected row's GLB path and download it for the viewer.
        if not choice or not rows:
            return None
        idx = parse_choice_index(choice, len(rows))
        if idx is None:
            return None
        glb = rows[idx]["glb_path"]
        logger.info("on_load_3d: glb_path=%r", glb)
        if not isinstance(glb, str) or not glb.strip():
            raise ValueError(f"Selected model row does not contain a GLB path: {rows[idx]!r}")
        return download_glb(glb)

    # ---- Image Viewer callbacks ----

    def search_instances(
        model_name: str,
        material_name: str,
        env_name: str,
        has_glass: str,
        is_generated: str,
        transparent: str,
        near_light: str,
    ):
        filtered = filter_dataframe_advanced(
            df,
            model_name=model_name,
            material_name=material_name,
            env_name=env_name,
            has_glass=has_glass,
            is_generated=is_generated,
            transparent=transparent,
            near_light=near_light,
        )
        shown = filtered.head(MAX_RESULTS).copy()
        rows = shown[instance_display_cols].to_dict(orient="records")
        choices = [format_instance_choice(i, r) for i, r in enumerate(rows)]
        selected = choices[0] if choices else None
        summary = f"Matched **{len(filtered)}** preview instances, showing **{len(rows)}**."
        table = shown[instance_display_cols] if not shown.empty else pd.DataFrame(columns=instance_display_cols)
        return summary, table, gr.update(choices=choices, value=selected), rows

    def on_load_images(rows: list[dict[str, Any]], choice: str):
        # Load up to 50 frames for the selected instance; disable the slider
        # when nothing can be shown.
        slider_empty = gr.update(minimum=1, maximum=1, step=1, value=1, interactive=False)
        if not choice or not rows:
            return [], slider_empty, []
        idx = parse_choice_index(choice, len(rows))
        if idx is None:
            return [], slider_empty, []
        instance_id = rows[idx]["instance_id"]
        if not isinstance(instance_id, str) or not instance_id.strip():
            raise ValueError(f"Selected instance row has invalid instance_id: {rows[idx]!r}")
        logger.info("Loading images for instance: %s", instance_id)
        frame_items = load_instance_frames(preview_df, instance_id, split="train", max_frames=50)
        if not frame_items:
            return [], slider_empty, []
        slider_ready = gr.update(
            minimum=1,
            maximum=len(frame_items),
            step=1,
            value=1,
            interactive=True,
        )
        return render_frame_gallery(frame_items, 1), slider_ready, frame_items

    def on_frame_change(frame_idx: float, frame_items: list[dict[str, Any]]):
        return render_frame_gallery(frame_items, frame_idx)

    # ---- UI ----

    with gr.Blocks(title="3DReflecNet Dataset Explorer") as demo:
        gr.Markdown("# 3DReflecNet Dataset Explorer")
        gr.Markdown(
            "Filter by model/material/environment dropdowns and boolean scene tags, then preview 3D assets or the sampled image subset."
        )
        gr.Markdown(stats_md)

        with gr.Tabs():
            # === Tab 1: 3D Viewer ===
            with gr.TabItem("3D Viewer"):
                with gr.Row():
                    m3d_model_name = gr.Dropdown(label="model_name", choices=model_name_choices, value=FILTER_ALL)
                    m3d_material_name = gr.Dropdown(label="material_name", choices=material_name_choices, value=FILTER_ALL)
                    m3d_env_name = gr.Dropdown(label="env_name", choices=env_name_choices, value=FILTER_ALL)
                with gr.Row():
                    m3d_has_glass = gr.Dropdown(label="hasGlass", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    m3d_is_generated = gr.Dropdown(label="isGenerated", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    m3d_transparent = gr.Dropdown(label="transparent", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    m3d_near_light = gr.Dropdown(label="near_light", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                m3d_btn = gr.Button("Search", variant="primary")

                m3d_summary = gr.Markdown("Click **Search** to browse models.")
                m3d_table = gr.Dataframe(headers=model_display_cols, interactive=False, wrap=True)
                m3d_select = gr.Dropdown(label="Select model", choices=[], value=None)
                m3d_meta = gr.JSON(label="Model Metadata")

                m3d_load_btn = gr.Button("Load 3D Preview", variant="primary")
                m3d_viewer = gr.Model3D(
                    label="3D Preview (GLB)",
                    clear_color=(0.35, 0.35, 0.38, 1.0),
                    camera_position=(35, 70, 3.5),
                )

                # Holds the aggregated model rows backing the dropdown choices.
                m3d_state = gr.State([])

                m3d_btn.click(
                    fn=search_models,
                    inputs=[
                        m3d_model_name,
                        m3d_material_name,
                        m3d_env_name,
                        m3d_has_glass,
                        m3d_is_generated,
                        m3d_transparent,
                        m3d_near_light,
                    ],
                    outputs=[m3d_summary, m3d_table, m3d_select, m3d_state, m3d_meta],
                )
                m3d_select.change(
                    fn=on_model_select,
                    inputs=[m3d_select, m3d_state],
                    outputs=[m3d_meta],
                )
                m3d_load_btn.click(
                    fn=on_load_3d,
                    inputs=[m3d_state, m3d_select],
                    outputs=[m3d_viewer],
                )

            # === Tab 2: Image Viewer ===
            with gr.TabItem("Image Viewer"):
                with gr.Row():
                    img_model_name = gr.Dropdown(label="model_name", choices=model_name_choices, value=FILTER_ALL)
                    img_material_name = gr.Dropdown(label="material_name", choices=material_name_choices, value=FILTER_ALL)
                    img_env_name = gr.Dropdown(label="env_name", choices=env_name_choices, value=FILTER_ALL)
                with gr.Row():
                    img_has_glass = gr.Dropdown(label="hasGlass", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    img_is_generated = gr.Dropdown(label="isGenerated", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    img_transparent = gr.Dropdown(label="transparent", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    img_near_light = gr.Dropdown(label="near_light", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                img_btn = gr.Button("Search", variant="primary")

                img_summary = gr.Markdown("Click **Search** to browse instances.")
                img_table = gr.Dataframe(headers=instance_display_cols, interactive=False, wrap=True)
                img_select = gr.Dropdown(label="Select instance", choices=[], value=None)

                img_load_btn = gr.Button("Load Instance Frames", variant="primary")
                img_gallery = gr.Gallery(label="Frame Images", columns=4, rows=1, object_fit="contain", height="auto")
                img_frame_slider = gr.Slider(
                    label="Frame",
                    minimum=1,
                    maximum=1,
                    step=1,
                    value=1,
                    interactive=False,
                )

                # img_state: selected instance rows; img_frame_state: loaded frames.
                img_state = gr.State([])
                img_frame_state = gr.State([])

                img_btn.click(
                    fn=search_instances,
                    inputs=[
                        img_model_name,
                        img_material_name,
                        img_env_name,
                        img_has_glass,
                        img_is_generated,
                        img_transparent,
                        img_near_light,
                    ],
                    outputs=[img_summary, img_table, img_select, img_state],
                )
                img_load_btn.click(
                    fn=on_load_images,
                    inputs=[img_state, img_select],
                    outputs=[img_gallery, img_frame_slider, img_frame_state],
                )
                img_frame_slider.change(
                    fn=on_frame_change,
                    inputs=[img_frame_slider, img_frame_state],
                    outputs=[img_gallery],
                )

    return demo
489
+
490
+
491
def main() -> None:
    """Entry point: configure logging, load both parquet tables, launch the UI."""
    setup_logging()
    logger.info("DATASET_REPO = %r", DATASET_REPO)
    token_length = len(HF_TOKEN) if HF_TOKEN else 0
    logger.info("HF_TOKEN set = %s, length = %d", HF_TOKEN is not None, token_length)

    logger.info("Loading dataset metadata from Hugging Face Hub...")
    metadata_df = load_metadata()
    logger.info("Loaded %d instances.", len(metadata_df))

    logger.info("Loading preview subset from Hugging Face Hub...")
    preview_df = load_preview_dataframe()
    logger.info("Loaded %d preview rows.", len(preview_df))

    build_app(metadata_df, preview_df).launch()
503
+
504
+
505
# Allow running `python app.py` directly (the Space runtime also imports this).
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ datasets>=2.14.0
3
+ huggingface_hub>=0.17.0
4
+ pandas>=1.5.0
5
+ Pillow>=9.0.0
utils.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Shared utilities for 3DReflecNet HF release apps."""
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any
7
+
8
+ import pandas as pd
9
+
10
+ logger = logging.getLogger("hf_release")
11
+
12
+ FILTER_ALL = "ALL"
13
+ BOOL_FILTER_CHOICES = [FILTER_ALL, "True", "False"]
14
+
15
+
16
def setup_logging(level: int = logging.INFO) -> None:
    """Configure root logging for hf_release modules (no-op if already configured)."""
    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    logging.basicConfig(level=level, format=log_format, datefmt=date_format)
23
+
24
+
25
def require_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Raise KeyError if any of *columns* is absent from *df*."""
    absent = [name for name in columns if name not in df.columns]
    if absent:
        raise KeyError(f"Missing required column(s) in {context}: {', '.join(absent)}")
29
+
30
+
31
def require_bool_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that *columns* exist, contain no nulls, and have boolean dtype."""
    require_columns(df, columns, context)
    for name in columns:
        series = df[name]
        # Null check first: a nullable boolean column is a data error here.
        if series.isna().any():
            raise ValueError(f"Boolean column {name!r} contains null values in {context}.")
        if not pd.api.types.is_bool_dtype(series):
            raise TypeError(f"Expected boolean dtype for column {name!r} in {context}, got {series.dtype}.")
38
+
39
+
40
def require_text_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that *columns* exist, contain no nulls, and hold str values."""
    require_columns(df, columns, context)
    for name in columns:
        series = df[name]
        if series.isna().any():
            raise ValueError(f"Text column {name!r} contains null values in {context}.")
        non_str = series.map(lambda value: not isinstance(value, str))
        if non_str.any():
            # Report the concrete offending type to speed up debugging.
            offender = type(df.loc[non_str, name].iloc[0]).__name__
            raise TypeError(f"Expected string values for column {name!r} in {context}, got {offender}.")
49
+
50
+
51
def parse_bool_filter_value(selected_value: str) -> bool:
    """Map the dropdown strings "True"/"False" to booleans; reject anything else."""
    mapping = {"True": True, "False": False}
    try:
        return mapping[selected_value]
    except KeyError:
        raise ValueError(f"Unsupported boolean filter value: {selected_value!r}") from None
57
+
58
+
59
def apply_bool_filter(df: pd.DataFrame, column: str, selected_value: str) -> pd.DataFrame:
    """Apply a tri-state (ALL/True/False) boolean filter to one column."""
    if selected_value == FILTER_ALL:
        return df
    if column not in df.columns:
        raise KeyError(f"Missing required boolean filter column: {column}")
    series = df[column]
    if not pd.api.types.is_bool_dtype(series):
        raise TypeError(f"Expected boolean dtype for column {column!r}, got {series.dtype}.")
    wanted = parse_bool_filter_value(selected_value)
    return df[series == wanted]
69
+
70
+
71
def get_distinct_text_choices(df: pd.DataFrame, column: str, all_label: str = FILTER_ALL) -> list[str]:
    """Build sorted dropdown choices (ALL first) from distinct non-empty values."""
    if column not in df.columns:
        raise KeyError(f"Missing required text choice column: {column}")
    distinct = {
        stripped
        for raw in df[column].dropna().tolist()
        if (stripped := str(raw).strip())
    }
    if not distinct:
        raise ValueError(f"Column {column!r} has no non-empty values.")
    return [all_label, *sorted(distinct)]
83
+
84
+
85
def _apply_text_equals(df: pd.DataFrame, column: str, selected_value: str, all_label: str = FILTER_ALL) -> pd.DataFrame:
    """Keep rows whose stripped text equals the selection; ALL/empty is a no-op."""
    if column not in df.columns:
        raise KeyError(f"Missing required text filter column: {column}")
    wanted = (selected_value or "").strip()
    if not wanted or wanted == all_label:
        return df
    return df[df[column].astype(str).str.strip() == wanted]
92
+
93
+
94
def filter_dataframe_advanced(
    df: pd.DataFrame,
    model_name: str = FILTER_ALL,
    material_name: str = FILTER_ALL,
    env_name: str = FILTER_ALL,
    has_glass: str = FILTER_ALL,
    is_generated: str = FILTER_ALL,
    transparent: str = FILTER_ALL,
    near_light: str = FILTER_ALL,
) -> pd.DataFrame:
    """Filter by exact model/material/environment text plus four tri-state bools."""
    text_filters = (
        ("model_name", model_name),
        ("material_name", material_name),
        ("env_name", env_name),
    )
    bool_filters = (
        ("hasGlass", has_glass),
        ("isGenerated", is_generated),
        ("transparent", transparent),
        ("near_light", near_light),
    )
    result = df
    for column, value in text_filters:
        result = _apply_text_equals(result, column, value)
    for column, value in bool_filters:
        result = apply_bool_filter(result, column, value)
    return result.reset_index(drop=True)
114
+
115
+
116
+ def aggregate_by_model(
117
+ df: pd.DataFrame,
118
+ extra_columns: list[str] | None = None,
119
+ ) -> pd.DataFrame:
120
+ """Group instances by model_name, counting instances and collecting IDs."""
121
+ base_cols = ["model_name", "main_category", "sub_category", "instance_count", "instance_ids"]
122
+ extra = extra_columns or []
123
+ all_cols = base_cols + extra
124
+
125
+ if df.empty:
126
+ return pd.DataFrame(columns=all_cols)
127
+
128
+ require_columns(df, ["model_name", "main_category", "sub_category", "instance_id"] + extra, "model aggregation")
129
+
130
+ rows: list[dict[str, Any]] = []
131
+ for model_name, group in df.groupby("model_name", dropna=False, sort=True):
132
+ instance_ids = sorted({
133
+ str(v) for v in group["instance_id"].dropna().tolist() if str(v).strip()
134
+ })
135
+ row: dict[str, Any] = {
136
+ "model_name": str(model_name),
137
+ "main_category": str(group["main_category"].iloc[0]),
138
+ "sub_category": str(group["sub_category"].iloc[0]),
139
+ "instance_count": len(instance_ids),
140
+ "instance_ids": "\n".join(instance_ids),
141
+ }
142
+ for col in extra:
143
+ candidates = [str(v) for v in group[col].dropna().tolist() if str(v).strip()]
144
+ row[col] = candidates[0] if candidates else ""
145
+ rows.append(row)
146
+ return pd.DataFrame(rows)
147
+
148
+
149
def format_model_choice(index: int, row: dict[str, Any]) -> str:
    """Render an aggregated-model row as a dropdown label (zero-padded index)."""
    model = row["model_name"]
    count = row["instance_count"]
    return f"{index:04d} | {model} | instances {count}"
151
+
152
+
153
def format_instance_choice(index: int, row: dict[str, Any]) -> str:
    """Render an instance row as a dropdown label (zero-padded index)."""
    instance = row["instance_id"]
    model = row["model_name"]
    return f"{index:04d} | {instance} | {model}"
155
+
156
+
157
+ def parse_choice_index(choice: str, length: int) -> int | None:
158
+ """Extract the numeric index from a formatted choice string."""
159
+ index_str = choice.split("|", 1)[0].strip()
160
+ try:
161
+ idx = int(index_str)
162
+ except ValueError:
163
+ return None
164
+ if idx < 0 or idx >= length:
165
+ return None
166
+ return idx