Asnly committed on
Commit
7eec965
·
verified ·
1 Parent(s): 7591256

Deploy Space app files

Browse files
Files changed (1) hide show
  1. app.py +55 -63
app.py CHANGED
@@ -1,9 +1,8 @@
1
  #!/usr/bin/env python3
2
  """HF Space entry point for 3DReflecNet dataset preview.
3
 
4
- Loads the hybrid Hugging Face release using the `datasets` library:
5
- 1. data/metadata/train.parquet for filtering and GLB paths
6
- 2. data/preview/preview.parquet for the small image preview subset
7
  """
8
  from __future__ import annotations
9
 
@@ -50,54 +49,13 @@ atexit.register(shutil.rmtree, str(_GLB_CACHE_DIR), True)
50
  # Data loading
51
  # ---------------------------------------------------------------------------
52
 
53
- def load_metadata() -> pd.DataFrame:
54
- """Load lightweight metadata columns (one row per instance)."""
55
- METADATA_COLS = [
56
- "instance_id", "ply_path", "glb_path",
57
- "main_category", "sub_category", "model_name",
58
- "material_name", "env_name",
59
- "hasGlass", "isGenerated", "transparent", "near_light",
60
- ]
61
- ds = load_dataset(
62
- DATASET_REPO,
63
- data_files="data/metadata/train.parquet",
64
- split="train",
65
- streaming=True,
66
- token=HF_TOKEN,
67
- ).select_columns(METADATA_COLS)
68
- rows: list[dict[str, Any]] = []
69
- seen: dict[str, dict[str, Any]] = {}
70
- for example in ds:
71
- row = {col: example[col] for col in METADATA_COLS}
72
- iid = row["instance_id"]
73
- if not isinstance(iid, str) or not iid.strip():
74
- raise ValueError(f"Invalid instance_id in metadata row: {iid!r}")
75
- if iid in seen:
76
- if seen[iid] != row:
77
- raise ValueError(f"Inconsistent metadata rows for instance_id {iid!r}.")
78
- continue
79
- seen[iid] = row
80
- rows.append(row)
81
- df = pd.DataFrame(rows, columns=METADATA_COLS)
82
- require_text_columns(
83
- df,
84
- [
85
- "instance_id", "ply_path", "glb_path", "main_category", "sub_category",
86
- "model_name", "material_name", "env_name",
87
- ],
88
- "metadata parquet",
89
- )
90
- require_bool_columns(df, BOOL_COLUMNS, "metadata parquet")
91
- return df
92
-
93
-
94
  def load_preview_dataframe() -> pd.DataFrame:
95
  """Load the small preview Parquet into memory."""
96
  PREVIEW_COLS = [
97
  "instance_id", "split", "frame_id", "rgb", "mask",
98
  "depth_preview", "normal_preview",
99
  "main_category", "sub_category", "model_name",
100
- "material_name", "env_name",
101
  "hasGlass", "isGenerated", "transparent", "near_light",
102
  ]
103
  ds = load_dataset(
@@ -113,7 +71,7 @@ def load_preview_dataframe() -> pd.DataFrame:
113
  df,
114
  [
115
  "instance_id", "split", "main_category", "sub_category",
116
- "model_name", "material_name", "env_name",
117
  ],
118
  "preview parquet",
119
  )
@@ -127,6 +85,40 @@ def load_preview_dataframe() -> pd.DataFrame:
127
  return df
128
 
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  def load_instance_frames(
131
  preview_df: pd.DataFrame, instance_id: str, split: str = "train", max_frames: int = 50,
132
  ) -> list[dict[str, Any]]:
@@ -202,16 +194,18 @@ def download_glb(glb_path: str) -> str:
202
  return str(local)
203
 
204
 
205
- def build_stats_markdown(df: pd.DataFrame) -> str:
206
- """Generate dataset overview statistics."""
207
  total_instances = len(df)
208
  total_models = df["model_name"].nunique()
 
209
  main_cats = df["main_category"].dropna().astype(str)
210
  main_dist = main_cats.value_counts().head(10)
211
  dist_lines = " | ".join(f"**{cat}**: {cnt}" for cat, cnt in main_dist.items())
212
  return (
213
- f"**Dataset Overview** — "
214
  f"**{total_instances}** instances, "
 
215
  f"**{total_models}** models, "
216
  f"**{main_cats.nunique()}** main categories\n\n"
217
  f"Distribution: {dist_lines}"
@@ -222,10 +216,10 @@ def build_stats_markdown(df: pd.DataFrame) -> str:
222
  # App builder
223
  # ---------------------------------------------------------------------------
224
 
225
- def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
226
- model_name_choices = get_distinct_text_choices(df, "model_name")
227
- material_name_choices = get_distinct_text_choices(df, "material_name")
228
- env_name_choices = get_distinct_text_choices(df, "env_name")
229
 
230
  model_display_cols = [
231
  "model_name",
@@ -258,7 +252,7 @@ def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
258
  "glb_path",
259
  ]
260
 
261
- stats_md = build_stats_markdown(df)
262
 
263
  # ---- 3D Viewer callbacks ----
264
 
@@ -272,7 +266,7 @@ def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
272
  near_light: str,
273
  ):
274
  filtered = filter_dataframe_advanced(
275
- preview_df,
276
  model_name=model_name,
277
  material_name=material_name,
278
  env_name=env_name,
@@ -288,7 +282,7 @@ def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
288
  selected = choices[0] if choices else None
289
  summary = (
290
  f"Matched **{len(aggregated)}** models, showing **{len(rows)}**. "
291
- f"Total instances: **{len(filtered)}**."
292
  )
293
  table = shown[model_display_cols] if not shown.empty else pd.DataFrame(columns=model_display_cols)
294
  meta = rows[0] if rows else {}
@@ -326,7 +320,7 @@ def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
326
  near_light: str,
327
  ):
328
  filtered = filter_dataframe_advanced(
329
- df,
330
  model_name=model_name,
331
  material_name=material_name,
332
  env_name=env_name,
@@ -374,7 +368,7 @@ def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
374
  with gr.Blocks(title="3DReflecNet Dataset Explorer") as demo:
375
  gr.Markdown("# 3DReflecNet Dataset Explorer")
376
  gr.Markdown(
377
- "Filter by model/material/environment dropdowns and boolean scene tags, then preview 3D assets or the sampled image subset."
378
  )
379
  gr.Markdown(stats_md)
380
 
@@ -492,13 +486,11 @@ def main() -> None:
492
  setup_logging()
493
  logger.info("DATASET_REPO = %r", DATASET_REPO)
494
  logger.info("HF_TOKEN set = %s, length = %d", HF_TOKEN is not None, len(HF_TOKEN) if HF_TOKEN else 0)
495
- logger.info("Loading dataset metadata from Hugging Face Hub...")
496
- df = load_metadata()
497
- logger.info("Loaded %d instances.", len(df))
498
  logger.info("Loading preview subset from Hugging Face Hub...")
499
  preview_df = load_preview_dataframe()
500
- logger.info("Loaded %d preview rows.", len(preview_df))
501
- app = build_app(df, preview_df)
 
502
  app.launch()
503
 
504
 
 
1
  #!/usr/bin/env python3
2
  """HF Space entry point for 3DReflecNet dataset preview.
3
 
4
+ Loads only data/preview/preview.parquet so the Space exposes the configured
5
+ preview instance subset instead of the full dataset metadata.
 
6
  """
7
  from __future__ import annotations
8
 
 
49
  # Data loading
50
  # ---------------------------------------------------------------------------
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def load_preview_dataframe() -> pd.DataFrame:
53
  """Load the small preview Parquet into memory."""
54
  PREVIEW_COLS = [
55
  "instance_id", "split", "frame_id", "rgb", "mask",
56
  "depth_preview", "normal_preview",
57
  "main_category", "sub_category", "model_name",
58
+ "material_name", "env_name", "glb_path",
59
  "hasGlass", "isGenerated", "transparent", "near_light",
60
  ]
61
  ds = load_dataset(
 
71
  df,
72
  [
73
  "instance_id", "split", "main_category", "sub_category",
74
+ "model_name", "material_name", "env_name", "glb_path",
75
  ],
76
  "preview parquet",
77
  )
 
85
  return df
86
 
87
 
88
+ def build_preview_instance_dataframe(preview_df: pd.DataFrame) -> pd.DataFrame:
89
+ """Derive one row per preview instance from preview frame rows."""
90
+ instance_cols = [
91
+ "instance_id", "main_category", "sub_category", "model_name",
92
+ "material_name", "env_name", "hasGlass", "isGenerated",
93
+ "transparent", "near_light", "glb_path",
94
+ ]
95
+ require_columns(preview_df, instance_cols, "preview parquet")
96
+
97
+ rows: list[dict[str, Any]] = []
98
+ for instance_id, group in preview_df.groupby("instance_id", sort=True):
99
+ row: dict[str, Any] = {}
100
+ for col in instance_cols:
101
+ values = group[col].drop_duplicates().tolist()
102
+ if len(values) != 1:
103
+ raise ValueError(f"Inconsistent {col!r} values for preview instance {instance_id!r}.")
104
+ row[col] = values[0]
105
+ rows.append(row)
106
+
107
+ df = pd.DataFrame(rows, columns=instance_cols)
108
+ require_text_columns(
109
+ df,
110
+ [
111
+ "instance_id", "main_category", "sub_category",
112
+ "model_name", "material_name", "env_name", "glb_path",
113
+ ],
114
+ "preview instance dataframe",
115
+ )
116
+ require_bool_columns(df, BOOL_COLUMNS, "preview instance dataframe")
117
+ if df["glb_path"].map(lambda value: not value.strip()).any():
118
+ raise ValueError("Preview instance dataframe contains empty GLB paths.")
119
+ return df
120
+
121
+
122
  def load_instance_frames(
123
  preview_df: pd.DataFrame, instance_id: str, split: str = "train", max_frames: int = 50,
124
  ) -> list[dict[str, Any]]:
 
194
  return str(local)
195
 
196
 
197
+ def build_stats_markdown(df: pd.DataFrame, preview_df: pd.DataFrame) -> str:
198
+ """Generate preview subset overview statistics."""
199
  total_instances = len(df)
200
  total_models = df["model_name"].nunique()
201
+ total_frames = len(preview_df)
202
  main_cats = df["main_category"].dropna().astype(str)
203
  main_dist = main_cats.value_counts().head(10)
204
  dist_lines = " | ".join(f"**{cat}**: {cnt}" for cat, cnt in main_dist.items())
205
  return (
206
+ f"**Preview Subset** — "
207
  f"**{total_instances}** instances, "
208
+ f"**{total_frames}** frames, "
209
  f"**{total_models}** models, "
210
  f"**{main_cats.nunique()}** main categories\n\n"
211
  f"Distribution: {dist_lines}"
 
216
  # App builder
217
  # ---------------------------------------------------------------------------
218
 
219
+ def build_app(instance_df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
220
+ model_name_choices = get_distinct_text_choices(instance_df, "model_name")
221
+ material_name_choices = get_distinct_text_choices(instance_df, "material_name")
222
+ env_name_choices = get_distinct_text_choices(instance_df, "env_name")
223
 
224
  model_display_cols = [
225
  "model_name",
 
252
  "glb_path",
253
  ]
254
 
255
+ stats_md = build_stats_markdown(instance_df, preview_df)
256
 
257
  # ---- 3D Viewer callbacks ----
258
 
 
266
  near_light: str,
267
  ):
268
  filtered = filter_dataframe_advanced(
269
+ instance_df,
270
  model_name=model_name,
271
  material_name=material_name,
272
  env_name=env_name,
 
282
  selected = choices[0] if choices else None
283
  summary = (
284
  f"Matched **{len(aggregated)}** models, showing **{len(rows)}**. "
285
+ f"Preview instances: **{len(filtered)}**."
286
  )
287
  table = shown[model_display_cols] if not shown.empty else pd.DataFrame(columns=model_display_cols)
288
  meta = rows[0] if rows else {}
 
320
  near_light: str,
321
  ):
322
  filtered = filter_dataframe_advanced(
323
+ instance_df,
324
  model_name=model_name,
325
  material_name=material_name,
326
  env_name=env_name,
 
368
  with gr.Blocks(title="3DReflecNet Dataset Explorer") as demo:
369
  gr.Markdown("# 3DReflecNet Dataset Explorer")
370
  gr.Markdown(
371
+ "Filter the configured preview subset by model/material/environment dropdowns and boolean scene tags."
372
  )
373
  gr.Markdown(stats_md)
374
 
 
486
  setup_logging()
487
  logger.info("DATASET_REPO = %r", DATASET_REPO)
488
  logger.info("HF_TOKEN set = %s, length = %d", HF_TOKEN is not None, len(HF_TOKEN) if HF_TOKEN else 0)
 
 
 
489
  logger.info("Loading preview subset from Hugging Face Hub...")
490
  preview_df = load_preview_dataframe()
491
+ instance_df = build_preview_instance_dataframe(preview_df)
492
+ logger.info("Loaded %d preview rows for %d preview instance(s).", len(preview_df), len(instance_df))
493
+ app = build_app(instance_df, preview_df)
494
  app.launch()
495
 
496