Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """Shared utilities for 3DReflecNet HF release apps.""" | |
| from __future__ import annotations | |
| import logging | |
| from typing import Any | |
| import pandas as pd | |
# Logger shared by the helpers in this module.
logger = logging.getLogger("hf_release")

# Dropdown label that means "do not filter on this field".
FILTER_ALL = "ALL"

# Choices offered by a tri-state boolean dropdown: no filter, True, or False.
BOOL_FILTER_CHOICES = [FILTER_ALL, "True", "False"]
def setup_logging(level: int = logging.INFO) -> None:
    """Configure logging for hf_release modules.

    Args:
        level: Threshold handed to ``logging.basicConfig``; defaults to
            ``logging.INFO``.
    """
    record_layout = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    timestamp_layout = "%Y-%m-%d %H:%M:%S"
    logging.basicConfig(level=level, format=record_layout, datefmt=timestamp_layout)
def require_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Ensure every name in ``columns`` is a column of ``df``.

    Args:
        df: Frame to inspect.
        columns: Column names that must be present.
        context: Short description of the call site, embedded in the error text.

    Raises:
        KeyError: If one or more names are absent. The message lists them in
            the order they were requested.
    """
    known = df.columns
    absent = [name for name in columns if name not in known]
    if not absent:
        return
    listing = ", ".join(absent)
    raise KeyError(f"Missing required column(s) in {context}: {listing}")
def require_bool_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Ensure the named columns exist, contain no nulls, and have a boolean dtype.

    Args:
        df: Frame to inspect.
        columns: Column names that must be null-free boolean columns.
        context: Short description of the call site, embedded in error text.

    Raises:
        KeyError: If any column is missing (raised by ``require_columns``).
        ValueError: If a column contains null values.
        TypeError: If a column's dtype is not boolean.
    """
    require_columns(df, columns, context)
    for name in columns:
        series = df[name]
        # Nulls are reported before the dtype check so the error names the
        # null problem rather than the dtype.
        has_nulls = series.isna().any()
        if has_nulls:
            raise ValueError(f"Boolean column {name!r} contains null values in {context}.")
        if pd.api.types.is_bool_dtype(series):
            continue
        raise TypeError(f"Expected boolean dtype for column {name!r} in {context}, got {series.dtype}.")
def require_text_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Ensure the named columns exist, contain no nulls, and hold only ``str`` values.

    Args:
        df: Frame to inspect.
        columns: Column names that must be null-free text columns.
        context: Short description of the call site, embedded in error text.

    Raises:
        KeyError: If any column is missing (raised by ``require_columns``).
        ValueError: If a column contains null values.
        TypeError: If a column holds a non-string value. The message names the
            type of the first offending value.
    """
    require_columns(df, columns, context)
    for name in columns:
        series = df[name]
        if series.isna().any():
            raise ValueError(f"Text column {name!r} contains null values in {context}.")
        non_text = series.map(lambda item: not isinstance(item, str))
        if not non_text.any():
            continue
        first_offender = df.loc[non_text, name].iloc[0]
        bad_type = type(first_offender).__name__
        raise TypeError(f"Expected string values for column {name!r} in {context}, got {bad_type}.")
def parse_bool_filter_value(selected_value: str) -> bool:
    """Translate the dropdown label ``"True"`` or ``"False"`` into a bool.

    Args:
        selected_value: The label chosen in a boolean filter dropdown.

    Returns:
        ``True`` for ``"True"``, ``False`` for ``"False"``.

    Raises:
        ValueError: For any other value.
    """
    recognised = (("True", True), ("False", False))
    for label, parsed in recognised:
        if selected_value == label:
            return parsed
    raise ValueError(f"Unsupported boolean filter value: {selected_value!r}")
def apply_bool_filter(df: pd.DataFrame, column: str, selected_value: str) -> pd.DataFrame:
    """Apply a tri-state bool filter (ALL/True/False) to a DataFrame column.

    Args:
        df: Frame to filter.
        column: Name of the boolean column to filter on.
        selected_value: ``FILTER_ALL`` to skip filtering, otherwise ``"True"``
            or ``"False"``.

    Returns:
        ``df`` itself when ``selected_value`` is ``FILTER_ALL``; otherwise the
        rows whose ``column`` equals the parsed boolean.

    Raises:
        KeyError: If ``column`` is not in ``df`` (only checked when filtering).
        TypeError: If ``column`` does not have a boolean dtype.
        ValueError: If ``selected_value`` is not a recognised label.
    """
    # "ALL" short-circuits before any validation of the column.
    if selected_value == FILTER_ALL:
        return df
    if column not in df.columns:
        raise KeyError(f"Missing required boolean filter column: {column}")
    series = df[column]
    if not pd.api.types.is_bool_dtype(series):
        raise TypeError(f"Expected boolean dtype for column {column!r}, got {series.dtype}.")
    wanted = parse_bool_filter_value(selected_value)
    keep = series == wanted
    return df[keep]
def get_distinct_text_choices(df: pd.DataFrame, column: str, all_label: str = FILTER_ALL) -> list[str]:
    """Build dropdown choices from the distinct non-empty text values of a column.

    Null entries are dropped, and every remaining value is converted with
    ``str`` and stripped of surrounding whitespace before de-duplication.

    Args:
        df: Frame holding the column.
        column: Column whose values become the choices.
        all_label: Label placed first in the returned list.

    Returns:
        ``all_label`` followed by the distinct stripped values in sorted order.

    Raises:
        KeyError: If ``column`` is not in ``df``.
        ValueError: If no non-empty value remains after stripping.
    """
    if column not in df.columns:
        raise KeyError(f"Missing required text choice column: {column}")
    stripped = (str(raw).strip() for raw in df[column].dropna().tolist())
    distinct = {text for text in stripped if text}
    if distinct:
        return [all_label] + sorted(distinct)
    raise ValueError(f"Column {column!r} has no non-empty values.")
def _apply_text_equals(df: pd.DataFrame, column: str, selected_value: str, all_label: str = FILTER_ALL) -> pd.DataFrame:
    """Keep the rows whose ``column`` text equals ``selected_value`` after stripping.

    Args:
        df: Frame to filter.
        column: Column compared against the selection; its values are cast to
            ``str`` and stripped before comparison.
        selected_value: Chosen text. A falsy value, a blank string, or
            ``all_label`` means "no filter".
        all_label: Label that disables the filter.

    Returns:
        ``df`` itself when no filter applies, otherwise the matching rows.

    Raises:
        KeyError: If ``column`` is not in ``df``. This is checked before the
            "no filter" case, so it is raised even when nothing is selected.
    """
    if column not in df.columns:
        raise KeyError(f"Missing required text filter column: {column}")
    wanted = selected_value.strip() if selected_value else ""
    if wanted == "" or wanted == all_label:
        return df
    normalized = df[column].astype(str).str.strip()
    return df[normalized == wanted]
def filter_dataframe_advanced(
    df: pd.DataFrame,
    model_name: str = FILTER_ALL,
    material_name: str = FILTER_ALL,
    env_name: str = FILTER_ALL,
    has_glass: str = FILTER_ALL,
    is_generated: str = FILTER_ALL,
    transparent: str = FILTER_ALL,
    near_light: str = FILTER_ALL,
) -> pd.DataFrame:
    """Filter by exact model/material/environment and four tri-state bool fields.

    The three text filters run first, then the four boolean filters, each on
    the result of the previous one.

    Args:
        df: Frame to filter.
        model_name: Selection for the ``model_name`` column.
        material_name: Selection for the ``material_name`` column.
        env_name: Selection for the ``env_name`` column.
        has_glass: Tri-state selection for the ``hasGlass`` column.
        is_generated: Tri-state selection for the ``isGenerated`` column.
        transparent: Tri-state selection for the ``transparent`` column.
        near_light: Tri-state selection for the ``near_light`` column.

    Returns:
        The surviving rows with a fresh 0-based index.
    """
    text_selections = (
        ("model_name", model_name),
        ("material_name", material_name),
        ("env_name", env_name),
    )
    bool_selections = (
        ("hasGlass", has_glass),
        ("isGenerated", is_generated),
        ("transparent", transparent),
        ("near_light", near_light),
    )

    result = df
    for column, choice in text_selections:
        result = _apply_text_equals(result, column, choice)
    for column, choice in bool_selections:
        result = apply_bool_filter(result, column, choice)
    return result.reset_index(drop=True)
def aggregate_by_model(
    df: pd.DataFrame,
    extra_columns: list[str] | None = None,
) -> pd.DataFrame:
    """Group instances by ``model_name``, counting instances and collecting IDs.

    Args:
        df: Instance-level frame. Unless it is empty, it must contain
            ``model_name``, ``main_category``, ``sub_category``,
            ``instance_id`` and every name in ``extra_columns``.
        extra_columns: Additional columns to carry over. Each output row gets
            the group's first non-null, non-blank value as a string, or ``""``
            when there is none.

    Returns:
        One row per model with ``model_name``, ``main_category``,
        ``sub_category`` (both taken from the group's first row),
        ``instance_count`` (number of distinct non-blank instance IDs),
        ``instance_ids`` (those IDs sorted and joined by newlines), and the
        extra columns. An empty input yields an empty frame with those columns.

    Raises:
        KeyError: If a required column is missing from a non-empty ``df``.
    """
    extras = extra_columns or []
    output_columns = ["model_name", "main_category", "sub_category", "instance_count", "instance_ids"] + extras

    # An empty frame is returned before any column validation takes place.
    if df.empty:
        return pd.DataFrame(columns=output_columns)

    required = ["model_name", "main_category", "sub_category", "instance_id"] + extras
    require_columns(df, required, "model aggregation")

    records: list[dict[str, Any]] = []
    # dropna=False keeps rows whose model_name is null as their own group.
    for key, members in df.groupby("model_name", dropna=False, sort=True):
        distinct_ids = {
            str(raw) for raw in members["instance_id"].dropna().tolist() if str(raw).strip()
        }
        ordered_ids = sorted(distinct_ids)

        record: dict[str, Any] = {
            "model_name": str(key),
            "main_category": str(members["main_category"].iloc[0]),
            "sub_category": str(members["sub_category"].iloc[0]),
            "instance_count": len(ordered_ids),
            "instance_ids": "\n".join(ordered_ids),
        }
        for name in extras:
            non_blank = (str(raw) for raw in members[name].dropna().tolist() if str(raw).strip())
            record[name] = next(non_blank, "")
        records.append(record)

    return pd.DataFrame(records)
def format_model_choice(index: int, row: dict[str, Any]) -> str:
    """Render a model row as a dropdown label.

    The label is ``"<index> | <model_name> | instances <instance_count>"``
    with the index zero-padded to four digits.

    Args:
        index: Position of the row, shown as the zero-padded prefix.
        row: Mapping providing ``model_name`` and ``instance_count``.
    """
    position = f"{index:04d}"
    name = row["model_name"]
    count = row["instance_count"]
    return " | ".join((position, f"{name}", f"instances {count}"))
def format_instance_choice(index: int, row: dict[str, Any]) -> str:
    """Render an instance row as a dropdown label.

    The label is ``"<index> | <instance_id> | <model_name>"`` with the index
    zero-padded to four digits.

    Args:
        index: Position of the row, shown as the zero-padded prefix.
        row: Mapping providing ``instance_id`` and ``model_name``.
    """
    position = f"{index:04d}"
    instance = row["instance_id"]
    model = row["model_name"]
    return " | ".join((position, f"{instance}", f"{model}"))
def parse_choice_index(choice: str | None, length: int) -> int | None:
    """Extract the numeric index from a formatted choice string.

    The index is the text before the first ``|`` in ``choice``, with
    surrounding whitespace ignored.

    Args:
        choice: A label such as ``"0003 | chair | instances 5"``. ``None`` or
            any other non-string value is treated as "no selection".
        length: Number of valid entries; an index is accepted only when
            ``0 <= index < length``.

    Returns:
        The parsed index, or ``None`` when ``choice`` is not a string, the
        prefix is not an integer, or the index is out of range.
    """
    # A non-string choice (e.g. None) has no ``split``/``partition``; report
    # it as "no index" like every other unusable input instead of raising
    # AttributeError.
    if not isinstance(choice, str):
        return None
    prefix, _, _ = choice.partition("|")
    try:
        idx = int(prefix.strip())
    except ValueError:
        return None
    return idx if 0 <= idx < length else None