#!/usr/bin/env python3
"""Shared utilities for 3DReflecNet HF release apps."""

from __future__ import annotations

import logging
from typing import Any

import pandas as pd

logger = logging.getLogger("hf_release")

# Dropdown value meaning "do not filter on this field".
FILTER_ALL = "ALL"
# Choices accepted by the tri-state boolean filters.
BOOL_FILTER_CHOICES = [FILTER_ALL, "True", "False"]


def setup_logging(level: int = logging.INFO) -> None:
    """Configure logging for hf_release modules.

    Args:
        level: Logging level passed to ``logging.basicConfig``.
    """
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )


def require_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Ensure every name in *columns* is a column of *df*.

    Args:
        df: Frame to validate.
        columns: Column names that must be present.
        context: Human-readable label included in the error message.

    Raises:
        KeyError: If one or more columns are missing; all missing names are
            listed in the message.
    """
    missing = [column for column in columns if column not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s) in {context}: {', '.join(missing)}")


def require_bool_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Ensure *columns* exist, contain no nulls, and have a boolean dtype.

    Args:
        df: Frame to validate.
        columns: Column names expected to be boolean.
        context: Human-readable label included in error messages.

    Raises:
        KeyError: If a column is missing.
        ValueError: If a column contains null values.
        TypeError: If a column's dtype is not boolean.
    """
    require_columns(df, columns, context)
    for column in columns:
        series = df[column]
        # Nulls are reported before the dtype so the more specific cause wins.
        if series.isna().any():
            raise ValueError(f"Boolean column {column!r} contains null values in {context}.")
        if not pd.api.types.is_bool_dtype(series):
            raise TypeError(f"Expected boolean dtype for column {column!r} in {context}, got {df[column].dtype}.")


def require_text_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Ensure *columns* exist, contain no nulls, and hold only ``str`` values.

    Args:
        df: Frame to validate.
        columns: Column names expected to contain strings.
        context: Human-readable label included in error messages.

    Raises:
        KeyError: If a column is missing.
        ValueError: If a column contains null values.
        TypeError: If a column holds a non-string value; the message names the
            type of the first offending value.
    """
    require_columns(df, columns, context)
    for column in columns:
        if df[column].isna().any():
            raise ValueError(f"Text column {column!r} contains null values in {context}.")
        invalid = df[column].map(lambda value: not isinstance(value, str))
        if invalid.any():
            bad_type = type(df.loc[invalid, column].iloc[0]).__name__
            raise TypeError(f"Expected string values for column {column!r} in {context}, got {bad_type}.")


def parse_bool_filter_value(selected_value: str) -> bool:
    """Convert the filter choice ``"True"`` / ``"False"`` to a ``bool``.

    Raises:
        ValueError: If *selected_value* is any other value.
    """
    if selected_value == "True":
        return True
    if selected_value == "False":
        return False
    raise ValueError(f"Unsupported boolean filter value: {selected_value!r}")


def apply_bool_filter(df: pd.DataFrame, column: str, selected_value: str) -> pd.DataFrame:
    """Apply a tri-state bool filter (ALL/True/False) to a DataFrame column.

    Args:
        df: Frame to filter.
        column: Name of the boolean column to filter on.
        selected_value: ``FILTER_ALL``, ``"True"`` or ``"False"``.

    Returns:
        *df* itself when *selected_value* is ``FILTER_ALL`` (the column is not
        inspected in that case); otherwise the rows whose *column* equals the
        selected boolean.

    Raises:
        KeyError: If *column* is missing.
        TypeError: If *column* does not have a boolean dtype.
        ValueError: If *selected_value* is not a supported choice.
    """
    if selected_value == FILTER_ALL:
        return df
    if column not in df.columns:
        raise KeyError(f"Missing required boolean filter column: {column}")
    if not pd.api.types.is_bool_dtype(df[column]):
        raise TypeError(f"Expected boolean dtype for column {column!r}, got {df[column].dtype}.")
    target = parse_bool_filter_value(selected_value)
    return df[df[column] == target]


def get_distinct_text_choices(df: pd.DataFrame, column: str, all_label: str = FILTER_ALL) -> list[str]:
    """Build dropdown choices from distinct non-empty text values.

    Values are converted with ``str`` and stripped; nulls and blank strings
    are ignored.

    Args:
        df: Frame providing the values.
        column: Column to collect choices from.
        all_label: Label placed first in the returned list.

    Returns:
        ``[all_label]`` followed by the distinct values in sorted order.

    Raises:
        KeyError: If *column* is missing.
        ValueError: If the column has no non-empty values.
    """
    if column not in df.columns:
        raise KeyError(f"Missing required text choice column: {column}")
    stripped = (str(value).strip() for value in df[column].dropna().tolist())
    values = {text for text in stripped if text}
    if not values:
        raise ValueError(f"Column {column!r} has no non-empty values.")
    return [all_label, *sorted(values)]


def _apply_text_equals(
    df: pd.DataFrame,
    column: str,
    selected_value: str,
    all_label: str = FILTER_ALL,
) -> pd.DataFrame:
    """Keep rows whose *column*, as stripped text, equals *selected_value*.

    A ``None``, blank, or *all_label* selection disables the filter and
    returns *df* unchanged.

    Raises:
        KeyError: If *column* is missing (checked even when the filter is
            disabled).
    """
    if column not in df.columns:
        raise KeyError(f"Missing required text filter column: {column}")
    text = (selected_value or "").strip()
    if not text or text == all_label:
        return df
    return df[df[column].astype(str).str.strip() == text]


def filter_dataframe_advanced(
    df: pd.DataFrame,
    model_name: str = FILTER_ALL,
    material_name: str = FILTER_ALL,
    env_name: str = FILTER_ALL,
    has_glass: str = FILTER_ALL,
    is_generated: str = FILTER_ALL,
    transparent: str = FILTER_ALL,
    near_light: str = FILTER_ALL,
) -> pd.DataFrame:
    """Filter by model/material/environment exact selection and four tri-state bool fields.

    Text selections are matched against the ``model_name``, ``material_name``
    and ``env_name`` columns; boolean selections against ``hasGlass``,
    ``isGenerated``, ``transparent`` and ``near_light``.

    Returns:
        The rows passing every filter, with the index reset to ``0..n-1``.

    Raises:
        KeyError: If a text column is missing, or a boolean column is missing
            while its filter is not ``FILTER_ALL``.
        TypeError: If an active boolean filter targets a non-boolean column.
        ValueError: If a boolean selection is not a supported choice.
    """
    text_filters = (
        ("model_name", model_name),
        ("material_name", material_name),
        ("env_name", env_name),
    )
    bool_filters = (
        ("hasGlass", has_glass),
        ("isGenerated", is_generated),
        ("transparent", transparent),
        ("near_light", near_light),
    )
    selected = df
    for column, choice in text_filters:
        selected = _apply_text_equals(selected, column, choice)
    for column, choice in bool_filters:
        selected = apply_bool_filter(selected, column, choice)
    return selected.reset_index(drop=True)


def aggregate_by_model(
    df: pd.DataFrame,
    extra_columns: list[str] | None = None,
) -> pd.DataFrame:
    """Group instances by model_name, counting instances and collecting IDs.

    Args:
        df: Instance-level frame with ``model_name``, ``main_category``,
            ``sub_category`` and ``instance_id`` columns.
        extra_columns: Additional columns to carry over; each model gets the
            first non-null, non-blank value (as a string) or ``""``.

    Returns:
        One row per model, sorted by ``model_name``, with columns
        ``model_name``, ``main_category``, ``sub_category``,
        ``instance_count``, ``instance_ids`` (distinct IDs, sorted and joined
        with newlines) followed by *extra_columns*. An empty *df* yields an
        empty frame with those columns and skips column validation.

    Raises:
        KeyError: If *df* is non-empty and a required column is missing.
    """
    base_cols = ["model_name", "main_category", "sub_category", "instance_count", "instance_ids"]
    extra = extra_columns or []
    all_cols = base_cols + extra
    if df.empty:
        return pd.DataFrame(columns=all_cols)
    require_columns(
        df,
        ["model_name", "main_category", "sub_category", "instance_id"] + extra,
        "model aggregation",
    )

    rows: list[dict[str, Any]] = []
    # dropna=False keeps rows whose model_name is null as their own group.
    for model_name, group in df.groupby("model_name", dropna=False, sort=True):
        id_texts = map(str, group["instance_id"].dropna().tolist())
        instance_ids = sorted({text for text in id_texts if text.strip()})
        row: dict[str, Any] = {
            "model_name": str(model_name),
            # Categories are taken from the first row of the group.
            "main_category": str(group["main_category"].iloc[0]),
            "sub_category": str(group["sub_category"].iloc[0]),
            "instance_count": len(instance_ids),
            "instance_ids": "\n".join(instance_ids),
        }
        for col in extra:
            col_texts = map(str, group[col].dropna().tolist())
            row[col] = next((text for text in col_texts if text.strip()), "")
        rows.append(row)
    return pd.DataFrame(rows)


def format_model_choice(index: int, row: dict[str, Any]) -> str:
    """Format a model row as ``"<index:04d> | <model_name> | instances <count>"``."""
    return f"{index:04d} | {row['model_name']} | instances {row['instance_count']}"


def format_instance_choice(index: int, row: dict[str, Any]) -> str:
    """Format an instance row as ``"<index:04d> | <instance_id> | <model_name>"``."""
    return f"{index:04d} | {row['instance_id']} | {row['model_name']}"


def parse_choice_index(choice: str | None, length: int) -> int | None:
    """Extract the numeric index from a formatted choice string.

    Args:
        choice: Choice text whose part before the first ``|`` is the index,
            as produced by the ``format_*_choice`` helpers. ``None`` (no
            selection) is accepted.
        length: Number of available items; valid indices are ``0..length-1``.

    Returns:
        The parsed index, or ``None`` when *choice* is not a string, has no
        integer prefix, or the index is out of range.
    """
    # A missing selection is treated like any other unparseable choice.
    if not isinstance(choice, str):
        return None
    prefix = choice.partition("|")[0].strip()
    try:
        idx = int(prefix)
    except ValueError:
        return None
    return idx if 0 <= idx < length else None