| |
| """Schema Detector Plugin""" |
| import pandas as pd |
| from typing import Dict, Any |
|
|
class SchemaDetector:
    """Detects and reports data schema."""

    def get_schema(self, df: pd.DataFrame) -> Dict[str, Any]:
        """Build a per-column summary of *df*.

        Each column is classified as one of:

        * ``"Numeric"`` - ``pd.api.types.is_numeric_dtype`` is true.
        * ``"Datetime"`` - ``pd.api.types.is_datetime64_any_dtype`` is true.
        * ``"Categorical"`` - the column has pandas ``category`` dtype, or
          its number of distinct non-null values is below
          ``min(10, len(df) / 5)``.
        * ``"Text/Object"`` - everything else.

        Args:
            df: The frame to inspect.

        Returns:
            A dict keyed by column label. Each value is a dict with the keys
            ``"inferred_type"``, ``"pandas_dtype"`` (the dtype as a string),
            ``"non_null_count"`` and ``"unique_values"`` (both plain ``int``).
            If *df* has duplicate column labels, the entry for the last
            column carrying that label wins.
        """
        schema = {}
        # Loop-invariant: a non-numeric, non-datetime column is treated as
        # categorical only when it has fewer distinct values than this.
        categorical_limit = min(10, len(df) / 5)

        # items() yields one Series per column position, so duplicate column
        # labels never produce a DataFrame the way df[label] would.
        for col, series in df.items():
            unique_count = int(series.nunique())

            if pd.api.types.is_numeric_dtype(series):
                base_type = "Numeric"
            elif pd.api.types.is_datetime64_any_dtype(series):
                base_type = "Datetime"
            elif (
                isinstance(series.dtype, pd.CategoricalDtype)
                or unique_count < categorical_limit
            ):
                # An explicit category dtype is categorical regardless of
                # how many categories it has.
                base_type = "Categorical"
            else:
                base_type = "Text/Object"

            schema[col] = {
                "inferred_type": base_type,
                "pandas_dtype": str(series.dtype),
                "non_null_count": int(series.count()),
                "unique_values": unique_count,
            }
        return schema
|
|