Spaces:
Paused
Paused
# test.py - Model testing module
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import pickle | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import google.generativeai as genai | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import ( | |
| mean_squared_error, | |
| r2_score, | |
| mean_absolute_percentage_error, | |
| accuracy_score, | |
| precision_score, | |
| recall_score, | |
| f1_score, | |
| confusion_matrix, | |
| classification_report | |
| ) | |
| from sklearn.preprocessing import LabelEncoder | |
| import io | |
class ModelTester:
    """Evaluate a trained estimator on a held-out split of the given data.

    Works for both regression and classification problems. For
    classification the target is label-encoded; the encoder is fitted once
    and reused so every consumer (metrics, confusion matrix, report) sees
    the same label -> integer mapping.
    """

    def __init__(self, model, X, y, problem_type):
        # model: fitted estimator exposing .predict()
        # X: feature matrix (DataFrame); y: target (Series)
        # problem_type: 'classification' or 'regression'
        self.model = model
        self.X = X
        self.y = y
        self.problem_type = problem_type
        self.label_encoder = None  # lazily created in _encode_target

    def _prepare_data(self, test_size=0.2, random_state=42):
        """Split the stored X/y into train and test sets.

        Stratifies on y for classification so class proportions are kept.
        Returns (X_train, X_test, y_train, y_test).
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.X, self.y,
            test_size=test_size,
            random_state=random_state,
            stratify=self.y if self.problem_type == 'classification' else None
        )
        return X_train, X_test, y_train, y_test

    def _encode_target(self, y):
        """Encode the classification target as integers; pass regression through.

        Fix: the original refit a fresh LabelEncoder on every call; the
        encoder is now fitted on first use and reused afterwards, which
        guarantees a consistent mapping across repeated calls (metrics,
        confusion matrix, classification report).
        """
        if self.problem_type != 'classification':
            return y
        if self.label_encoder is None:
            self.label_encoder = LabelEncoder()
            return self.label_encoder.fit_transform(y)
        return self.label_encoder.transform(y)

    def evaluate_regression(self, X_test, y_test):
        """Compute regression metrics; returns (metrics dict, predictions)."""
        y_pred = self.model.predict(X_test)
        metrics = {
            'MSE': mean_squared_error(y_test, y_pred),
            'R虏 Score': r2_score(y_test, y_pred),
            'MAPE': mean_absolute_percentage_error(y_test, y_pred) * 100  # as a percentage
        }
        return metrics, y_pred

    def evaluate_classification(self, X_test, y_test):
        """Compute weighted classification metrics; returns (metrics dict, predictions).

        NOTE(review): y_test is label-encoded here while y_pred comes straight
        from the model -- this assumes the model already predicts in the same
        integer encoding; confirm against the training pipeline.
        """
        y_test_encoded = self._encode_target(y_test)
        y_pred = self.model.predict(X_test)
        metrics = {
            'Accuracy': accuracy_score(y_test_encoded, y_pred),
            'Precision': precision_score(y_test_encoded, y_pred, average='weighted'),
            'Recall': recall_score(y_test_encoded, y_pred, average='weighted'),
            'F1 Score': f1_score(y_test_encoded, y_pred, average='weighted')
        }
        return metrics, y_pred

    def plot_regression_results(self, y_test, y_pred):
        """Scatter plot of predictions vs. actual values with a perfect-fit line."""
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=y_test, y=y_pred,
            mode='markers',
            name='Predicciones vs Valores Reales'
        ))
        # Diagonal reference: a perfect model would place every point on it
        fig.add_trace(go.Scatter(
            x=[y_test.min(), y_test.max()],
            y=[y_test.min(), y_test.max()],
            mode='lines',
            name='Línea Perfecta',
            line=dict(color='red', dash='dash')
        ))
        fig.update_layout(
            title='Predicciones vs Valores Reales',
            xaxis_title='Valores Reales',
            yaxis_title='Predicciones'
        )
        return fig

    def plot_classification_results(self, y_test, y_pred):
        """Confusion-matrix heatmap; axes labeled with the original class names."""
        cm = confusion_matrix(
            self._encode_target(y_test),
            y_pred
        )
        fig = px.imshow(
            cm,
            labels=dict(x="Predicción", y="Real"),
            x=[str(c) for c in self.label_encoder.classes_] if self.label_encoder else None,
            y=[str(c) for c in self.label_encoder.classes_] if self.label_encoder else None,
            title="Matriz de Confusión"
        )
        return fig
def load_model(uploaded_file):
    """Unpickle a model from a Streamlit-uploaded file buffer.

    Fix: the original used ``with uploaded_file as f:``, which closes the
    uploaded buffer and breaks any later read of the same upload. The
    buffer is now rewound and read without being closed, so repeated
    loads of the same upload work.

    NOTE(review): unpickling executes arbitrary code -- only load model
    files from trusted sources.

    Returns the unpickled object, or None (after showing a Streamlit
    error) when loading fails.
    """
    try:
        uploaded_file.seek(0)  # rewind in case the buffer was read before
        return pickle.loads(uploaded_file.read())
    except Exception as e:
        st.error(f"Error al cargar el modelo: {e}")
        return None
def get_model_features(model):
    """Return the feature names the model was fitted with, or None if unknown."""
    if not hasattr(model, 'feature_names_in_'):
        return None
    return list(model.feature_names_in_)
def align_features(X, model_features):
    """Select and order the columns of X to match the model's expected features.

    Returns X unchanged when the model does not expose feature names,
    None (after a warning) when required features are absent, and
    otherwise X restricted to ``model_features`` in that order (extra
    columns are dropped with a warning).
    """
    if model_features is None:
        return X
    expected = set(model_features)
    present = set(X.columns)
    missing_cols = expected - present
    if missing_cols:
        st.warning(f"Missing features: {missing_cols}. These will need to be provided.")
        return None
    extra_cols = present - expected
    if extra_cols:
        st.warning(f"Extra features detected: {extra_cols}. These will be ignored.")
    return X[model_features]
def determine_problem_type(model):
    """Classify an estimator as 'classification' or 'regression'.

    Heuristic: classifiers expose ``predict_proba`` and/or ``classes_``;
    anything without either attribute is treated as a regressor.
    """
    if hasattr(model, 'predict_proba') or hasattr(model, 'classes_'):
        return 'classification'
    return 'regression'
def generate_model_explanation(model, metrics, problem_type):
    """Generate a natural-language analysis of the metrics via the Gemini API.

    Parameters: model (unused in the prompt itself), metrics (dict of
    metric name -> value), problem_type ('classification'/'regression',
    interpolated into the prompt). Returns the generated text, or a
    fallback message after showing a Streamlit error on any failure
    (missing API key, network, quota, ...).
    """
    try:
        # API key is expected to have been stored in Streamlit session state
        genai.configure(api_key=st.session_state.get('gemini_api_key'))
        model_ai = genai.GenerativeModel('gemini-1.5-flash')
        # Flatten the metrics dict into "name: value" lines for the prompt
        metrics_text = "\n".join([f"{k}: {v}" for k, v in metrics.items()])
        prompt = f"""Analiza los siguientes resultados de un modelo de {problem_type}:
M茅tricas de Rendimiento:
{metrics_text}
Proporciona:
1. Interpretaci贸n de las m茅tricas
2. Fortalezas y debilidades del modelo
3. Posibles mejoras o alternativas
4. Contexto pr谩ctico de estos resultados
"""
        response = model_ai.generate_content(prompt)
        return response.text
    except Exception as e:
        # Report the failure in the UI rather than raising into Streamlit
        st.error(f"Error generando explicaci贸n: {e}")
        return "No se pudo generar la explicaci贸n."
def show_test() -> None:
    """Streamlit page: upload a pickled model and evaluate it on prepared data.

    Reads the dataset from ``st.session_state.prepared_data`` (populated by
    another page), lets the user pick feature/target columns, splits the
    data, computes regression or classification metrics, plots results,
    offers a Gemini-generated explanation and CSV/Excel downloads.
    """
    st.title("Prueba de Modelo")
    # Upload the trained model file (.pkl)
    uploaded_model = st.file_uploader(
        "Cargar modelo entrenado",
        type=['pkl']
    )
    if not uploaded_model:
        st.warning("Por favor, cargue un modelo entrenado")
        return
    # Prepared data must already exist in the session
    if 'prepared_data' not in st.session_state:
        st.warning("No hay datos preparados. Por favor, prepare los datos primero.")
        return
    data = st.session_state.prepared_data
    # Feature / target selection
    st.subheader("Configuraci贸n de Prueba")
    # NOTE(review): this passes the *uploaded file object*, not the unpickled
    # model (which is only loaded further below), so feature_names_in_ is
    # never found and model_features is always None -- load the model first.
    model_features = get_model_features(uploaded_model)
    # Only numeric columns are offered as predictors
    numeric_cols = data.select_dtypes(include=['int64', 'float64']).columns.tolist()
    if model_features:
        # Pre-select features that match the model's expected features
        default_features = [col for col in model_features if col in numeric_cols]
        feature_cols = st.multiselect(
            "Seleccionar variables predictoras (X):",
            numeric_cols,
            default=default_features
        )
    else:
        feature_cols = st.multiselect(
            "Seleccionar variables predictoras (X):",
            numeric_cols,
            default=st.session_state.get('feature_cols', [])
        )
    # Target candidates: any column not already chosen as a feature
    available_targets = [col for col in data.columns if col not in feature_cols]
    target_col = st.selectbox(
        "Seleccionar variable objetivo (y):",
        available_targets,
        # Preselect the previously used target when it is still available
        index=available_targets.index(st.session_state.get('target_col', available_targets[0]))
        if st.session_state.get('target_col') in available_targets else 0
    )
    if not feature_cols or not target_col:
        st.warning("Seleccione variables predictoras y objetivo")
        return
    # Unpickle the uploaded model
    model = load_model(uploaded_model)
    if not model:
        return
    # Assemble the design matrix and target
    X = data[feature_cols]
    y = data[target_col]
    # Heuristic problem-type detection from the target dtype/cardinality
    # (NOTE(review): ignores the model-based determine_problem_type helper)
    problem_type = 'classification' if y.dtype == 'object' or y.nunique() <= 10 else 'regression'
    st.write(f"Tipo de problema detectado: {problem_type}")
    # Held-out fraction for the evaluation split
    test_size = st.slider(
        "Tama帽o del conjunto de prueba",
        0.1, 0.5, 0.2
    )
    # Track whether an evaluation has happened this session
    if 'model_evaluated' not in st.session_state:
        st.session_state.model_evaluated = False
    if st.button("Evaluar Modelo"):
        # Build the tester and split the data
        model_tester = ModelTester(model, X, y, problem_type)
        X_train, X_test, y_train, y_test = model_tester._prepare_data(test_size)
        # Evaluate according to the detected problem type
        if problem_type == 'regression':
            metrics, y_pred = model_tester.evaluate_regression(X_test, y_test)
            # Performance metrics
            st.subheader("M茅tricas de Rendimiento")
            col1, col2, col3 = st.columns(3)
            col1.metric("MSE", f"{metrics['MSE']:.4f}")
            col2.metric("R虏 Score", f"{metrics['R虏 Score']:.4f}")
            col3.metric("MAPE", f"{metrics['MAPE']:.2f}%")
            # Predicted-vs-actual scatter plot
            st.subheader("Visualizaci贸n de Resultados")
            fig = model_tester.plot_regression_results(y_test, y_pred)
            st.plotly_chart(fig, use_container_width=True)
        else:  # Classification
            metrics, y_pred = model_tester.evaluate_classification(X_test, y_test)
            # Performance metrics
            st.subheader("M茅tricas de Rendimiento")
            col1, col2, col3, col4 = st.columns(4)
            col1.metric("Accuracy", f"{metrics['Accuracy']:.4f}")
            col2.metric("Precision", f"{metrics['Precision']:.4f}")
            col3.metric("Recall", f"{metrics['Recall']:.4f}")
            col4.metric("F1 Score", f"{metrics['F1 Score']:.4f}")
            # Confusion matrix heatmap
            st.subheader("Matriz de Confusi贸n")
            fig = model_tester.plot_classification_results(y_test, y_pred)
            st.plotly_chart(fig)
            # Per-class precision/recall/F1 text report
            st.subheader("Reporte de Clasificaci贸n")
            st.text(classification_report(
                model_tester._encode_target(y_test),
                y_pred
            ))
        # Persist metrics so the sections below can use them across reruns
        st.session_state.metrics = metrics
        st.session_state.model_evaluated = True
    # Gemini-based explanation (outside the button branch above, per the
    # original author's comment; the disabled= flag gates it until evaluated)
    st.subheader("An谩lisis de Resultados")
    if st.session_state.get('gemini_api_key'):
        if st.button("Generar Explicaci贸n Detallada", disabled=not st.session_state.model_evaluated, help="Eval煤a el modelo primero"):
            with st.spinner("Generando explicaci贸n..."):
                explanation = generate_model_explanation(
                    model, st.session_state.metrics, problem_type
                )
                st.markdown(explanation)
    else:
        st.warning("Configure la API key de Gemini para obtener explicaciones detalladas")
    # Example predictions
    # NOTE(review): from here on the code references X_test/y_test (and later
    # `metrics` / `comparison_df`), which are only bound inside the
    # "Evaluar Modelo" branch above -- on any rerun where that button was not
    # clicked this raises NameError. Persisting the split/predictions in
    # st.session_state would fix it; confirm the intended flow.
    st.subheader("Predicciones de Ejemplo")
    num_samples = st.slider(
        "N煤mero de muestras a mostrar",
        5, 50, 10
    )
    # Random sample of test rows (np.random is unseeded: resamples each rerun)
    sample_indices = np.random.choice(
        len(X_test),
        min(num_samples, len(X_test)),
        replace=False
    )
    sample_X = X_test.iloc[sample_indices]
    sample_y_true = y_test.iloc[sample_indices]
    sample_y_pred = model.predict(sample_X)
    # Side-by-side comparison table; the last column's name and contents
    # depend on the problem type (absolute error vs. correctness flag)
    comparison_df = pd.DataFrame({
        'Caracter铆sticas': [
            ', '.join([f"{col}: {val}" for col, val in row.items()])
            for _, row in sample_X.iterrows()
        ],
        'Valor Real': sample_y_true,
        'Predicci贸n': sample_y_pred,
        'Error Absoluto' if problem_type == 'regression'
        else 'Predicci贸n Correcta':
            np.abs(sample_y_true - sample_y_pred) if problem_type == 'regression'
            else (sample_y_true == sample_y_pred)
    })
    st.dataframe(comparison_df)
    # Download options
    st.subheader("Descargar Resultados")
    # Metrics as a one-column DataFrame for export
    metrics_df = pd.DataFrame.from_dict(metrics, orient='index', columns=['Valor'])
    # Export format selector
    download_format = st.selectbox(
        "Seleccionar formato de descarga",
        ["CSV", "Excel"]
    )
    if download_format == "CSV":
        csv_data = metrics_df.to_csv().encode('utf-8')
        st.download_button(
            label="Descargar M茅tricas (CSV)",
            data=csv_data,
            file_name="model_metrics.csv",
            mime="text/csv"
        )
    else:
        # Build an in-memory Excel workbook with metrics + predictions sheets
        excel_buffer = io.BytesIO()
        with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
            metrics_df.to_excel(writer, index=True, sheet_name='M茅tricas')
            comparison_df.to_excel(writer, index=False, sheet_name='Predicciones')
        excel_buffer.seek(0)
        st.download_button(
            label="Descargar Resultados (Excel)",
            data=excel_buffer,
            file_name="model_results.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        )
def main() -> None:
    """Entry point: render the model-testing page."""
    show_test()


if __name__ == "__main__":
    main()