Spaces:

Sompote
/

Resistivity

Build error

App Files Files Community

Sompote commited on Feb 19, 2025

Commit

778f96f

verified ·

1 Parent(s): a74f3cd

Upload 6 files

Browse files

Files changed (6) hide show

README.md +60 -8
app.py +151 -0
data.xlsx +0 -0
model.pth +3 -0
predict.py +35 -0
requirements.txt +8 -0

README.md CHANGED Viewed

@@ -1,14 +1,66 @@
 ---
-title: Resistivity
-emoji: 📉
-colorFrom: indigo
-colorTo: gray
 sdk: streamlit
-sdk_version: 1.42.1
 app_file: app.py
 pinned: false
-license: mit
-short_description: Soil resistivity
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Soil Resistivity Prediction
+emoji: 🚗
+colorFrom: blue
+colorTo: green
 sdk: streamlit
+sdk_version: "1.29.0"
 app_file: app.py
 pinned: false
 ---
+# Resistivity Prediction App
+This is a Streamlit web application for predicting resistivity based on input features. The app uses a trained deep learning model with an attention mechanism and provides SHAP value explanations for its predictions.
+## Setup Instructions
+1. Create a virtual environment (recommended):
+```bash
+python -m venv venv
+source venv/bin/activate  # On Windows use: venv\Scripts\activate
+```
+2. Install required packages:
+```bash
+pip install -r requirements.txt
+```
+3. Place the following files in the same directory:
+- `model.pth` (trained model file)
+- `data.xlsx` (dataset file with features and target)
+## Running the App
+To run the app, use the following command:
+```bash
+streamlit run app.py
+```
+The app will be available at http://localhost:8501 by default.
+## Usage
+1. Enter values for each feature using the input fields
+2. Click the "Predict" button
+3. View the prediction result and SHAP value explanation
+## Files Description
+- `app.py`: Main Streamlit application file
+- `predict.py`: Contains model architecture and prediction functions
+- `requirements.txt`: List of required Python packages
+- `model.pth`: Trained model weights (not included, must be added)
+- `data.xlsx`: Dataset file (not included, must be added)
+## Model Architecture
+The model uses a TabularTransformer architecture with:
+- Feature embedding layer
+- Multi-head attention mechanism
+- Fully connected layers for prediction
+## Requirements
+- Python 3.8+
+- Required packages listed in requirements.txt

app.py ADDED Viewed

	@@ -0,0 +1,151 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import torch
+from predict import TabularTransformer, model_predict
+from sklearn.preprocessing import MinMaxScaler
+import matplotlib.pyplot as plt
+import shap
# Page chrome: browser-tab metadata, then the visible heading and intro text.
# The same title string is used for the tab and for the on-page heading.
_APP_TITLE = "Resistivity Prediction App"
_APP_INTRO = """
This app predicts resistivity based on input features. Enter the values for each feature
and click 'Predict' to get the prediction and explanation.
"""

st.set_page_config(page_title=_APP_TITLE, page_icon="🔮", layout="wide")
st.title(_APP_TITLE)
st.markdown(_APP_INTRO)
@st.cache_resource
def load_model_and_scalers():
    """Load the dataset, fit the min-max scalers and restore the trained model.

    Reads ``data.xlsx`` and ``model.pth`` relative to the current working
    directory. The first eight columns of the sheet are used as features and
    the ninth column as the target. Both scalers are re-fitted on the full
    sheet every time this function actually runs (it is wrapped in
    ``st.cache_resource``).

    Returns:
        tuple: ``(model, scaler_X, scaler_y, feature_names, X)`` where
        ``model`` is a ``TabularTransformer`` in eval mode, the scalers are
        fitted ``MinMaxScaler`` instances, ``feature_names`` is the list of
        feature column labels and ``X`` is the feature DataFrame.

    Raises:
        Exception: whatever ``pandas.read_excel``, ``torch.load`` or
        ``load_state_dict`` raise (missing file, bad checkpoint, ...); the
        caller is expected to report it.
    """
    # Load data for scaling
    df = pd.read_excel('data.xlsx')
    X = df.iloc[:, 0:8]
    y = df.iloc[:, 8]
    feature_names = X.columns.tolist()

    # MinMaxScaler.fit returns the fitted scaler, so construct and fit in one go.
    scaler_X = MinMaxScaler().fit(X)
    # The target is a Series; the scaler needs a 2-D column vector.
    scaler_y = MinMaxScaler().fit(y.values.reshape(-1, 1))

    model = TabularTransformer(input_dim=8, output_dim=1)
    # map_location: a checkpoint saved on a GPU must still load on a CPU-only
    # host. weights_only: the file is only ever used as a state dict, so there
    # is no reason to allow arbitrary pickled objects to be deserialised.
    state_dict = torch.load('model.pth', map_location='cpu', weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    return model, scaler_X, scaler_y, feature_names, X
def explain_prediction(model, input_df, X_background, scaler_X, scaler_y, feature_names):
    """Compute SHAP values for one input row and save a waterfall plot.

    A ``shap.KernelExplainer`` is built around the full pipeline
    (scale features -> model -> un-scale prediction), using up to 100 rows
    sampled from ``X_background`` with a fixed seed. The plot is written to
    ``shap_explanation.png`` in the current working directory, where the
    caller picks it up.

    Args:
        model: Callable mapping a float32 tensor of scaled features to a
            tensor of scaled predictions.
        input_df: DataFrame holding the row to explain; only the first row is
            explained and plotted.
        X_background: DataFrame of unscaled feature rows to sample the SHAP
            background from.
        scaler_X: Fitted feature scaler (``transform``).
        scaler_y: Fitted target scaler (``inverse_transform``).
        feature_names: Feature labels, one per column, in column order.

    Returns:
        tuple: ``(expected_value, shap_values)`` where ``expected_value`` is
        the explainer's ``expected_value`` attribute unchanged and
        ``shap_values`` is a 1-D array with one contribution per feature for
        the first row of ``input_df``.
    """
    # If the scaler was fitted on a DataFrame it remembers the column names.
    # SHAP may call the prediction function with bare arrays, so re-attach the
    # names to keep the scaler's input consistent with what it was fitted on.
    fitted_columns = getattr(scaler_X, "feature_names_in_", None)

    def predict_fn(X):
        if fitted_columns is not None:
            X = pd.DataFrame(np.asarray(X), columns=fitted_columns)
        X_tensor = torch.as_tensor(scaler_X.transform(X), dtype=torch.float32)
        with torch.no_grad():
            scaled_pred = model(X_tensor).numpy()
        return scaler_y.inverse_transform(scaled_pred)

    # Use a subset of training data as background
    background_sample = X_background.sample(n=min(100, len(X_background)), random_state=42)
    explainer = shap.KernelExplainer(predict_fn, background_sample)

    raw_values = explainer.shap_values(input_df)
    # Some SHAP versions return one array per model output.
    if isinstance(raw_values, list):
        raw_values = raw_values[0]
    # Normalise whatever layout came back to (rows, features) and keep the
    # first row, which is the row being plotted. This replaces a
    # transpose-then-flatten heuristic that misfired when the number of rows
    # happened to equal the number of features.
    shap_values = np.asarray(raw_values, dtype=float).reshape(-1, len(feature_names))[0]

    # expected_value may be a Python scalar, a 0-d array or a 1-element array;
    # ravel handles all three, whereas np.isscalar is False for 0-d arrays.
    base_value = float(np.ravel(explainer.expected_value)[0])

    plt.figure(figsize=(12, 8))
    try:
        shap.plots.waterfall(
            shap.Explanation(
                values=shap_values,
                base_values=base_value,
                data=input_df.iloc[0].values,
                feature_names=feature_names
            ),
            show=False
        )
        plt.title('SHAP Value Contributions')
        plt.tight_layout()
        plt.savefig('shap_explanation.png', dpi=300, bbox_inches='tight')
    finally:
        # Close the figure even if plotting or saving fails, so figures do
        # not accumulate across repeated calls.
        plt.close()
    return explainer.expected_value, shap_values
# Application body.
# Loading is guarded on its own so that the "missing file / missing package"
# hint is only shown for actual loading failures; prediction and explanation
# failures get their own message instead of being reported as a load error.
try:
    model, scaler_X, scaler_y, feature_names, X = load_model_and_scalers()
except Exception as e:
    st.error(f"""
    Error loading the model and data. Please make sure:
    1. The model file 'model.pth' exists
    2. The data file 'data.xlsx' exists
    3. All required packages are installed
    Error details: {str(e)}
    """)
else:
    st.subheader("Input Features")
    col1, col2 = st.columns(2)

    # One numeric input per feature: the first half of the features goes in
    # the left column, the rest in the right column.
    input_values = {}
    left_count = len(feature_names) // 2
    for i, feature in enumerate(feature_names):
        # Bound each input by the range observed in the dataset and start it
        # at the column mean.
        min_val = float(X[feature].min())
        max_val = float(X[feature].max())
        with col1 if i < left_count else col2:
            input_values[feature] = st.number_input(
                f"{feature}",
                min_value=min_val,
                max_value=max_val,
                value=float(X[feature].mean()),
                help=f"Range: {min_val:.2f} to {max_val:.2f}"
            )

    if st.button("Predict"):
        try:
            # Single-row frame whose columns are the feature names.
            input_df = pd.DataFrame([input_values])
            prediction = model_predict(model, input_df, scaler_X, scaler_y)

            st.subheader("Prediction Result")
            st.markdown(f"### Predicted Resistivity: {prediction[0]:.2f}")

            st.subheader("Feature Importance Explanation")
            # explain_prediction writes the waterfall plot to
            # 'shap_explanation.png', which is displayed right after.
            expected_value, shap_values = explain_prediction(
                model, input_df, X, scaler_X, scaler_y, feature_names
            )
            st.image('shap_explanation.png')
        except Exception as e:
            st.error(f"Error while computing the prediction or its explanation: {e}")

data.xlsx ADDED Viewed

Binary file (26.3 kB). View file

model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d333cc2a9d3c4f94c55f32b766e58643dbc514a24bea307a8b2aa80dd8d609b0
+size 105820

predict.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import torch
+import numpy as np
+import pandas as pd
+import torch.nn as nn
class TabularTransformer(nn.Module):
    """Attention-based regressor for fixed-width rows of tabular features.

    Pipeline: linear embedding of the whole feature vector -> multi-head
    self-attention -> two-layer MLP head.

    The attribute names ``embedding``, ``attention`` and ``fc`` determine the
    ``state_dict`` keys, so they must not be renamed if saved checkpoints are
    to keep loading.

    Args:
        input_dim: Number of input features per row.
        output_dim: Number of predicted values per row.
        embedding_dim: Width of the embedding and of the attention layer.
        num_heads: Number of attention heads.
        hidden_dim: Width of the hidden layer in the MLP head.
    """

    def __init__(self, input_dim=7, output_dim=1, embedding_dim=64, num_heads=8, hidden_dim=128):
        super().__init__()
        self.embedding = nn.Linear(input_dim, embedding_dim)
        self.attention = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads)
        self.fc = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        """Map a batch of feature rows to predictions.

        Args:
            x: Float tensor whose last dimension is ``input_dim``; the app
                passes a ``(batch, input_dim)`` tensor.

        Returns:
            Tensor with ``output_dim`` values per input row.
        """
        embedded = self.embedding(x)
        # A new leading axis is inserted for the attention layer. Under
        # nn.MultiheadAttention's default (seq_len, batch, embed) layout this
        # makes every row its own sequence of length one, so rows never
        # attend to each other.
        seq = embedded.unsqueeze(0)
        # The attention weights are never used, so do not compute them.
        attn_out, _ = self.attention(seq, seq, seq, need_weights=False)
        return self.fc(attn_out.squeeze(0))
def model_predict(model, X_input, scaler_X, scaler_y):
    """Run the scale -> model -> un-scale pipeline and return flat predictions.

    Args:
        model: Callable (normally an ``nn.Module``) mapping a float32 tensor
            of scaled features to a tensor of scaled predictions.
        X_input: Unscaled feature rows in whatever form ``scaler_X.transform``
            accepts.
        scaler_X: Fitted feature scaler exposing ``transform``.
        scaler_y: Fitted target scaler exposing ``inverse_transform``.

    Returns:
        numpy.ndarray: 1-D array of predictions in the original target scale.
    """
    X_scaled = scaler_X.transform(X_input)

    # Build the input on the same device as the model's parameters so the
    # function also works when the model has been moved off the CPU. Models
    # without parameters (or plain callables) fall back to the CPU.
    params = model.parameters() if hasattr(model, "parameters") else iter(())
    first_param = next(params, None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    X_tensor = torch.as_tensor(X_scaled, dtype=torch.float32, device=device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        scaled_pred = model(X_tensor).cpu().numpy()

    # Inverse transform to get original scale prediction
    prediction = scaler_y.inverse_transform(scaled_pred)
    return prediction.flatten()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit==1.32.0
+pandas==2.2.0
+numpy==1.26.4
+torch==2.2.0
+scikit-learn==1.4.0
+matplotlib==3.8.3
+shap==0.44.0
+openpyxl==3.1.2