Spaces:
Build error
Build error
Upload 6 files
Browse files- README.md +60 -8
- app.py +151 -0
- data.xlsx +0 -0
- model.pth +3 -0
- predict.py +35 -0
- requirements.txt +8 -0
README.md
CHANGED
|
@@ -1,14 +1,66 @@
|
|
| 1 |
---
|
| 2 |
-
title: Resistivity
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: streamlit
|
| 7 |
-
sdk_version: 1.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
license: mit
|
| 11 |
-
short_description: Soil resistivity
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Soil Resistivity Prediction
|
| 3 |
+
emoji: 🚗
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
sdk: streamlit
|
| 7 |
+
sdk_version: "1.29.0"
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# Resistivity Prediction App
|
| 13 |
+
|
| 14 |
+
This is a Streamlit web application for predicting resistivity from a set of input features. The app uses a trained deep learning model with an attention mechanism and provides SHAP value explanations for its predictions.
|
| 15 |
+
|
| 16 |
+
## Setup Instructions
|
| 17 |
+
|
| 18 |
+
1. Create a virtual environment (recommended):
|
| 19 |
+
```bash
|
| 20 |
+
python -m venv venv
|
| 21 |
+
source venv/bin/activate # On Windows use: venv\Scripts\activate
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
2. Install required packages:
|
| 25 |
+
```bash
|
| 26 |
+
pip install -r requirements.txt
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
3. Place the following files in the same directory:
|
| 30 |
+
- `model.pth` (trained model file)
|
| 31 |
+
- `data.xlsx` (dataset file with features and target)
|
| 32 |
+
|
| 33 |
+
## Running the App
|
| 34 |
+
|
| 35 |
+
To run the app, use the following command:
|
| 36 |
+
```bash
|
| 37 |
+
streamlit run app.py
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
The app will be available at http://localhost:8501 by default.
|
| 41 |
+
|
| 42 |
+
## Usage
|
| 43 |
+
|
| 44 |
+
1. Enter values for each feature using the input fields
|
| 45 |
+
2. Click the "Predict" button
|
| 46 |
+
3. View the prediction result and SHAP value explanation
|
| 47 |
+
|
| 48 |
+
## Files Description
|
| 49 |
+
|
| 50 |
+
- `app.py`: Main Streamlit application file
|
| 51 |
+
- `predict.py`: Contains model architecture and prediction functions
|
| 52 |
+
- `requirements.txt`: List of required Python packages
|
| 53 |
+
- `model.pth`: Trained model weights (stored via Git LFS)
|
| 54 |
+
- `data.xlsx`: Dataset file with the features and target (used to fit the input/output scalers)
|
| 55 |
+
|
| 56 |
+
## Model Architecture
|
| 57 |
+
|
| 58 |
+
The model uses a TabularTransformer architecture with:
|
| 59 |
+
- Feature embedding layer
|
| 60 |
+
- Multi-head attention mechanism
|
| 61 |
+
- Fully connected layers for prediction
|
| 62 |
+
|
| 63 |
+
## Requirements
|
| 64 |
+
|
| 65 |
+
- Python 3.8+
|
| 66 |
+
- Required packages listed in requirements.txt
|
app.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import torch
|
| 5 |
+
from predict import TabularTransformer, model_predict
|
| 6 |
+
from sklearn.preprocessing import MinMaxScaler
|
| 7 |
+
import matplotlib.pyplot as plt
|
| 8 |
+
import shap
|
| 9 |
+
|
| 10 |
+
# Browser-tab title/icon and a full-width layout for the page.
st.set_page_config(page_title="Resistivity Prediction App", page_icon="🔮", layout="wide")

# Page heading followed by a short note on how to use the app.
st.title("Resistivity Prediction App")
intro_text = """
This app predicts resistivity based on input features. Enter the values for each feature
and click 'Predict' to get the prediction and explanation.
"""
st.markdown(intro_text)
|
| 23 |
+
|
| 24 |
+
@st.cache_resource
def load_model_and_scalers():
    """Load the dataset, fit the scalers and restore the trained model.

    Reads ``data.xlsx`` from the working directory, taking its first eight
    columns as the input features and the ninth column as the target. A
    ``MinMaxScaler`` is fitted on each, and the ``TabularTransformer``
    weights are restored from ``model.pth``.

    Returns:
        tuple: ``(model, scaler_X, scaler_y, feature_names, X)`` where
        ``model`` is in eval mode, ``feature_names`` is the list of feature
        column labels and ``X`` is the unscaled feature DataFrame.

    Raises:
        Whatever ``pandas.read_excel``, ``torch.load`` or
        ``load_state_dict`` raise when a file is missing or does not match
        the expected layout.
    """
    df = pd.read_excel('data.xlsx')
    X = df.iloc[:, 0:8]
    y = df.iloc[:, 8]
    feature_names = X.columns.tolist()

    # The scalers are re-fitted from the dataset on load rather than being
    # stored alongside the model weights.
    scaler_X = MinMaxScaler().fit(X)
    scaler_y = MinMaxScaler().fit(y.values.reshape(-1, 1))

    # Size the input layer from the selected feature columns so the slice
    # above is the single place that fixes the feature count.
    model = TabularTransformer(input_dim=X.shape[1], output_dim=1)

    # map_location='cpu' lets a checkpoint that was saved from a GPU be
    # restored on a CPU-only host; without it torch.load tries to put the
    # tensors back on the device they were saved from.
    state_dict = torch.load('model.pth', map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    return model, scaler_X, scaler_y, feature_names, X
|
| 46 |
+
|
| 47 |
+
def explain_prediction(model, input_df, X_background, scaler_X, scaler_y, feature_names):
    """Compute SHAP values for one input row and save a waterfall plot.

    A ``shap.KernelExplainer`` is built around a wrapper that scales the raw
    features, runs the model and maps the output back to the original target
    scale. The resulting waterfall plot is written to
    ``shap_explanation.png`` in the working directory.

    Args:
        model: Callable network taking a float tensor of scaled features.
        input_df: DataFrame whose first row is the sample to explain.
        X_background: DataFrame of unscaled features; up to 100 rows are
            sampled from it (fixed seed) as the SHAP background set.
        scaler_X: Fitted scaler exposing ``transform`` for the features.
        scaler_y: Fitted scaler exposing ``inverse_transform`` for the target.
        feature_names: Feature labels shown on the plot.

    Returns:
        tuple: ``(explainer.expected_value, shap_values)`` where
        ``shap_values`` has been flattened when it had more than one axis.
    """

    def predict_fn(X):
        # SHAP passes raw feature values: scale in, predict, scale back out.
        scaled_inputs = scaler_X.transform(X)
        with torch.no_grad():
            scaled_outputs = model(torch.FloatTensor(scaled_inputs)).numpy()
        return scaler_y.inverse_transform(scaled_outputs)

    # Cap the background set at 100 rows; the seed keeps the sample repeatable.
    n_background = min(100, len(X_background))
    background_sample = X_background.sample(n=n_background, random_state=42)
    explainer = shap.KernelExplainer(predict_fn, background_sample)

    shap_values = explainer.shap_values(input_df)

    # The explainer may hand back a list (one entry per model output); only
    # the first output is used.
    if isinstance(shap_values, list):
        shap_values = np.array(shap_values[0])

    # Collapse any extra axes to a flat vector for the waterfall plot.
    if shap_values.ndim > 1:
        if shap_values.shape[0] == len(feature_names):
            shap_values = shap_values.T
        shap_values = shap_values.flatten()

    plt.figure(figsize=(12, 8))

    # The waterfall plot needs a single base value, so unwrap a non-scalar one.
    if np.isscalar(explainer.expected_value):
        base_value = explainer.expected_value
    else:
        base_value = explainer.expected_value[0]

    explanation = shap.Explanation(
        values=shap_values,
        base_values=base_value,
        data=input_df.iloc[0].values,
        feature_names=feature_names,
    )
    shap.plots.waterfall(explanation, show=False)

    plt.title('SHAP Value Contributions')
    plt.tight_layout()
    plt.savefig('shap_explanation.png', dpi=300, bbox_inches='tight')
    plt.close()

    return explainer.expected_value, shap_values
|
| 90 |
+
|
| 91 |
+
# Load the model, scalers and reference data. Only this step is covered by
# the "missing files" message, so later failures are not misreported as a
# loading problem.
try:
    model, scaler_X, scaler_y, feature_names, X = load_model_and_scalers()
except Exception as e:
    st.error(f"""
Error loading the model and data. Please make sure:
1. The model file 'model.pth' exists
2. The data file 'data.xlsx' exists
3. All required packages are installed

Error details: {str(e)}
""")
    # Nothing below can work without the model, so end this script run here.
    st.stop()

st.subheader("Input Features")

# Two side-by-side columns hold the feature inputs.
col1, col2 = st.columns(2)

# Maps each feature name to the value entered by the user.
input_values = {}

half = len(feature_names) // 2
for i, feature in enumerate(feature_names):
    # Inputs are limited to the range seen in the dataset.
    min_val = float(X[feature].min())
    max_val = float(X[feature].max())

    # The first half of the features goes in the left column and the
    # remainder in the right column.
    with col1 if i < half else col2:
        input_values[feature] = st.number_input(
            f"{feature}",
            min_value=min_val,
            max_value=max_val,
            value=float(X[feature].mean()),
            help=f"Range: {min_val:.2f} to {max_val:.2f}",
        )

if st.button("Predict"):
    # One-row DataFrame; the dict keeps the features in their original order.
    input_df = pd.DataFrame([input_values])

    try:
        prediction = model_predict(model, input_df, scaler_X, scaler_y)

        st.subheader("Prediction Result")
        st.markdown(f"### Predicted Resistivity: {prediction[0]:.2f}")

        st.subheader("Feature Importance Explanation")

        # explain_prediction writes the waterfall plot to
        # 'shap_explanation.png', which is then displayed below.
        explain_prediction(model, input_df, X, scaler_X, scaler_y, feature_names)
        st.image('shap_explanation.png')
    except Exception as e:
        # Top-level UI boundary: report the failure for what it is.
        st.error(f"Error while computing the prediction or its explanation: {e}")
|
data.xlsx
ADDED
|
Binary file (26.3 kB). View file
|
|
|
model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d333cc2a9d3c4f94c55f32b766e58643dbc514a24bea307a8b2aa80dd8d609b0
|
| 3 |
+
size 105820
|
predict.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import torch.nn as nn
|
| 5 |
+
|
| 6 |
+
class TabularTransformer(nn.Module):
    """Feed-forward regressor for tabular rows with a self-attention stage.

    A linear layer embeds each row, ``nn.MultiheadAttention`` is applied to
    that embedding, and a two-layer MLP produces the output.

    Args:
        input_dim: Number of input features per row.
        output_dim: Number of values predicted per row.
        embedding_dim: Width of the embedding fed to the attention layer.
        num_heads: Number of attention heads.
        hidden_dim: Width of the hidden layer in the output MLP.
    """

    def __init__(self, input_dim=7, output_dim=1, embedding_dim=64, num_heads=8, hidden_dim=128):
        super().__init__()
        # NOTE: the attribute names and the layout of ``fc`` determine the
        # state_dict keys, so they must stay as-is for saved weights to load.
        self.embedding = nn.Linear(input_dim, embedding_dim)
        self.attention = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads)
        self.fc = nn.Sequential(
            nn.Linear(embedding_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )

    def forward(self, x):
        """Run the network on ``x`` and return the output of the final MLP.

        Callers in this project pass a 2-D ``(rows, input_dim)`` float
        tensor, which yields a ``(rows, output_dim)`` result.
        """
        x = self.embedding(x)
        # The attention layer is not batch-first, so the new leading axis is
        # the sequence axis: every row becomes its own length-1 sequence.
        x = x.unsqueeze(0)
        # The attention weights were never used; need_weights=False avoids
        # computing and averaging them.
        attn_out, _ = self.attention(x, x, x, need_weights=False)
        x = attn_out.squeeze(0)  # drop the length-1 sequence axis again
        return self.fc(x)
|
| 23 |
+
|
| 24 |
+
def model_predict(model, X_input, scaler_X, scaler_y):
    """Predict targets in their original scale for unscaled feature rows.

    Args:
        model: Callable taking a float tensor of scaled features and
            returning a tensor of scaled predictions.
        X_input: 2-D array-like (e.g. a DataFrame) of unscaled features.
        scaler_X: Fitted scaler exposing ``transform`` for the features.
        scaler_y: Fitted scaler exposing ``inverse_transform`` for the target.

    Returns:
        numpy.ndarray: 1-D array of predictions in the original target scale.
    """
    # Bring the raw features into the scaled space, then convert them to a
    # float32 tensor for the model.
    X_scaled = scaler_X.transform(X_input)
    X_tensor = torch.FloatTensor(X_scaled)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        scaled_pred = model(X_tensor).numpy()

    # inverse_transform needs a 2-D array; accept models that return a flat
    # (n,) vector by turning it into a single column.
    if scaled_pred.ndim == 1:
        scaled_pred = scaled_pred[:, None]

    # Map the predictions back to the original target scale.
    prediction = scaler_y.inverse_transform(scaled_pred)
    return prediction.flatten()
|
requirements.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.32.0
|
| 2 |
+
pandas==2.2.0
|
| 3 |
+
numpy==1.26.4
|
| 4 |
+
torch==2.2.0
|
| 5 |
+
scikit-learn==1.4.0
|
| 6 |
+
matplotlib==3.8.3
|
| 7 |
+
shap==0.44.0
|
| 8 |
+
openpyxl==3.1.2
|