Spaces:
Sleeping
Sleeping
Upload 32 files
Browse files- .dockerignore +9 -0
- Dockerfile +30 -0
- README.md +118 -12
- mcp_client_config.json +8 -0
- models/neural_network_dft_band_gap_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/neural_network_energy_above_hull_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/neural_network_ff_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/neural_network_jsc_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/neural_network_pce_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/neural_network_stability_retention_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/neural_network_stability_t80_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/neural_network_voc_layers-128-64-32_seed-42_batch-32.pth +3 -0
- models/random_forest_dft_band_gap_layers-NA_seed-42_batch-32.pkl +3 -0
- models/random_forest_energy_above_hull_layers-NA_seed-42_batch-32.pkl +3 -0
- models/random_forest_ff_layers-NA_seed-42_batch-32.pkl +3 -0
- models/random_forest_jsc_layers-NA_seed-42_batch-32.pkl +3 -0
- models/random_forest_pce_layers-NA_seed-42_batch-32.pkl +3 -0
- models/random_forest_stability_retention_layers-NA_seed-42_batch-32.pkl +3 -0
- models/random_forest_stability_t80_layers-NA_seed-42_batch-32.pkl +3 -0
- models/random_forest_voc_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_dft_band_gap_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_energy_above_hull_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_ff_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_jsc_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_pce_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_stability_retention_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_stability_t80_layers-NA_seed-42_batch-32.pkl +3 -0
- models/xgboost_voc_layers-NA_seed-42_batch-32.pkl +3 -0
- prepare_deploy.ps1 +44 -0
- prepare_deploy.sh +45 -0
- requirements.txt +7 -0
- server.py +566 -0
.dockerignore
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.git
|
| 2 |
+
__pycache__
|
| 3 |
+
*.pyc
|
| 4 |
+
.env
|
| 5 |
+
.venv
|
| 6 |
+
*.log
|
| 7 |
+
prepare_deploy.sh
|
| 8 |
+
prepare_deploy.ps1
|
| 9 |
+
mcp_client_config.json
|
Dockerfile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# Install system build dependencies (gcc is needed to compile some wheels)
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest first so this layer is cached across code changes
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code and pretrained models
COPY server.py .
COPY models/ ./models/

# Runtime configuration read by server.py
ENV MODEL_DIR=/app/models
ENV MCP_TRANSPORT=sse
ENV HOST=0.0.0.0
ENV PORT=7860

# Expose the service port (matches app_port in the Space README)
EXPOSE 7860

# Launch the MCP server
CMD ["python", "server.py"]
|
README.md
CHANGED
|
@@ -1,12 +1,118 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Psc Predict
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
-
sdk:
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Psc Predict MCP Server
|
| 3 |
+
emoji: ☀️
|
| 4 |
+
colorFrom: yellow
|
| 5 |
+
colorTo: yellow
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
pinned: false
|
| 9 |
+
license: mit
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
# Psc_Predict MCP Server
|
| 13 |
+
|
| 14 |
+
Perovskite Solar Cell Performance Prediction MCP Service
|
| 15 |
+
|
| 16 |
+
## Features
|
| 17 |
+
|
| 18 |
+
Predict 8 performance metrics from CIF crystal structure files:
|
| 19 |
+
|
| 20 |
+
| Metric | Description | Unit |
|
| 21 |
+
|--------|-------------|------|
|
| 22 |
+
| PCE | Power Conversion Efficiency | % |
|
| 23 |
+
| DFT Band Gap | DFT calculated band gap | eV |
|
| 24 |
+
| Energy Above Hull | Thermodynamic stability | eV/atom |
|
| 25 |
+
| Stability Retention | Stability retention percentage | % |
|
| 26 |
+
| Stability T80 | T80 lifetime | hours |
|
| 27 |
+
| Voc | Open-circuit voltage | V |
|
| 28 |
+
| Jsc | Short-circuit current density | mA/cm² |
|
| 29 |
+
| FF | Fill factor | - |
|
| 30 |
+
|
| 31 |
+
## Available Model Types
|
| 32 |
+
|
| 33 |
+
| Model | Description | Recommendation |
|
| 34 |
+
|-------|-------------|----------------|
|
| 35 |
+
| **XGBoost** | Gradient boosting ensemble | ⭐ Default & Recommended |
|
| 36 |
+
| **Random Forest** | Ensemble of decision trees | Good interpretability |
|
| 37 |
+
| **Neural Network** | 3-layer MLP (128-64-32) | Deep learning approach |
|
| 38 |
+
|
| 39 |
+
## MCP Connection
|
| 40 |
+
|
| 41 |
+
### SSE Connection Configuration
|
| 42 |
+
|
| 43 |
+
```json
|
| 44 |
+
{
|
| 45 |
+
"mcpServers": {
|
| 46 |
+
"psc-predict": {
|
| 47 |
+
"url": "https://your-space.hf.space/sse"
|
| 48 |
+
}
|
| 49 |
+
}
|
| 50 |
+
}
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
### Available Tools (Simplified)
|
| 54 |
+
|
| 55 |
+
| Tool | Description | Parameters |
|
| 56 |
+
|------|-------------|------------|
|
| 57 |
+
| `predict_ensemble` | **Predict using ALL 3 models with ensemble statistics** | cif, targets |
|
| 58 |
+
| `parse_cif_features` | Parse CIF and extract features | cif |
|
| 59 |
+
| `get_model_info` | Get model information | - |
|
| 60 |
+
| `list_available_models` | List all available models | - |
|
| 61 |
+
|
| 62 |
+
### Ensemble Prediction (Autonomous Multi-Model)
|
| 63 |
+
|
| 64 |
+
The `predict_ensemble` tool automatically calls all three models and provides:
|
| 65 |
+
- Individual predictions from XGBoost, Random Forest, and Neural Network
|
| 66 |
+
- Ensemble statistics: mean, std, min, max, range
|
| 67 |
+
- Recommendation based on model performance benchmarks
|
| 68 |
+
|
| 69 |
+
Example response:
|
| 70 |
+
```json
|
| 71 |
+
{
|
| 72 |
+
"results": {
|
| 73 |
+
"pce": {
|
| 74 |
+
"predictions": {"xgboost": 18.5, "random_forest": 17.8, "neural_network": 19.1},
|
| 75 |
+
"ensemble": {"mean": 18.47, "std": 0.53, "range": 1.3},
|
| 76 |
+
"recommendation": {"model": "xgboost", "value": 18.5}
|
| 77 |
+
}
|
| 78 |
+
}
|
| 79 |
+
}
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
## Input Example
|
| 83 |
+
|
| 84 |
+
```cif
|
| 85 |
+
data_CsPbI3
|
| 86 |
+
_cell_length_a 6.2894
|
| 87 |
+
_cell_length_b 6.2894
|
| 88 |
+
_cell_length_c 6.2894
|
| 89 |
+
_cell_angle_alpha 90.0
|
| 90 |
+
_cell_angle_beta 90.0
|
| 91 |
+
_cell_angle_gamma 90.0
|
| 92 |
+
_cell_volume 248.89
|
| 93 |
+
_chemical_formula_sum 'Cs1 Pb1 I3'
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
## Model Information
|
| 97 |
+
|
| 98 |
+
- **Model Types**: XGBoost (recommended), Random Forest, Neural Network
|
| 99 |
+
- **Input Features**: 93-dimensional (7 lattice parameters + 86 element fractions)
|
| 100 |
+
- **Training Data**: Perovskite solar cell database
|
| 101 |
+
|
| 102 |
+
## Local Development
|
| 103 |
+
|
| 104 |
+
```bash
|
| 105 |
+
# Install dependencies
|
| 106 |
+
pip install -r requirements.txt
|
| 107 |
+
|
| 108 |
+
# Run server
|
| 109 |
+
python server.py
|
| 110 |
+
|
| 111 |
+
# Or with Docker
|
| 112 |
+
docker build -t psc-predict .
|
| 113 |
+
docker run -p 7860:7860 psc-predict
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
## License
|
| 117 |
+
|
| 118 |
+
MIT
|
mcp_client_config.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mcpServers": {
|
| 3 |
+
"psc-predict": {
|
| 4 |
+
"url": "https://YOUR_USERNAME-psc-predict.hf.space/sse",
|
| 5 |
+
"description": "钙钛矿太阳能电池性能预测服务"
|
| 6 |
+
}
|
| 7 |
+
}
|
| 8 |
+
}
|
models/neural_network_dft_band_gap_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f478babc7d56f62a411f0598cf7b34beb6266527a00b77d570a11cce23ed6ea
|
| 3 |
+
size 102275
|
models/neural_network_energy_above_hull_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef4a292af2b075729071d487ed70a01d853d265eca01437c6971b0df3c617643
|
| 3 |
+
size 102666
|
models/neural_network_ff_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:060b02ee1f4b3d62d3a6f26d110ed5c9de696976e059c20a3d5ba562f74d8e33
|
| 3 |
+
size 101941
|
models/neural_network_jsc_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:10f4e65abb8134b3771dec100e7e3249a78cbc183aeac481f23c32adc2576142
|
| 3 |
+
size 101968
|
models/neural_network_pce_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60abaddc2c19ab9bc6ad1fc716ff35973b11648085070a922c81d34f1a3744f1
|
| 3 |
+
size 101968
|
models/neural_network_stability_retention_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7237f1d0c204d04bab1db3993b9bd7afdd5bd6aaf06c9b487dd4831182e61782
|
| 3 |
+
size 102784
|
models/neural_network_stability_t80_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf10f974da99fa1a918024adfb144d1ff14f26e596675c0fda5ab2aca94948f2
|
| 3 |
+
size 102302
|
models/neural_network_voc_layers-128-64-32_seed-42_batch-32.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b61326b22bcadcda1597dc6727b03859046351fb4c59bdf056a8d535685e21ce
|
| 3 |
+
size 101968
|
models/random_forest_dft_band_gap_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:454a66719e5eeffff2bdc9b1c539cb184a1ec1165a5f859367b6dde2e7b33be9
|
| 3 |
+
size 78465
|
models/random_forest_energy_above_hull_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54898c9f2964e812a89ec11431a0f3d842cf22c2aff2eb20e5fc46af1331987a
|
| 3 |
+
size 408945
|
models/random_forest_ff_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6855a7a821ed30f410faa928d76e1bab616b0abea2a50394a189411b07167cc7
|
| 3 |
+
size 837633
|
models/random_forest_jsc_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cad18bc4c396811c73141e1531a8b9aca2ba70d863efa0f6c0456b625d3dd462
|
| 3 |
+
size 493185
|
models/random_forest_pce_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:543123bf0091ef5891e839f344320d3fa5f056cc6f4ec810940fb757afda6105
|
| 3 |
+
size 930081
|
models/random_forest_stability_retention_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:885d1f08f1d0408ecf31e56175e37e631594ad70ed22a5a24b97826460501ad9
|
| 3 |
+
size 206481
|
models/random_forest_stability_t80_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5153ef576c9f328022e00f4930c0008a8865245c22dd0dfcf0406a6d35b66c7
|
| 3 |
+
size 246513
|
models/random_forest_voc_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e10b19fa6d3d411bb8d0153984d9040581308306bf624a5095d88f5457dbc2b6
|
| 3 |
+
size 641937
|
models/xgboost_dft_band_gap_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8eee2e7754c1e53858fbdf6b9b8005af12a5ac3833f65a11fc29213ea2877b83
|
| 3 |
+
size 144513
|
models/xgboost_energy_above_hull_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61f32e26d376e085bf2957597a1ecb28ed6c63b7b62e26486fbdd617a9a96712
|
| 3 |
+
size 179733
|
models/xgboost_ff_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d0ebd9de2ad79e2b4a91d080d73be30a81fc8c70b87e6ef3b9a0e67b23ce4d1
|
| 3 |
+
size 236807
|
models/xgboost_jsc_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1d812b2d0ce4290360bd19dddd9283ef0081382a4474b38ada248ff03db798f
|
| 3 |
+
size 251791
|
models/xgboost_pce_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd56cf3600c8ee298be757665f1951e3186da94c7a8ebf49f07867e15e2a1e4c
|
| 3 |
+
size 344777
|
models/xgboost_stability_retention_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2443fcb970bc15a32facb23f69d050858a6badffdfd4bf81bfaffb9c98ff241a
|
| 3 |
+
size 188915
|
models/xgboost_stability_t80_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:196c918ffc90fa2cb39ed20734b03c85754c06d130aa5947f16c75c3289f776e
|
| 3 |
+
size 202134
|
models/xgboost_voc_layers-NA_seed-42_batch-32.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3b046d876ea815280e942691364db7fbd98851133bca2b540e04999f5f257c4
|
| 3 |
+
size 232382
|
prepare_deploy.ps1
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Prepare HuggingFace Docker deployment files (Windows PowerShell)

Write-Host "==========================================" -ForegroundColor Cyan
Write-Host "Preparing Psc_Predict for HuggingFace" -ForegroundColor Cyan
Write-Host "==========================================" -ForegroundColor Cyan

# Create the models directory if it does not exist yet
if (-not (Test-Path "models")) {
    New-Item -ItemType Directory -Path "models" | Out-Null
}

# Copy trained model files from the sibling Psc_Predict project
Write-Host "`nCopying XGBoost models..." -ForegroundColor Yellow
Copy-Item "..\Psc_Predict\Psc_Predict\output\xgboost\*.pkl" -Destination "models\" -Force

# List the copied model files for verification
Write-Host "`nModels copied:" -ForegroundColor Green
Get-ChildItem models\

# Print the expected deployment file layout
Write-Host "`nDirectory structure:" -ForegroundColor Green
Write-Host "Psc_Predict_server/"
Write-Host "├── Dockerfile"
Write-Host "├── README.md"
Write-Host "├── requirements.txt"
Write-Host "├── server.py"
Write-Host "└── models/"
Get-ChildItem models\ | ForEach-Object { Write-Host "    ├── $($_.Name)" }

Write-Host "`n==========================================" -ForegroundColor Cyan
Write-Host "Ready to deploy!" -ForegroundColor Green
Write-Host ""
Write-Host "Next steps:" -ForegroundColor Yellow
Write-Host "1. Create a new HuggingFace Space (Docker SDK)"
Write-Host "2. Upload all files to the Space"
Write-Host "3. Wait for build and deployment"
Write-Host ""
Write-Host "Or use git:" -ForegroundColor Yellow
Write-Host "  git init"
Write-Host "  git remote add origin https://huggingface.co/spaces/YOUR_USERNAME/psc-predict"
Write-Host "  git add ."
Write-Host "  git commit -m 'Initial commit'"
Write-Host "  git push -u origin main"
Write-Host "==========================================" -ForegroundColor Cyan
|
prepare_deploy.sh
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Prepare HuggingFace Docker deployment files.
# Fail fast: abort on command errors, unset variables, and pipeline failures,
# so a failed model copy cannot end with a misleading "Ready to deploy!".
set -euo pipefail

echo "=========================================="
echo "Preparing Psc_Predict for HuggingFace"
echo "=========================================="

# Create the models directory if it does not exist yet
mkdir -p models

# Copy trained model files from the sibling Psc_Predict project
echo "Copying XGBoost models..."
cp ../Psc_Predict/Psc_Predict/output/xgboost/*.pkl models/

# List the copied model files for verification
echo ""
echo "Models copied:"
ls -la models/

# Print the expected deployment file layout
echo ""
echo "Directory structure:"
echo "├── Dockerfile"
echo "├── README.md"
echo "├── requirements.txt"
echo "├── server.py"
echo "└── models/"
ls models/ | sed 's/^/    ├── /'

echo ""
echo "=========================================="
echo "Ready to deploy!"
echo ""
echo "Next steps:"
echo "1. Create a new HuggingFace Space (Docker SDK)"
echo "2. Upload all files to the Space"
echo "3. Wait for build and deployment"
echo ""
echo "Or use git:"
echo "  git init"
echo "  git remote add origin https://huggingface.co/spaces/YOUR_USERNAME/psc-predict"
echo "  git add ."
echo "  git commit -m 'Initial commit'"
echo "  git push -u origin main"
echo "=========================================="
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
mcp[cli]>=1.0.0
fastmcp>=0.1.0
numpy>=1.21.0
scikit-learn>=1.0.0
xgboost>=1.7.0
torch>=2.0.0
uvicorn>=0.20.0
starlette>=0.25.0
|
server.py
ADDED
|
@@ -0,0 +1,566 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
# coding=utf-8
|
| 3 |
+
"""
|
| 4 |
+
Psc_Predict MCP Server
|
| 5 |
+
|
| 6 |
+
Perovskite Solar Cell Performance Prediction MCP Service
|
| 7 |
+
Using FastMCP framework with SSE transport
|
| 8 |
+
Designed for HuggingFace Docker deployment
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import re
|
| 13 |
+
import pickle
|
| 14 |
+
import logging
|
| 15 |
+
from typing import Dict, List, Optional, Any
|
| 16 |
+
|
| 17 |
+
import numpy as np
|
| 18 |
+
import torch
|
| 19 |
+
import torch.nn as nn
|
| 20 |
+
from mcp.server.fastmcp import FastMCP
|
| 21 |
+
|
| 22 |
+
# Configure logging
|
| 23 |
+
logging.basicConfig(level=logging.INFO)
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
# Initialize FastMCP server
|
| 27 |
+
mcp = FastMCP(
|
| 28 |
+
"Psc_Predict",
|
| 29 |
+
description="Perovskite Solar Cell Performance Prediction Service - Predict PCE, bandgap, stability and other properties from CIF crystal structures"
|
| 30 |
+
)
|
| 31 |
+
|
| 32 |
+
# ============ CIF Parser ============
|
| 33 |
+
class CIFParser:
    """Parse CIF text into a fixed 93-dimensional feature vector.

    Layout: 7 lattice parameters (a, b, c, alpha, beta, gamma, volume)
    followed by 86 normalized element fractions, in the order of
    ``self.elements``.
    """

    def __init__(self):
        # Supported elements, in fixed feature order (86 entries).
        self.elements = [
            'H', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'K', 'Ca',
            'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Rb',
            'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te',
            'I', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er',
            'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi',
            'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U'
        ]
        # Reverse lookup: element symbol -> position in the feature vector.
        self.elem_to_idx = {symbol: pos for pos, symbol in enumerate(self.elements)}

    def parse(self, cif_text: str) -> np.ndarray:
        """Return the 93-dim feature vector for *cif_text*.

        Missing lattice tags contribute 0.0; element symbols outside
        ``self.elements`` are ignored; the composition part is normalized
        to atomic fractions when any atoms were found.
        """
        # Some clients send literal backslash-n sequences instead of real newlines.
        if "\\n" in cif_text:
            cif_text = cif_text.replace("\\n", "\n")

        # A. Lattice parameters (7 values), in fixed order.
        tag_patterns = (
            r"_cell_length_a\s+([\d\.]+)",
            r"_cell_length_b\s+([\d\.]+)",
            r"_cell_length_c\s+([\d\.]+)",
            r"_cell_angle_alpha\s+([\d\.]+)",
            r"_cell_angle_beta\s+([\d\.]+)",
            r"_cell_angle_gamma\s+([\d\.]+)",
            r"_cell_volume\s+([\d\.]+)",
        )
        lattice = [
            float(m.group(1)) if (m := re.search(pat, cif_text)) else 0.0
            for pat in tag_patterns
        ]

        # B. Element composition (86 values) from _chemical_formula_sum.
        composition = np.zeros(len(self.elements))
        formula_match = re.search(r"_chemical_formula_sum\s+'?([^'\n]+)'?", cif_text)
        if formula_match:
            # Tokens look like "Cs1", "Pb1", "I3"; a missing count means 1.
            for token in formula_match.group(1).replace("'", "").split():
                sym_count = re.match(r"([A-Za-z]+)([\d\.]*)", token)
                if sym_count:
                    idx = self.elem_to_idx.get(sym_count.group(1))
                    if idx is not None:
                        count_text = sym_count.group(2)
                        composition[idx] = float(count_text) if count_text else 1.0

        # Normalize counts to atomic fractions.
        total_atoms = composition.sum()
        if total_atoms > 0:
            composition = composition / total_atoms

        return np.concatenate([lattice, composition])

    def get_feature_names(self) -> List[str]:
        """Names for each of the 93 feature positions, in vector order."""
        return ['a', 'b', 'c', 'alpha', 'beta', 'gamma', 'vol'] + self.elements
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
# ============ Neural Network Model ============
|
| 97 |
+
class MaterialNN(nn.Module):
    """MLP regressor for material property prediction.

    Architecture per hidden layer: Linear -> ReLU -> BatchNorm1d, followed
    by a final Linear to a single scalar output. The layer layout must stay
    in sync with the shipped ``*.pth`` state dicts (128-64-32 hidden units).
    """

    def __init__(self, input_dim, hidden_dims=(128, 64, 32)):
        """Build the network.

        Args:
            input_dim: Size of the input feature vector (93 for CIF features).
            hidden_dims: Hidden layer widths. Tuple default instead of the
                original mutable list default (shared-state pitfall); any
                iterable of ints is accepted.
        """
        super().__init__()
        layers = []
        in_d = input_dim
        for h_d in hidden_dims:
            layers.append(nn.Linear(in_d, h_d))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm1d(h_d))
            in_d = h_d
        layers.append(nn.Linear(in_d, 1))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the forward pass; returns shape (batch, 1)."""
        return self.net(x)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
# ============ Model Manager ============
|
| 116 |
+
class ModelManager:
|
| 117 |
+
"""Manage all pretrained models (XGBoost, Random Forest, Neural Network)"""
|
| 118 |
+
|
| 119 |
+
TARGETS = ['pce', 'dft_band_gap', 'energy_above_hull', 'stability_retention',
|
| 120 |
+
'stability_t80', 'voc', 'jsc', 'ff']
|
| 121 |
+
|
| 122 |
+
MODEL_TYPES = ['xgboost', 'random_forest', 'neural_network']
|
| 123 |
+
|
| 124 |
+
TARGET_INFO = {
|
| 125 |
+
'pce': {'name': 'Power Conversion Efficiency', 'unit': '%'},
|
| 126 |
+
'dft_band_gap': {'name': 'DFT Band Gap', 'unit': 'eV'},
|
| 127 |
+
'energy_above_hull': {'name': 'Energy Above Hull', 'unit': 'eV/atom'},
|
| 128 |
+
'stability_retention': {'name': 'Stability Retention', 'unit': '%'},
|
| 129 |
+
'stability_t80': {'name': 'T80 Lifetime', 'unit': 'hours'},
|
| 130 |
+
'voc': {'name': 'Open Circuit Voltage', 'unit': 'V'},
|
| 131 |
+
'jsc': {'name': 'Short Circuit Current Density', 'unit': 'mA/cm²'},
|
| 132 |
+
'ff': {'name': 'Fill Factor', 'unit': ''}
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
def __init__(self, model_dir: str = "./models"):
|
| 136 |
+
self.model_dir = model_dir
|
| 137 |
+
self.models: Dict[str, Dict[str, Any]] = {
|
| 138 |
+
'xgboost': {},
|
| 139 |
+
'random_forest': {},
|
| 140 |
+
'neural_network': {}
|
| 141 |
+
}
|
| 142 |
+
self.cif_parser = CIFParser()
|
| 143 |
+
self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 144 |
+
self._load_all_models()
|
| 145 |
+
|
| 146 |
+
def _load_all_models(self):
|
| 147 |
+
"""Load all available models"""
|
| 148 |
+
# Load XGBoost models
|
| 149 |
+
for target in self.TARGETS:
|
| 150 |
+
model_path = os.path.join(
|
| 151 |
+
self.model_dir,
|
| 152 |
+
f"xgboost_{target}_layers-NA_seed-42_batch-32.pkl"
|
| 153 |
+
)
|
| 154 |
+
if os.path.exists(model_path):
|
| 155 |
+
try:
|
| 156 |
+
with open(model_path, 'rb') as f:
|
| 157 |
+
self.models['xgboost'][target] = pickle.load(f)
|
| 158 |
+
logger.info(f"Loaded XGBoost model for {target}")
|
| 159 |
+
except Exception as e:
|
| 160 |
+
logger.warning(f"Failed to load XGBoost model for {target}: {e}")
|
| 161 |
+
|
| 162 |
+
# Load Random Forest models
|
| 163 |
+
for target in self.TARGETS:
|
| 164 |
+
model_path = os.path.join(
|
| 165 |
+
self.model_dir,
|
| 166 |
+
f"random_forest_{target}_layers-NA_seed-42_batch-32.pkl"
|
| 167 |
+
)
|
| 168 |
+
if os.path.exists(model_path):
|
| 169 |
+
try:
|
| 170 |
+
with open(model_path, 'rb') as f:
|
| 171 |
+
self.models['random_forest'][target] = pickle.load(f)
|
| 172 |
+
logger.info(f"Loaded Random Forest model for {target}")
|
| 173 |
+
except Exception as e:
|
| 174 |
+
logger.warning(f"Failed to load Random Forest model for {target}: {e}")
|
| 175 |
+
|
| 176 |
+
# Load Neural Network models
|
| 177 |
+
for target in self.TARGETS:
|
| 178 |
+
model_path = os.path.join(
|
| 179 |
+
self.model_dir,
|
| 180 |
+
f"neural_network_{target}_layers-128-64-32_seed-42_batch-32.pth"
|
| 181 |
+
)
|
| 182 |
+
if os.path.exists(model_path):
|
| 183 |
+
try:
|
| 184 |
+
model = MaterialNN(input_dim=93, hidden_dims=[128, 64, 32])
|
| 185 |
+
model.load_state_dict(torch.load(model_path, map_location=self.device))
|
| 186 |
+
model.to(self.device)
|
| 187 |
+
model.eval()
|
| 188 |
+
self.models['neural_network'][target] = model
|
| 189 |
+
logger.info(f"Loaded Neural Network model for {target}")
|
| 190 |
+
except Exception as e:
|
| 191 |
+
logger.warning(f"Failed to load Neural Network model for {target}: {e}")
|
| 192 |
+
|
| 193 |
+
def predict(self, cif_text: str, targets: Optional[List[str]] = None,
|
| 194 |
+
model_type: str = 'xgboost') -> Dict[str, float]:
|
| 195 |
+
"""Predict specified targets using selected model type"""
|
| 196 |
+
if model_type not in self.models:
|
| 197 |
+
raise ValueError(f"Unknown model type: {model_type}. Available: {self.MODEL_TYPES}")
|
| 198 |
+
|
| 199 |
+
if targets is None:
|
| 200 |
+
targets = list(self.models[model_type].keys())
|
| 201 |
+
|
| 202 |
+
# Parse CIF
|
| 203 |
+
features = self.cif_parser.parse(cif_text)
|
| 204 |
+
X = features.reshape(1, -1)
|
| 205 |
+
|
| 206 |
+
# Predict
|
| 207 |
+
results = {}
|
| 208 |
+
for target in targets:
|
| 209 |
+
if target in self.models[model_type]:
|
| 210 |
+
model = self.models[model_type][target]
|
| 211 |
+
if model_type == 'neural_network':
|
| 212 |
+
X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
|
| 213 |
+
with torch.no_grad():
|
| 214 |
+
pred = model(X_tensor).cpu().numpy().flatten()[0]
|
| 215 |
+
else:
|
| 216 |
+
pred = model.predict(X)[0]
|
| 217 |
+
results[target] = float(pred)
|
| 218 |
+
else:
|
| 219 |
+
results[target] = None
|
| 220 |
+
|
| 221 |
+
return results
|
| 222 |
+
|
| 223 |
+
def get_available_targets(self, model_type: str = 'xgboost') -> List[str]:
    """Return the target names that have a loaded model of the given family.

    Unknown model families yield an empty list rather than raising.
    """
    loaded = self.models.get(model_type)
    return [] if loaded is None else list(loaded)
|
| 228 |
+
|
| 229 |
+
def get_available_models(self) -> Dict[str, List[str]]:
    """Map each model family to its loaded target names.

    Families with no loaded models are omitted from the result.
    """
    catalog: Dict[str, List[str]] = {}
    for family, loaded in self.models.items():
        if loaded:
            catalog[family] = list(loaded)
    return catalog
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
# Global model manager — process-wide singleton, lazily constructed by
# get_model_manager() on first use.
model_manager: Optional[ModelManager] = None
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def get_model_manager() -> ModelManager:
    """Return the process-wide ModelManager, creating it on first use.

    The model directory comes from the MODEL_DIR environment variable,
    defaulting to ./models.
    """
    global model_manager
    if model_manager is None:
        model_manager = ModelManager(os.environ.get("MODEL_DIR", "./models"))
    return model_manager
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
# ============ MCP Tools ============

# Model families a caller may select in the MCP tools below.
VALID_MODEL_TYPES = ['xgboost', 'random_forest', 'neural_network']
# Family used when a tool is called without an explicit model_type.
DEFAULT_MODEL_TYPE = 'xgboost'
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
@mcp.tool()
def parse_cif_features(cif: str) -> Dict[str, Any]:
    """
    Parse a CIF file and extract features for model prediction.

    Extracts 93-dimensional features:
    - 7 lattice parameters (a, b, c, alpha, beta, gamma, volume)
    - 86 element composition fractions

    Args:
        cif: Crystal structure text in CIF format

    Returns:
        Dictionary containing lattice parameters and element composition
    """
    manager = get_model_manager()
    parser = manager.cif_parser
    features = parser.parse(cif)
    names = parser.get_feature_names()

    # The first 7 entries are the lattice descriptors.
    lattice = {name: float(value) for name, value in zip(names[:7], features[:7])}

    # Remaining entries are per-element fractions; report only elements
    # actually present in the structure.
    composition = {
        elem: float(frac)
        for elem, frac in zip(names[7:], features[7:])
        if frac > 0
    }

    return {
        "lattice_parameters": lattice,
        "composition": composition,
        "feature_dim": len(features),
        "status": "success"
    }
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
@mcp.tool()
def get_model_info() -> Dict[str, Any]:
    """
    Get model information and available prediction targets.

    Returns information about:
    - Available model types (XGBoost, Random Forest, Neural Network)
    - All 8 prediction targets and their availability
    - Input feature dimensions

    Returns:
        Dictionary containing model information
    """
    manager = get_model_manager()

    # Per-family availability: which targets have a loaded model.
    model_availability = {}
    for mt in VALID_MODEL_TYPES:
        available = manager.get_available_targets(model_type=mt)
        model_availability[mt] = {
            "available_targets": available,
            "count": len(available)
        }

    # Per-target summary with a boolean availability flag for each family.
    targets_info = []
    for target in ModelManager.TARGETS:
        info = ModelManager.TARGET_INFO.get(target, {})
        targets_info.append({
            "id": target,
            "name": info.get('name', target),
            "unit": info.get('unit', ''),
            "xgboost": target in model_availability['xgboost']['available_targets'],
            "random_forest": target in model_availability['random_forest']['available_targets'],
            "neural_network": target in model_availability['neural_network']['available_targets']
        })

    return {
        "available_model_types": VALID_MODEL_TYPES,
        "default_model_type": DEFAULT_MODEL_TYPE,
        "recommended_model_type": "xgboost",
        "input_features": 93,
        "targets": targets_info,
        "model_availability": model_availability,
        "total_targets": len(ModelManager.TARGETS)
    }
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
@mcp.tool()
def list_available_models() -> Dict[str, Any]:
    """
    List all available models and their status.

    Returns detailed information about which models are loaded and ready for inference.

    Returns:
        Dictionary containing model availability status for each target and model type
    """
    manager = get_model_manager()

    models_status = {}
    for mt in VALID_MODEL_TYPES:
        # BUGFIX: manager.models is nested {model_type: {target: model}}
        # (see ModelManager._load_models / predict), so the previous flat
        # f"{mt}_{target}" key lookup never matched and reported every
        # model as not_available. Check the nested dict instead.
        loaded_targets = manager.models.get(mt, {})
        models_status[mt] = {}
        for target in ModelManager.TARGETS:
            is_loaded = target in loaded_targets
            models_status[mt][target] = {
                "loaded": is_loaded,
                "status": "ready" if is_loaded else "not_available"
            }

    return {
        "models": models_status,
        "model_types": VALID_MODEL_TYPES,
        "targets": ModelManager.TARGETS,
        "status": "success"
    }
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
@mcp.tool()
def predict_ensemble(cif: str, targets: Optional[List[str]] = None) -> Dict[str, Any]:
    """
    Predict using all three model types and return ensemble results with comparison.

    Automatically calls XGBoost, Random Forest, and Neural Network models for the same input,
    allowing comparison of predictions across different model architectures.
    Also provides ensemble statistics (mean, std, min, max) for each target.

    Args:
        cif: Crystal structure text in CIF format
        targets: Optional list of specific targets to predict. If None, predicts all available targets.
                 Valid targets: pce, dft_band_gap, energy_above_hull, stability_retention,
                 stability_t80, voc, jsc, ff

    Returns:
        Dictionary containing predictions from all models and ensemble statistics
    """
    import numpy as np

    manager = get_model_manager()

    # Determine targets to predict
    if targets is None:
        targets = ModelManager.TARGETS

    # Collect predictions from all models.  A family that raises is recorded
    # as {"error": ...} and simply contributes no values to the statistics.
    all_predictions = {}
    for mt in VALID_MODEL_TYPES:
        try:
            result = manager.predict(cif, list(targets), model_type=mt)
            all_predictions[mt] = result
        except Exception as e:
            all_predictions[mt] = {"error": str(e)}

    # Calculate ensemble statistics for each target
    ensemble_results = {}
    for target in targets:
        # values: numeric predictions actually produced for this target;
        # model_values: per-family prediction (None when missing/failed).
        values = []
        model_values = {}

        for mt in VALID_MODEL_TYPES:
            if mt in all_predictions and target in all_predictions[mt]:
                val = all_predictions[mt][target]
                if val is not None:
                    values.append(val)
                    model_values[mt] = val
                else:
                    model_values[mt] = None
            else:
                # Family errored out (error dict has no target keys).
                model_values[mt] = None

        info = ModelManager.TARGET_INFO.get(target, {})

        if values:
            ensemble_results[target] = {
                "name": info.get('name', target),
                "unit": info.get('unit', ''),
                "predictions": model_values,
                "ensemble": {
                    "mean": float(np.mean(values)),
                    "std": float(np.std(values)),
                    "min": float(np.min(values)),
                    "max": float(np.max(values)),
                    "range": float(np.max(values) - np.min(values)),
                    "n_models": len(values)
                },
                "recommendation": _get_best_prediction(target, model_values)
            }
        else:
            # No family produced a value: report the all-None predictions
            # without statistics or a recommendation.
            ensemble_results[target] = {
                "name": info.get('name', target),
                "unit": info.get('unit', ''),
                "predictions": model_values,
                "ensemble": None,
                "recommendation": None
            }

    return {
        "targets_predicted": list(targets),
        "models_used": VALID_MODEL_TYPES,
        "results": ensemble_results,
        "raw_predictions": all_predictions,
        "status": "success"
    }
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
def _get_best_prediction(target: str, model_values: Dict[str, float]) -> Optional[Dict[str, Any]]:
    """
    Provide recommendation based on model performance characteristics.
    XGBoost is generally recommended as it has the best overall performance.

    Falls back to Random Forest, then Neural Network, in that order.
    Returns None when no model family produced a value for this target.
    (Return annotation widened to Optional to match the fall-through path.)
    """
    # XGBoost is the recommended model based on benchmark results
    if model_values.get('xgboost') is not None:
        return {
            "model": "xgboost",
            "value": model_values['xgboost'],
            "reason": "XGBoost recommended - best overall performance in benchmarks"
        }
    elif model_values.get('random_forest') is not None:
        return {
            "model": "random_forest",
            "value": model_values['random_forest'],
            "reason": "Random Forest - fallback when XGBoost unavailable"
        }
    elif model_values.get('neural_network') is not None:
        return {
            "model": "neural_network",
            "value": model_values['neural_network'],
            "reason": "Neural Network - fallback option"
        }
    return None
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
# ============ MCP Resources ============

@mcp.resource("psc://info")
def get_service_info() -> str:
    """Service information"""
    # The markdown below is served verbatim to MCP clients at psc://info;
    # keep its wording in sync with the tools registered in this module.
    return """
# Psc_Predict MCP Service

Perovskite Solar Cell Performance Prediction Service

## Features
- Predict 8 performance metrics from CIF crystal structures
- Support for single-target and multi-target prediction
- Multiple model types: XGBoost (recommended), Random Forest, Neural Network

## Available Model Types
1. **XGBoost** (default, recommended) - Best overall performance
2. **Random Forest** - Good interpretability
3. **Neural Network** - 3-layer MLP (128-64-32)

## Prediction Targets
1. PCE - Power Conversion Efficiency (%)
2. DFT Band Gap - DFT calculated band gap (eV)
3. Energy Above Hull - Thermodynamic stability (eV/atom)
4. Stability Retention - Stability retention percentage (%)
5. Stability T80 - T80 lifetime (hours)
6. Voc - Open-circuit voltage (V)
7. Jsc - Short-circuit current density (mA/cm²)
8. FF - Fill factor

## Input Format
CIF (Crystallographic Information File) format crystal structure text

## Usage Example
Call predict_pce(cif, model_type="xgboost") to predict PCE using XGBoost model.
"""
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
@mcp.resource("psc://example-cif")
def get_example_cif() -> str:
    """Example CIF file for testing"""
    # Cubic CsPbI3 perovskite (space group P m -3 m), served verbatim at
    # psc://example-cif as a ready-made input for the predict_* tools.
    return """data_CsPbI3
_symmetry_space_group_name_H-M 'P m -3 m'
_cell_length_a 6.2894
_cell_length_b 6.2894
_cell_length_c 6.2894
_cell_angle_alpha 90.0
_cell_angle_beta 90.0
_cell_angle_gamma 90.0
_cell_volume 248.89
_chemical_formula_sum 'Cs1 Pb1 I3'

loop_
_atom_site_label
_atom_site_type_symbol
_atom_site_fract_x
_atom_site_fract_y
_atom_site_fract_z
Cs1 Cs 0.0 0.0 0.0
Pb1 Pb 0.5 0.5 0.5
I1 I 0.5 0.5 0.0
I2 I 0.5 0.0 0.5
I3 I 0.0 0.5 0.5
"""
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
# ============ Main Entry Point ============

if __name__ == "__main__":
    # Transport is selected via the MCP_TRANSPORT environment variable
    # (not command-line arguments): "stdio" for local clients, anything
    # else falls through to SSE. The unused `import sys` was removed.
    transport = os.environ.get("MCP_TRANSPORT", "sse")

    if transport == "stdio":
        mcp.run(transport="stdio")
    else:
        # SSE mode (default, for HuggingFace): bind all interfaces on the
        # platform-assigned port (HF Spaces expects 7860 by default).
        host = os.environ.get("HOST", "0.0.0.0")
        port = int(os.environ.get("PORT", 7860))
        mcp.run(transport="sse", host=host, port=port)
|