SEUyishu committed on
Commit
bec0b04
·
verified ·
1 Parent(s): 778fec6

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +40 -0
  2. app.py +91 -0
  3. config.yml +248 -0
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.10-slim

# Create the non-root user (UID 1000) for the HuggingFace Space, then install
# the build toolchain and git and upgrade pip in the same layer while still
# running as root. The apt lists are removed to keep the layer small.
RUN useradd -m -u 1000 user && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/* && \
    python -m pip install --upgrade pip

# Everything below runs as the unprivileged user.
USER user
ENV PATH="/home/user/.local/bin:$PATH"
ENV HOME="/home/user"

WORKDIR /app

# Copy requirements first for better caching
COPY --chown=user ./mcp_output/requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Copy the entire MatDeepLearn project
COPY --chown=user . /app

# Set Python path to include MatDeepLearn.
# PYTHONPATH is not set by any earlier instruction in this file, so the
# previous "/app:$PYTHONPATH" form expanded to "/app:" (a trailing empty
# entry) and triggers an undefined-variable build warning. Set it outright.
ENV PYTHONPATH=/app

# Expose HuggingFace default port
EXPOSE 7860

# Set environment variables for MCP service
ENV MCP_TRANSPORT=http
ENV MCP_PORT=7860

# Health check: poll the /health endpoint on the exposed port; a failed
# request makes the python command exit non-zero, which marks the probe failed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')" || exit 1

# Start MCP service
CMD ["python", "mcp_output/start_mcp.py"]
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MatDeepLearn MCP Service - HuggingFace Space Entry Point
3
+
4
+ This file provides a FastAPI application for health checks and service info.
5
+ The actual MCP service is started via start_mcp.py.
6
+ """
7
+ from fastapi import FastAPI
8
+ from fastapi.responses import JSONResponse
9
+ import os
10
+ import sys
11
+
# Put the directory that contains this file at the front of sys.path
# (only if it is not already listed) so project modules can be imported.
_this_file = os.path.abspath(__file__)
project_root = os.path.dirname(_this_file)
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

# Metadata passed to FastAPI when constructing the application object.
_SERVICE_METADATA = {
    "title": "MatDeepLearn MCP Service",
    "description": "Graph Neural Networks for Materials Property Prediction",
    "version": "1.0.0",
}

app = FastAPI(**_SERVICE_METADATA)
22
+
23
+
@app.get("/")
async def root():
    """Return a summary of the service.

    The response carries a fixed status and name, the transport read from
    the ``MCP_TRANSPORT`` environment variable, and the list of demo model
    names.
    """
    model_names = [
        "CGCNN_demo", "MPNN_demo", "SchNet_demo",
        "MEGNet_demo", "GCN_demo", "SOAP_demo", "SM_demo",
    ]
    # Reports "stdio" when MCP_TRANSPORT is not set in the environment.
    transport = os.environ.get("MCP_TRANSPORT", "stdio")

    payload = dict(
        status="ok",
        service="MatDeepLearn MCP Service",
        description="Graph Neural Networks for Materials Property Prediction",
        transport=transport,
        available_models=model_names,
    )
    return payload
37
+
38
+
@app.get("/health")
async def health():
    """Health check endpoint.

    Returns:
        A dict with ``"status"`` fixed to ``"healthy"`` and
        ``"gpu_available"`` set to the result of
        ``torch.cuda.is_available()``, or ``False`` when torch cannot be
        imported or the probe raises.
    """
    try:
        import torch

        gpu_available = torch.cuda.is_available()
    except Exception:
        # Best-effort probe: a missing or broken torch install must not fail
        # the health check. Catch ``Exception`` rather than using a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        gpu_available = False

    return {
        "status": "healthy",
        "gpu_available": gpu_available,
    }
52
+
53
+
@app.get("/info")
async def info():
    """Detailed service information.

    Returns:
        A dict with the service name and version, the torch version string,
        GPU availability and count, and the list of MCP tool names. When
        torch cannot be imported or queried, the torch version is reported
        as ``"N/A"``, ``gpu_available`` as ``False`` and ``gpu_count`` as 0.
    """
    try:
        import torch

        torch_version = torch.__version__
        gpu_available = torch.cuda.is_available()
        # Only ask for the device count when CUDA is reported as available.
        gpu_count = torch.cuda.device_count() if gpu_available else 0
    except Exception:
        # Best-effort probe: fall back to placeholder values instead of
        # failing the request. Catch ``Exception`` rather than using a bare
        # ``except`` so KeyboardInterrupt/SystemExit still propagate.
        torch_version = "N/A"
        gpu_available = False
        gpu_count = 0

    return {
        "service": "MatDeepLearn MCP Service",
        "version": "1.0.0",
        "torch_version": torch_version,
        "gpu_available": gpu_available,
        "gpu_count": gpu_count,
        "mcp_tools": [
            "check_environment",
            "list_available_models",
            "get_model_config",
            "process_structure_data",
            "train_model",
            "predict_properties",
            "cross_validation",
            "analyze_structure",
            "compare_models",
            "get_dataset_info",
        ],
    }
86
+
87
+
if __name__ == "__main__":
    # uvicorn is imported here, inside the guard, so it is only needed when
    # this file is executed directly.
    import uvicorn

    # Listen on the port named by the PORT environment variable (7860 if unset).
    port = int(os.getenv("PORT", "7860"))
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
    )
config.yml ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Job:
2
+ run_mode: "Training"
3
+ #{Training, Predict, Repeat, CV, Hyperparameter, Ensemble, Analysis}
4
+ Training:
5
+ job_name: "my_train_job"
6
+ reprocess: "False"
7
+ model: CGCNN_demo
8
+ load_model: "False"
9
+ save_model: "True"
10
+ model_path: "my_model.pth"
11
+ write_output: "True"
12
+ parallel: "True"
13
+ #seed=0 means random initialization
14
+ seed: 0
15
+ Predict:
16
+ job_name: "my_predict_job"
17
+ reprocess: "False"
18
+ model_path: "my_model.pth"
19
+ write_output: "True"
20
+ seed: 0
21
+ Repeat:
22
+ job_name: "my_repeat_job"
23
+ reprocess: "False"
24
+ model: CGCNN_demo
25
+ model_path: "my_model.pth"
26
+ write_output: "False"
27
+ parallel: "True"
28
+ seed: 0
29
+ ###specific options
30
+ #number of repeat trials
31
+ repeat_trials: 5
32
+ CV:
33
+ job_name: "my_CV_job"
34
+ reprocess: "False"
35
+ model: CGCNN_demo
36
+ write_output: "True"
37
+ parallel: "True"
38
+ seed: 0
39
+ ###specific options
40
+ #number of folds for n-fold CV
41
+ cv_folds: 5
42
+ Hyperparameter:
43
+ job_name: "my_hyperparameter_job"
44
+ reprocess: "False"
45
+ model: CGCNN_demo
46
+ seed: 0
47
+ ###specific options
48
+ hyper_trials: 10
49
+ #number of concurrent trials (can be greater than number of GPUs)
50
+ hyper_concurrency: 8
51
+ #frequency of checkpointing and update (default: 1)
52
+ hyper_iter: 1
53
+ #resume a previous hyperparameter optimization run
54
+ hyper_resume: "True"
55
+ #Verbosity of ray tune output; available: (1, 2, 3)
56
+ hyper_verbosity: 1
57
+ #Delete processed datasets
58
+ hyper_delete_processed: "True"
59
+ Ensemble:
60
+ job_name: "my_ensemble_job"
61
+ reprocess: "False"
62
+ save_model: "False"
63
+ model_path: "my_model.pth"
64
+ write_output: "Partial"
65
+ parallel: "True"
66
+ seed: 0
67
+ ###specific options
68
+ #List of models to use: (Example: "CGCNN_demo,MPNN_demo,SchNet_demo,MEGNet_demo" or "CGCNN_demo,CGCNN_demo,CGCNN_demo,CGCNN_demo")
69
+ ensemble_list: "CGCNN_demo,CGCNN_demo,CGCNN_demo,CGCNN_demo,CGCNN_demo"
70
+ Analysis:
71
+ job_name: "my_job"
72
+ reprocess: "False"
73
+ model: CGCNN_demo
74
+ model_path: "my_model.pth"
75
+ write_output: "True"
76
+ seed: 0
77
+
78
+ Processing:
79
+ #Whether to use "inmemory" or "large" format for pytorch-geometric dataset. Recommend inmemory unless the dataset is too large
80
+ dataset_type: "inmemory"
81
+ #Path to data files
82
+ data_path: "/data"
83
+ #Path to target file within data_path
84
+ target_path: "targets.csv"
85
+ #Method of obtaining atom dictionary: available:(provided, default, blank, generated)
86
+ dictionary_source: "default"
87
+ #Path to atom dictionary file within data_path
88
+ dictionary_path: "atom_dict.json"
89
+ #Format of data files (limit to those supported by ASE)
90
+ data_format: "json"
91
+ #Print out processing info
92
+ verbose: "True"
93
+ #graph specific settings
94
+ graph_max_radius : 8.0
95
+ graph_max_neighbors : 12
96
+ voronoi: "False"
97
+ edge_features: "True"
98
+ graph_edge_length : 50
99
+ #SM specific settings
100
+ SM_descriptor: "False"
101
+ #SOAP specific settings
102
+ SOAP_descriptor: "False"
103
+ SOAP_rcut : 8.0
104
+ SOAP_nmax : 6
105
+ SOAP_lmax : 4
106
+ SOAP_sigma : 0.3
107
+
108
+ Training:
109
+ #Index of target column in targets.csv
110
+ target_index: 0
111
+ #Loss functions (from pytorch) examples: l1_loss, mse_loss, binary_cross_entropy
112
+ loss: "l1_loss"
113
+ #Ratios for train/val/test split out of a total of 1
114
+ train_ratio: 0.8
115
+ val_ratio: 0.05
116
+ test_ratio: 0.15
117
+ #Training print out frequency (print per n number of epochs)
118
+ verbosity: 5
119
+
120
+ Models:
121
+ CGCNN_demo:
122
+ model: CGCNN
123
+ dim1: 100
124
+ dim2: 150
125
+ pre_fc_count: 1
126
+ gc_count: 4
127
+ post_fc_count: 3
128
+ pool: "global_mean_pool"
129
+ pool_order: "early"
130
+ batch_norm: "True"
131
+ batch_track_stats: "True"
132
+ act: "relu"
133
+ dropout_rate: 0.0
134
+ epochs: 250
135
+ lr: 0.002
136
+ batch_size: 100
137
+ optimizer: "AdamW"
138
+ optimizer_args: {}
139
+ scheduler: "ReduceLROnPlateau"
140
+ scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}
141
+ MPNN_demo:
142
+ model: MPNN
143
+ dim1: 100
144
+ dim2: 100
145
+ dim3: 100
146
+ pre_fc_count: 1
147
+ gc_count: 4
148
+ post_fc_count: 3
149
+ pool: "global_mean_pool"
150
+ pool_order: "early"
151
+ batch_norm: "True"
152
+ batch_track_stats: "True"
153
+ act: "relu"
154
+ dropout_rate: 0.0
155
+ epochs: 250
156
+ lr: 0.001
157
+ batch_size: 100
158
+ optimizer: "AdamW"
159
+ optimizer_args: {}
160
+ scheduler: "ReduceLROnPlateau"
161
+ scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}
162
+ SchNet_demo:
163
+ model: SchNet
164
+ dim1: 100
165
+ dim2: 100
166
+ dim3: 150
167
+ cutoff: 8
168
+ pre_fc_count: 1
169
+ gc_count: 4
170
+ post_fc_count: 3
171
+ pool: "global_mean_pool"
172
+ pool_order: "early"
173
+ batch_norm: "True"
174
+ batch_track_stats: "True"
175
+ act: "relu"
176
+ dropout_rate: 0.0
177
+ epochs: 250
178
+ lr: 0.0005
179
+ batch_size: 100
180
+ optimizer: "AdamW"
181
+ optimizer_args: {}
182
+ scheduler: "ReduceLROnPlateau"
183
+ scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}
184
+ MEGNet_demo:
185
+ model: MEGNet
186
+ dim1: 100
187
+ dim2: 100
188
+ dim3: 100
189
+ pre_fc_count: 1
190
+ gc_count: 4
191
+ gc_fc_count: 1
192
+ post_fc_count: 3
193
+ pool: "global_mean_pool"
194
+ pool_order: "early"
195
+ batch_norm: "True"
196
+ batch_track_stats: "True"
197
+ act: "relu"
198
+ dropout_rate: 0.0
199
+ epochs: 250
200
+ lr: 0.0005
201
+ batch_size: 100
202
+ optimizer: "AdamW"
203
+ optimizer_args: {}
204
+ scheduler: "ReduceLROnPlateau"
205
+ scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}
206
+ GCN_demo:
207
+ model: GCN
208
+ dim1: 100
209
+ dim2: 150
210
+ pre_fc_count: 1
211
+ gc_count: 4
212
+ post_fc_count: 3
213
+ pool: "global_mean_pool"
214
+ pool_order: "early"
215
+ batch_norm: "True"
216
+ batch_track_stats: "True"
217
+ act: "relu"
218
+ dropout_rate: 0.0
219
+ epochs: 250
220
+ lr: 0.002
221
+ batch_size: 100
222
+ optimizer: "AdamW"
223
+ optimizer_args: {}
224
+ scheduler: "ReduceLROnPlateau"
225
+ scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}
226
+ SM_demo:
227
+ model: SM
228
+ dim1: 100
229
+ fc_count: 2
230
+ epochs: 200
231
+ lr: 0.002
232
+ batch_size: 100
233
+ optimizer: "AdamW"
234
+ optimizer_args: {}
235
+ scheduler: "ReduceLROnPlateau"
236
+ scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}
237
+ SOAP_demo:
238
+ model: SOAP
239
+ dim1: 100
240
+ fc_count: 2
241
+ epochs: 200
242
+ lr: 0.002
243
+ batch_size: 100
244
+ optimizer: "AdamW"
245
+ optimizer_args: {}
246
+ scheduler: "ReduceLROnPlateau"
247
+ scheduler_args: {"mode":"min", "factor":0.8, "patience":10, "min_lr":0.00001, "threshold":0.0002}
248
+