SEUyishu committed on
Commit
c2d9a42
·
verified ·
1 Parent(s): b62550b

Upload 6 files

Browse files
Files changed (3) hide show
  1. Dockerfile +6 -2
  2. mcp_service.py +12 -2
  3. start_mcp.py +368 -188
Dockerfile CHANGED
@@ -16,6 +16,10 @@ ENV PYTHONUNBUFFERED=1
16
  ENV MCP_HOST=0.0.0.0
17
  ENV MCP_PORT=7860
18
 
 
 
 
 
19
  # Install system dependencies
20
  RUN apt-get update && apt-get install -y --no-install-recommends \
21
  build-essential \
@@ -45,9 +49,9 @@ RUN chmod -R 777 /app/sessions /app/temp
45
  # Expose MCP SSE port (HuggingFace Spaces uses 7860)
46
  EXPOSE 7860
47
 
48
- # Health check for MCP SSE endpoint
49
  HEALTHCHECK --interval=30s --timeout=30s --start-period=10s --retries=3 \
50
- CMD curl -f http://localhost:7860/sse || exit 1
51
 
52
  # Run MCP service in SSE mode
53
  CMD ["python", "start_mcp.py", "--mode", "sse", "--host", "0.0.0.0", "--port", "7860"]
 
16
  ENV MCP_HOST=0.0.0.0
17
  ENV MCP_PORT=7860
18
 
19
# Disable MCP transport security for reverse proxy (HuggingFace Space)
# NOTE(review): FASTMCP_ALLOWED_HOSTS=* accepts any Host header. That is
# acceptable only behind the Space's reverse proxy — do not reuse this image
# on a directly exposed host without tightening this.
ENV MCP_TRANSPORT_SECURITY_ENABLED=false
ENV FASTMCP_ALLOWED_HOSTS=*
22
+
23
  # Install system dependencies
24
  RUN apt-get update && apt-get install -y --no-install-recommends \
25
  build-essential \
 
49
  # Expose MCP SSE port (HuggingFace Spaces uses 7860)
50
  EXPOSE 7860
51
 
52
# Health check endpoint
# NOTE(review): this requires `curl` inside the image — confirm it is
# installed in the apt-get layer, otherwise the container is permanently
# reported unhealthy even when the server is fine.
HEALTHCHECK --interval=30s --timeout=30s --start-period=10s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run MCP service in SSE mode
CMD ["python", "start_mcp.py", "--mode", "sse", "--host", "0.0.0.0", "--port", "7860"]
mcp_service.py CHANGED
@@ -858,8 +858,18 @@ def get_extractor() -> GPTExtractor:
858
  gpt_extractor = GPTExtractor()
859
  return gpt_extractor
860
 
861
- # Create MCP server
862
- mcp = FastMCP("MaTableGPT-MCP")
 
 
 
 
 
 
 
 
 
 
863
 
864
  # =============================================================================
865
  # MCP Tools
 
858
  gpt_extractor = GPTExtractor()
859
  return gpt_extractor
860
 
861
# Create MCP server with permissive settings for HuggingFace Space.
# host/port are passed so FastMCP's built-in transports bind correctly
# behind the Space's reverse proxy.
mcp = FastMCP(
    "MaTableGPT-MCP",
    host="0.0.0.0",
    port=7860
)

# Best-effort attempt to relax request validation on the underlying
# low-level server (connections arrive via the HF reverse proxy).
# NOTE(review): in recent `mcp` SDK versions `request_context` is a
# read-only property backed by a ContextVar, so plain assignment raises
# AttributeError even though hasattr() returns True for the property.
# Guard the assignment so a failed override degrades gracefully instead
# of crashing the whole module at import time; transport security is
# additionally disabled via env vars in the Dockerfile.
if hasattr(mcp, '_mcp_server'):
    try:
        mcp._mcp_server.request_context = None  # Disable request validation
    except (AttributeError, TypeError):
        # Property without a setter (or incompatible SDK) — skip silently.
        pass
873
 
874
  # =============================================================================
875
  # MCP Tools
start_mcp.py CHANGED
@@ -1,188 +1,368 @@
1
- #!/usr/bin/env python3
2
- """
3
- MaTableGPT MCP Server Launcher
4
- ==============================
5
-
6
- This script starts the MaTableGPT MCP service for extracting
7
- table data from materials science literature.
8
-
9
- Usage:
10
- python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
11
-
12
- Arguments:
13
- --host Host address (default: 0.0.0.0)
14
- --port Port number (default: 7860)
15
- --mode Run mode: 'stdio' or 'sse' (default: sse for HuggingFace Space)
16
-
17
- Environment Variables:
18
- LLM_API_KEY / OPENAI_API_KEY - API key for LLM service
19
- LLM_API_BASE / OPENAI_API_BASE - Custom API base URL (for third-party services)
20
- LLM_MODEL / OPENAI_MODEL - Model name (default: gpt-4-turbo-preview)
21
- MCP_HOST - Server host (default: 0.0.0.0)
22
- MCP_PORT - Server port (default: 7860)
23
- """
24
-
25
- import os
26
- import sys
27
- import argparse
28
- import logging
29
-
30
- # Add current directory to path for imports
31
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
32
-
33
- # Configure logging
34
- logging.basicConfig(
35
- level=logging.INFO,
36
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
37
- )
38
- logger = logging.getLogger("matablgpt-mcp-launcher")
39
-
40
-
41
- def check_environment():
42
- """Check if required environment variables are set."""
43
- warnings = []
44
-
45
- # Check for API key (support both naming conventions)
46
- api_key = os.environ.get('LLM_API_KEY') or os.environ.get('OPENAI_API_KEY')
47
- if not api_key:
48
- warnings.append(
49
- "LLM_API_KEY/OPENAI_API_KEY not set. GPT extraction features will not work. "
50
- "Set it in HuggingFace Space secrets or environment variables."
51
- )
52
-
53
- # Check for API base (for third-party services)
54
- api_base = os.environ.get('LLM_API_BASE') or os.environ.get('OPENAI_API_BASE')
55
- if api_base:
56
- logger.info(f"Using custom API base: {api_base}")
57
-
58
- return warnings
59
-
60
-
61
- def check_dependencies():
62
- """Check if required packages are installed."""
63
- missing = []
64
-
65
- required = [
66
- ('mcp', 'mcp[cli]'),
67
- ('openai', 'openai'),
68
- ('bs4', 'beautifulsoup4'),
69
- ('pandas', 'pandas'),
70
- ('lxml', 'lxml')
71
- ]
72
-
73
- for module, package in required:
74
- try:
75
- __import__(module)
76
- except ImportError:
77
- missing.append(package)
78
-
79
- return missing
80
-
81
-
82
- def run_sse_server(host: str, port: int):
83
- """Run MCP server in SSE mode using uvicorn."""
84
- import uvicorn
85
- from mcp_service import mcp
86
-
87
- # Get the SSE app from FastMCP
88
- sse_app = mcp.sse_app()
89
-
90
- logger.info(f"Starting MCP SSE server on {host}:{port}")
91
- logger.info(f"SSE Endpoint: http://{host}:{port}/sse")
92
-
93
- # Run with uvicorn
94
- uvicorn.run(
95
- sse_app,
96
- host=host,
97
- port=port,
98
- log_level="info"
99
- )
100
-
101
-
102
- def run_stdio_server():
103
- """Run MCP server in stdio mode."""
104
- from mcp_service import mcp
105
- logger.info("Starting MCP server in stdio mode...")
106
- mcp.run()
107
-
108
-
109
- def main():
110
- """Main entry point."""
111
- # Get default values from environment variables
112
- default_host = os.environ.get('MCP_HOST', '0.0.0.0')
113
- default_port = int(os.environ.get('MCP_PORT', '7860'))
114
-
115
- parser = argparse.ArgumentParser(
116
- description="MaTableGPT MCP Server - Table Data Extraction from Materials Science Literature"
117
- )
118
- parser.add_argument(
119
- '--host',
120
- default=default_host,
121
- help=f'Host address (default: {default_host})'
122
- )
123
- parser.add_argument(
124
- '--port',
125
- type=int,
126
- default=default_port,
127
- help=f'Port number (default: {default_port})'
128
- )
129
- parser.add_argument(
130
- '--mode',
131
- choices=['stdio', 'sse'],
132
- default='sse',
133
- help='Run mode: stdio for standard I/O, sse for Server-Sent Events (default: sse)'
134
- )
135
- parser.add_argument(
136
- '--debug',
137
- action='store_true',
138
- help='Enable debug logging'
139
- )
140
-
141
- args = parser.parse_args()
142
-
143
- if args.debug:
144
- logging.getLogger().setLevel(logging.DEBUG)
145
-
146
- # Check dependencies
147
- missing = check_dependencies()
148
- if missing:
149
- logger.error(f"Missing required packages: {', '.join(missing)}")
150
- logger.error(f"Install with: pip install {' '.join(missing)}")
151
- sys.exit(1)
152
-
153
- # Check environment
154
- warnings = check_environment()
155
- for warning in warnings:
156
- logger.warning(warning)
157
-
158
- # Display startup info
159
- logger.info("=" * 60)
160
- logger.info("MaTableGPT MCP Server")
161
- logger.info("=" * 60)
162
- logger.info(f"Mode: {args.mode}")
163
- if args.mode == 'sse':
164
- logger.info(f"Host: {args.host}")
165
- logger.info(f"Port: {args.port}")
166
- logger.info(f"SSE Endpoint: http://{args.host}:{args.port}/sse")
167
- logger.info("=" * 60)
168
-
169
- # Run the server
170
- try:
171
- if args.mode == 'stdio':
172
- run_stdio_server()
173
- else:
174
- run_sse_server(args.host, args.port)
175
-
176
- except ImportError as e:
177
- logger.error(f"Failed to import required module: {e}")
178
- logger.error("Make sure all dependencies are installed: pip install -r requirements.txt")
179
- sys.exit(1)
180
- except Exception as e:
181
- logger.error(f"Error starting MCP server: {e}")
182
- import traceback
183
- traceback.print_exc()
184
- sys.exit(1)
185
-
186
-
187
- if __name__ == "__main__":
188
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ MaTableGPT MCP Server Launcher
4
+ ==============================
5
+
6
+ This script starts the MaTableGPT MCP service for extracting
7
+ table data from materials science literature.
8
+
9
+ Usage:
10
+ python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
11
+
12
+ Arguments:
13
+ --host Host address (default: 0.0.0.0)
14
+ --port Port number (default: 7860)
15
+ --mode Run mode: 'stdio' or 'sse' (default: sse for HuggingFace Space)
16
+
17
+ Environment Variables:
18
+ LLM_API_KEY / OPENAI_API_KEY - API key for LLM service
19
+ LLM_API_BASE / OPENAI_API_BASE - Custom API base URL (for third-party services)
20
+ LLM_MODEL / OPENAI_MODEL - Model name (default: gpt-4-turbo-preview)
21
+ MCP_HOST - Server host (default: 0.0.0.0)
22
+ MCP_PORT - Server port (default: 7860)
23
+ """
24
+
25
+ import os
26
+ import sys
27
+ import argparse
28
+ import logging
29
+
30
+ # Add current directory to path for imports
31
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
32
+
33
+ # Configure logging
34
+ logging.basicConfig(
35
+ level=logging.INFO,
36
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
37
+ )
38
+ logger = logging.getLogger("matablgpt-mcp-launcher")
39
+
40
+
41
def check_environment():
    """Check if required environment variables are set."""
    env = os.environ
    issues = []

    # Either naming convention is accepted for the API key.
    has_key = bool(env.get('LLM_API_KEY') or env.get('OPENAI_API_KEY'))
    if not has_key:
        issues.append(
            "LLM_API_KEY/OPENAI_API_KEY not set. GPT extraction features will not work. "
            "Set it in HuggingFace Space secrets or environment variables."
        )

    # A custom base URL (OpenAI-compatible third-party services) is optional;
    # just log it when present.
    custom_base = env.get('LLM_API_BASE') or env.get('OPENAI_API_BASE')
    if custom_base:
        logger.info(f"Using custom API base: {custom_base}")

    return issues
59
+
60
+
61
def check_dependencies():
    """Check if required packages are installed.

    Returns:
        List of pip package names (in declaration order) that failed to import.
    """
    def _importable(module_name):
        # Import by module name; any ImportError means the package is absent.
        try:
            __import__(module_name)
            return True
        except ImportError:
            return False

    required = [
        ('mcp', 'mcp[cli]'),
        ('openai', 'openai'),
        ('bs4', 'beautifulsoup4'),
        ('pandas', 'pandas'),
        ('lxml', 'lxml'),
    ]
    return [pip_name for module_name, pip_name in required if not _importable(module_name)]
80
+
81
+
82
def run_sse_server(host: str, port: int):
    """Run the MCP server in SSE mode behind a hand-rolled Starlette app.

    Exposes:
      GET  /          - human-readable service info page
      GET  /health    - JSON health check (used by the Docker HEALTHCHECK)
      GET  /sse       - SSE event stream; first event announces the message endpoint
      POST /messages/ - JSON-RPC messages from the MCP client

    Args:
        host: Interface for uvicorn to bind.
        port: TCP port to listen on.
    """
    import uvicorn
    from starlette.applications import Starlette
    from starlette.routing import Route
    from starlette.responses import JSONResponse, HTMLResponse
    from starlette.requests import Request
    import asyncio
    import json

    # Import MCP components
    from mcp_service import mcp

    # Per-connection outgoing message queues, keyed by session id.
    sse_connections = {}

    def _list_tools():
        """Build the tools/list payload from FastMCP's tool registry.

        NOTE(review): FastMCP's private registry maps names to Tool objects
        (with .description / .parameters), not raw functions — using
        __doc__ on the registry value yields the Tool class docstring, not
        the tool's. Fall back gracefully for older SDK layouts; confirm
        against the installed `mcp` version.
        """
        tools = []
        for tool_name, tool in mcp._tool_manager._tools.items():
            description = getattr(tool, "description", None) or (tool.__doc__ or "")
            schema = (
                getattr(tool, "parameters", None)
                or getattr(tool, "inputSchema", None)
                or {"type": "object", "properties": {}}
            )
            tools.append({
                "name": tool_name,
                "description": description,
                "inputSchema": schema,
            })
        return tools

    async def _call_tool(tool_name, tool_args):
        """Invoke a registered tool, supporting both sync and async tools."""
        tool = mcp._tool_manager._tools[tool_name]
        # Tool objects wrap the callable in .fn; older registries may store
        # the function directly.
        fn = getattr(tool, "fn", tool)
        result = fn(**tool_args)
        if asyncio.iscoroutine(result):
            # BUG FIX: async tools previously returned an un-awaited
            # coroutine, which json.dumps() cannot serialize.
            result = await result
        return result

    async def handle_sse(request: Request):
        """Open an SSE stream and announce the POST endpoint for this session."""
        import uuid
        from sse_starlette.sse import EventSourceResponse

        connection_id = str(uuid.uuid4())
        message_queue = asyncio.Queue()
        sse_connections[connection_id] = message_queue

        logger.info(f"New SSE connection: {connection_id}")

        async def event_generator():
            try:
                # BUG FIX: per the MCP HTTP+SSE transport, the `endpoint`
                # event's data is the endpoint URI itself (a plain string),
                # not a JSON object — standard clients could not parse the
                # previous {"url": ...} payload.
                yield {
                    "event": "endpoint",
                    "data": f"/messages/?session_id={connection_id}",
                }

                # Pump queued responses to the client; emit a keepalive ping
                # when the queue stays empty for 30 seconds.
                while True:
                    try:
                        message = await asyncio.wait_for(message_queue.get(), timeout=30)
                        yield {"event": "message", "data": json.dumps(message)}
                    except asyncio.TimeoutError:
                        yield {"event": "ping", "data": ""}

            except asyncio.CancelledError:
                logger.info(f"SSE connection closed: {connection_id}")
            finally:
                # Drop the queue so handle_messages rejects stale sessions.
                sse_connections.pop(connection_id, None)

        return EventSourceResponse(event_generator())

    async def handle_messages(request: Request):
        """Dispatch a JSON-RPC message from the client; reply over the SSE stream."""
        try:
            session_id = request.query_params.get("session_id")
            if not session_id or session_id not in sse_connections:
                return JSONResponse(
                    {"error": "Invalid or missing session_id"},
                    status_code=400
                )

            body = await request.json()
            logger.debug(f"Received message: {body}")

            method = body.get("method", "")
            params = body.get("params", {})
            msg_id = body.get("id")

            # BUG FIX: JSON-RPC notifications (no "id", e.g.
            # notifications/initialized) must not receive a response at all;
            # previously they were answered with a bogus "Method not found".
            if msg_id is None:
                return JSONResponse({"status": "accepted"}, status_code=202)

            if method == "initialize":
                response = {
                    "jsonrpc": "2.0",
                    "id": msg_id,
                    "result": {
                        "protocolVersion": "2024-11-05",
                        "serverInfo": {
                            "name": "MaTableGPT-MCP",
                            "version": "1.0.0"
                        },
                        "capabilities": {
                            "tools": {"listChanged": True}
                        }
                    }
                }
            elif method == "tools/list":
                response = {
                    "jsonrpc": "2.0",
                    "id": msg_id,
                    "result": {"tools": _list_tools()}
                }
            elif method == "tools/call":
                tool_name = params.get("name")
                tool_args = params.get("arguments", {})

                if tool_name in mcp._tool_manager._tools:
                    try:
                        result = await _call_tool(tool_name, tool_args)
                        response = {
                            "jsonrpc": "2.0",
                            "id": msg_id,
                            "result": {
                                "content": [{"type": "text", "text": json.dumps(result)}]
                            }
                        }
                    except Exception as e:
                        response = {
                            "jsonrpc": "2.0",
                            "id": msg_id,
                            "error": {"code": -32000, "message": str(e)}
                        }
                else:
                    response = {
                        "jsonrpc": "2.0",
                        "id": msg_id,
                        "error": {"code": -32601, "message": f"Tool not found: {tool_name}"}
                    }
            else:
                response = {
                    "jsonrpc": "2.0",
                    "id": msg_id,
                    "error": {"code": -32601, "message": f"Method not found: {method}"}
                }

            # Responses travel back over the session's SSE stream.
            await sse_connections[session_id].put(response)

            return JSONResponse({"status": "accepted"}, status_code=202)

        except Exception as e:
            logger.error(f"Error handling message: {e}")
            return JSONResponse({"error": str(e)}, status_code=500)

    async def health_check(request: Request):
        """Health check endpoint."""
        return JSONResponse({
            "status": "ok",
            "service": "MaTableGPT MCP",
            "connections": len(sse_connections)
        })

    async def homepage(request: Request):
        """Homepage with service info."""
        html = """
        <!DOCTYPE html>
        <html>
        <head><title>MaTableGPT MCP Service</title></head>
        <body>
            <h1>🔬 MaTableGPT MCP Service</h1>
            <p>GPT-based Table Data Extractor from Materials Science Literature</p>
            <h2>Endpoints:</h2>
            <ul>
                <li><strong>SSE:</strong> <code>/sse</code></li>
                <li><strong>Messages:</strong> <code>/messages/</code></li>
                <li><strong>Health:</strong> <code>/health</code></li>
            </ul>
            <h2>Status: ✅ Running</h2>
            <p>Connect with: <code>{"url": "https://your-space.hf.space/sse"}</code></p>
        </body>
        </html>
        """
        return HTMLResponse(html)

    # Create Starlette app with routes.
    # NOTE(review): debug=True returns tracebacks to clients — consider
    # turning it off for a public Space.
    app = Starlette(
        debug=True,
        routes=[
            Route("/", homepage),
            Route("/health", health_check),
            Route("/sse", handle_sse),
            Route("/messages/", handle_messages, methods=["POST"]),
        ]
    )

    logger.info(f"Starting MCP SSE server on {host}:{port}")
    logger.info(f"SSE Endpoint: http://{host}:{port}/sse")
    logger.info(f"Messages Endpoint: http://{host}:{port}/messages/")

    # Run with uvicorn
    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info"
    )
280
+
281
+
282
def run_stdio_server():
    """Run the MCP server over standard input/output (local MCP clients)."""
    from mcp_service import mcp as server
    logger.info("Starting MCP server in stdio mode...")
    server.run()
287
+
288
+
289
def main():
    """Main entry point: parse CLI args, validate the environment, launch the server.

    Exits with status 1 on missing dependencies or server startup failure.
    """
    # Get default values from environment variables.
    default_host = os.environ.get('MCP_HOST', '0.0.0.0')
    try:
        default_port = int(os.environ.get('MCP_PORT', '7860'))
    except ValueError:
        # BUG FIX: a malformed MCP_PORT previously raised an unhandled
        # ValueError before argparse could even print usage; fall back to
        # the documented default instead.
        logger.warning("Invalid MCP_PORT value; falling back to 7860")
        default_port = 7860

    parser = argparse.ArgumentParser(
        description="MaTableGPT MCP Server - Table Data Extraction from Materials Science Literature"
    )
    parser.add_argument(
        '--host',
        default=default_host,
        help=f'Host address (default: {default_host})'
    )
    parser.add_argument(
        '--port',
        type=int,
        default=default_port,
        help=f'Port number (default: {default_port})'
    )
    parser.add_argument(
        '--mode',
        choices=['stdio', 'sse'],
        default='sse',
        help='Run mode: stdio for standard I/O, sse for Server-Sent Events (default: sse)'
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Enable debug logging'
    )

    args = parser.parse_args()

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Fail fast if any required package is absent.
    missing = check_dependencies()
    if missing:
        logger.error(f"Missing required packages: {', '.join(missing)}")
        logger.error(f"Install with: pip install {' '.join(missing)}")
        sys.exit(1)

    # Environment problems are non-fatal: the server can start without an
    # API key, but GPT-backed tools will fail at call time.
    for warning in check_environment():
        logger.warning(warning)

    # Display startup info.
    logger.info("=" * 60)
    logger.info("MaTableGPT MCP Server")
    logger.info("=" * 60)
    logger.info(f"Mode: {args.mode}")
    if args.mode == 'sse':
        logger.info(f"Host: {args.host}")
        logger.info(f"Port: {args.port}")
        logger.info(f"SSE Endpoint: http://{args.host}:{args.port}/sse")
    logger.info("=" * 60)

    # Run the server.
    try:
        if args.mode == 'stdio':
            run_stdio_server()
        else:
            run_sse_server(args.host, args.port)

    except ImportError as e:
        logger.error(f"Failed to import required module: {e}")
        logger.error("Make sure all dependencies are installed: pip install -r requirements.txt")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Error starting MCP server: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
365
+
366
+
367
+ if __name__ == "__main__":
368
+ main()