SEUyishu committed on
Commit
268cef6
·
verified ·
1 Parent(s): b410b8e

Update start_mcp.py

Browse files
Files changed (1) hide show
  1. start_mcp.py +109 -297
start_mcp.py CHANGED
@@ -1,31 +1,21 @@
1
  #!/usr/bin/env python3
2
  """
3
- MaTableGPT MCP Server Launcher
4
- ==============================
5
 
6
- This script starts the MaTableGPT MCP service for extracting
7
- table data from materials science literature.
8
 
9
  Usage:
10
  python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
11
-
12
- Arguments:
13
- --host Host address (default: 0.0.0.0)
14
- --port Port number (default: 7860)
15
- --mode Run mode: 'stdio' or 'sse' (default: sse for HuggingFace Space)
16
-
17
- Environment Variables:
18
- LLM_API_KEY / OPENAI_API_KEY - API key for LLM service
19
- LLM_API_BASE / OPENAI_API_BASE - Custom API base URL (for third-party services)
20
- LLM_MODEL / OPENAI_MODEL - Model name (default: gpt-4-turbo-preview)
21
- MCP_HOST - Server host (default: 0.0.0.0)
22
- MCP_PORT - Server port (default: 7860)
23
  """
24
 
25
  import os
26
  import sys
27
  import argparse
28
  import logging
 
 
 
29
 
30
  # Add current directory to path for imports
31
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -35,345 +25,167 @@ logging.basicConfig(
35
  level=logging.INFO,
36
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
37
  )
38
- logger = logging.getLogger("matablgpt-mcp-launcher")
39
-
40
-
41
- def check_environment():
42
- """Check if required environment variables are set."""
43
- warnings = []
44
-
45
- # Check for API key (support both naming conventions)
46
- api_key = os.environ.get('LLM_API_KEY') or os.environ.get('OPENAI_API_KEY')
47
- if not api_key:
48
- warnings.append(
49
- "LLM_API_KEY/OPENAI_API_KEY not set. GPT extraction features will not work. "
50
- "Set it in HuggingFace Space secrets or environment variables."
51
- )
52
-
53
- # Check for API base (for third-party services)
54
- api_base = os.environ.get('LLM_API_BASE') or os.environ.get('OPENAI_API_BASE')
55
- if api_base:
56
- logger.info(f"Using custom API base: {api_base}")
57
-
58
- return warnings
59
-
60
-
61
- def check_dependencies():
62
- """Check if required packages are installed."""
63
- missing = []
64
-
65
- required = [
66
- ('mcp', 'mcp[cli]'),
67
- ('openai', 'openai'),
68
- ('bs4', 'beautifulsoup4'),
69
- ('pandas', 'pandas'),
70
- ('lxml', 'lxml')
71
- ]
72
-
73
- for module, package in required:
74
- try:
75
- __import__(module)
76
- except ImportError:
77
- missing.append(package)
78
-
79
- return missing
80
 
81
 
82
  def run_sse_server(host: str, port: int):
83
- """Run MCP server in SSE mode using uvicorn with custom app."""
84
  import uvicorn
85
  from starlette.applications import Starlette
86
  from starlette.routing import Route
87
- from starlette.responses import JSONResponse, HTMLResponse, Response
88
  from starlette.requests import Request
89
- import asyncio
90
- import json
91
 
92
- # Import MCP components
93
  from mcp_service import mcp
94
 
95
- # Store for SSE connections and their message queues
96
- sse_connections = {}
97
 
98
- async def handle_sse(request: Request):
99
- """Handle SSE connections - returns event stream."""
100
- import uuid
 
 
101
 
102
- connection_id = str(uuid.uuid4())
103
- message_queue = asyncio.Queue()
104
- sse_connections[connection_id] = message_queue
105
 
106
- logger.info(f"New SSE connection: {connection_id}")
107
-
108
- async def event_generator():
109
- """Generate SSE events."""
110
- # Send initial connection event with endpoint info
111
- endpoint_data = json.dumps({"url": f"/messages/?session_id={connection_id}"})
112
- yield f"event: endpoint\ndata: {endpoint_data}\n\n"
113
-
114
- # Keep connection alive and send messages
115
- while True:
116
- try:
117
- # Wait for messages with timeout for keepalive
118
- message = await asyncio.wait_for(message_queue.get(), timeout=30)
119
- message_data = json.dumps(message)
120
- yield f"event: message\ndata: {message_data}\n\n"
121
- except asyncio.TimeoutError:
122
- # Send keepalive ping
123
- yield f": ping\n\n"
124
- except asyncio.CancelledError:
125
- logger.info(f"SSE connection closed: {connection_id}")
126
- break
127
- except Exception as e:
128
- logger.error(f"Error in SSE generator: {e}")
129
- break
130
-
131
- # Cleanup
132
- sse_connections.pop(connection_id, None)
133
-
134
- from starlette.responses import StreamingResponse
135
 
136
  return StreamingResponse(
137
- event_generator(),
138
  media_type="text/event-stream",
139
  headers={
140
  "Cache-Control": "no-cache",
141
  "Connection": "keep-alive",
142
- "X-Accel-Buffering": "no", # Disable nginx buffering
143
  }
144
  )
145
 
146
- async def handle_messages(request: Request):
147
- """Handle POST messages from MCP client."""
 
 
 
 
 
148
  try:
149
- session_id = request.query_params.get("session_id")
150
- if not session_id or session_id not in sse_connections:
151
- return JSONResponse(
152
- {"error": "Invalid or missing session_id"},
153
- status_code=400
154
- )
155
-
156
  body = await request.json()
157
- logger.debug(f"Received message: {body}")
158
 
159
- # Process the MCP request
160
- # Get the method and params
161
  method = body.get("method", "")
162
  params = body.get("params", {})
163
  msg_id = body.get("id")
164
 
165
- # Handle different MCP methods
166
  if method == "initialize":
167
- response = {
168
- "jsonrpc": "2.0",
169
- "id": msg_id,
170
- "result": {
171
- "protocolVersion": "2024-11-05",
172
- "serverInfo": {
173
- "name": "MaTableGPT-MCP",
174
- "version": "1.0.0"
175
- },
176
- "capabilities": {
177
- "tools": {"listChanged": True}
178
- }
179
- }
180
  }
 
 
 
181
  elif method == "tools/list":
182
- # Get tools from MCP server
183
  tools = []
184
- for tool_name, tool_func in mcp._tool_manager._tools.items():
185
- tool_info = {
186
- "name": tool_name,
187
- "description": tool_func.__doc__ or "",
188
  "inputSchema": {"type": "object", "properties": {}}
189
- }
190
- tools.append(tool_info)
191
-
192
- response = {
193
- "jsonrpc": "2.0",
194
- "id": msg_id,
195
- "result": {"tools": tools}
196
- }
197
  elif method == "tools/call":
198
  tool_name = params.get("name")
199
  tool_args = params.get("arguments", {})
200
 
201
- try:
202
- # Call the tool
203
- if tool_name in mcp._tool_manager._tools:
204
- result = mcp._tool_manager._tools[tool_name](**tool_args)
205
- response = {
206
- "jsonrpc": "2.0",
207
- "id": msg_id,
208
- "result": {
209
- "content": [{"type": "text", "text": json.dumps(result)}]
210
- }
211
- }
212
- else:
213
- response = {
214
- "jsonrpc": "2.0",
215
- "id": msg_id,
216
- "error": {"code": -32601, "message": f"Tool not found: {tool_name}"}
217
- }
218
- except Exception as e:
219
- response = {
220
- "jsonrpc": "2.0",
221
- "id": msg_id,
222
- "error": {"code": -32000, "message": str(e)}
223
- }
224
  else:
225
- response = {
226
- "jsonrpc": "2.0",
227
- "id": msg_id,
228
- "error": {"code": -32601, "message": f"Method not found: {method}"}
229
- }
230
-
231
- # Send response through SSE
232
- await sse_connections[session_id].put(response)
233
 
234
- return JSONResponse({"status": "accepted"}, status_code=202)
235
 
236
  except Exception as e:
237
- logger.error(f"Error handling message: {e}")
238
- return JSONResponse({"error": str(e)}, status_code=500)
239
-
240
- async def health_check(request: Request):
241
- """Health check endpoint."""
242
- return JSONResponse({
243
- "status": "ok",
244
- "service": "MaTableGPT MCP",
245
- "connections": len(sse_connections)
246
- })
247
-
248
- async def homepage(request: Request):
249
- """Homepage with service info."""
250
- html = """
251
- <!DOCTYPE html>
252
- <html>
253
- <head><title>MaTableGPT MCP Service</title></head>
254
- <body>
255
- <h1>🔬 MaTableGPT MCP Service</h1>
256
- <p>GPT-based Table Data Extractor from Materials Science Literature</p>
257
- <h2>Endpoints:</h2>
258
- <ul>
259
- <li><strong>SSE:</strong> <code>/sse</code></li>
260
- <li><strong>Messages:</strong> <code>/messages/</code></li>
261
- <li><strong>Health:</strong> <code>/health</code></li>
262
- </ul>
263
- <h2>Status: ✅ Running</h2>
264
- <p>Connect with: <code>{"url": "https://your-space.hf.space/sse"}</code></p>
265
- </body>
266
- </html>
267
- """
268
  return HTMLResponse(html)
269
 
270
- # Create Starlette app with routes
271
- app = Starlette(
272
- debug=True,
273
- routes=[
274
- Route("/", homepage),
275
- Route("/health", health_check),
276
- Route("/sse", handle_sse),
277
- Route("/messages/", handle_messages, methods=["POST"]),
278
- ]
279
- )
280
-
281
- logger.info(f"Starting MCP SSE server on {host}:{port}")
282
- logger.info(f"SSE Endpoint: http://{host}:{port}/sse")
283
- logger.info(f"Messages Endpoint: http://{host}:{port}/messages/")
284
 
285
- # Run with uvicorn
286
- uvicorn.run(
287
- app,
288
- host=host,
289
- port=port,
290
- log_level="info"
291
- )
292
 
293
 
294
  def run_stdio_server():
295
  """Run MCP server in stdio mode."""
296
  from mcp_service import mcp
297
- logger.info("Starting MCP server in stdio mode...")
298
  mcp.run()
299
 
300
 
301
  def main():
302
- """Main entry point."""
303
- # Get default values from environment variables
304
- default_host = os.environ.get('MCP_HOST', '0.0.0.0')
305
- default_port = int(os.environ.get('MCP_PORT', '7860'))
306
-
307
- parser = argparse.ArgumentParser(
308
- description="MaTableGPT MCP Server - Table Data Extraction from Materials Science Literature"
309
- )
310
- parser.add_argument(
311
- '--host',
312
- default=default_host,
313
- help=f'Host address (default: {default_host})'
314
- )
315
- parser.add_argument(
316
- '--port',
317
- type=int,
318
- default=default_port,
319
- help=f'Port number (default: {default_port})'
320
- )
321
- parser.add_argument(
322
- '--mode',
323
- choices=['stdio', 'sse'],
324
- default='sse',
325
- help='Run mode: stdio for standard I/O, sse for Server-Sent Events (default: sse)'
326
- )
327
- parser.add_argument(
328
- '--debug',
329
- action='store_true',
330
- help='Enable debug logging'
331
- )
332
 
333
  args = parser.parse_args()
334
 
335
- if args.debug:
336
- logging.getLogger().setLevel(logging.DEBUG)
337
-
338
- # Check dependencies
339
- missing = check_dependencies()
340
- if missing:
341
- logger.error(f"Missing required packages: {', '.join(missing)}")
342
- logger.error(f"Install with: pip install {' '.join(missing)}")
343
- sys.exit(1)
344
-
345
- # Check environment
346
- warnings = check_environment()
347
- for warning in warnings:
348
- logger.warning(warning)
349
-
350
- # Display startup info
351
- logger.info("=" * 60)
352
- logger.info("MaTableGPT MCP Server")
353
- logger.info("=" * 60)
354
- logger.info(f"Mode: {args.mode}")
355
- if args.mode == 'sse':
356
- logger.info(f"Host: {args.host}")
357
- logger.info(f"Port: {args.port}")
358
- logger.info(f"SSE Endpoint: http://{args.host}:{args.port}/sse")
359
- logger.info("=" * 60)
360
 
361
- # Run the server
362
- try:
363
- if args.mode == 'stdio':
364
- run_stdio_server()
365
- else:
366
- run_sse_server(args.host, args.port)
367
-
368
- except ImportError as e:
369
- logger.error(f"Failed to import required module: {e}")
370
- logger.error("Make sure all dependencies are installed: pip install -r requirements.txt")
371
- sys.exit(1)
372
- except Exception as e:
373
- logger.error(f"Error starting MCP server: {e}")
374
- import traceback
375
- traceback.print_exc()
376
- sys.exit(1)
377
 
378
 
379
  if __name__ == "__main__":
 
1
  #!/usr/bin/env python3
2
  """
3
+ MaTableGPT MCP Server Launcher (Simplified SSE)
4
+ ================================================
5
 
6
+ A minimal MCP SSE server implementation for HuggingFace Space.
 
7
 
8
  Usage:
9
  python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
 
 
 
 
 
 
 
 
 
 
 
 
10
  """
11
 
12
  import os
13
  import sys
14
  import argparse
15
  import logging
16
+ import json
17
+ import asyncio
18
+ import uuid
19
 
20
  # Add current directory to path for imports
21
  sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 
25
  level=logging.INFO,
26
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
27
  )
28
+ logger = logging.getLogger("matablgpt-mcp")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  def run_sse_server(host: str, port: int):
32
+ """Run MCP server in SSE mode."""
33
  import uvicorn
34
  from starlette.applications import Starlette
35
  from starlette.routing import Route
36
+ from starlette.responses import JSONResponse, HTMLResponse, StreamingResponse
37
  from starlette.requests import Request
 
 
38
 
39
+ # Import MCP service
40
  from mcp_service import mcp
41
 
42
+ # Store SSE connections
43
+ connections = {}
44
 
45
+ async def sse_endpoint(request: Request):
46
+ """SSE endpoint - client connects here first."""
47
+ conn_id = str(uuid.uuid4())
48
+ queue = asyncio.Queue()
49
+ connections[conn_id] = queue
50
 
51
+ logger.info(f"SSE connection: {conn_id}")
 
 
52
 
53
+ async def generate():
54
+ try:
55
+ # Send the message endpoint URL
56
+ # MCP expects: event: endpoint, data: /messages?sessionId=xxx
57
+ yield f"event: endpoint\ndata: /messages?sessionId={conn_id}\n\n"
58
+
59
+ while True:
60
+ try:
61
+ msg = await asyncio.wait_for(queue.get(), timeout=30)
62
+ yield f"event: message\ndata: {json.dumps(msg)}\n\n"
63
+ except asyncio.TimeoutError:
64
+ yield ": keepalive\n\n"
65
+ except asyncio.CancelledError:
66
+ pass
67
+ finally:
68
+ connections.pop(conn_id, None)
69
+ logger.info(f"SSE closed: {conn_id}")
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  return StreamingResponse(
72
+ generate(),
73
  media_type="text/event-stream",
74
  headers={
75
  "Cache-Control": "no-cache",
76
  "Connection": "keep-alive",
77
+ "X-Accel-Buffering": "no",
78
  }
79
  )
80
 
81
+ async def messages_endpoint(request: Request):
82
+ """Messages endpoint - client sends JSON-RPC here."""
83
+ session_id = request.query_params.get("sessionId")
84
+
85
+ if not session_id or session_id not in connections:
86
+ return JSONResponse({"error": "Invalid session"}, status_code=400)
87
+
88
  try:
 
 
 
 
 
 
 
89
  body = await request.json()
90
+ logger.info(f"Request: {body.get('method')}")
91
 
 
 
92
  method = body.get("method", "")
93
  params = body.get("params", {})
94
  msg_id = body.get("id")
95
 
96
+ # Process MCP methods
97
  if method == "initialize":
98
+ result = {
99
+ "protocolVersion": "2024-11-05",
100
+ "serverInfo": {"name": "MaTableGPT-MCP", "version": "1.0.0"},
101
+ "capabilities": {"tools": {}}
 
 
 
 
 
 
 
 
 
102
  }
103
+ elif method == "notifications/initialized":
104
+ # Just acknowledge, no response needed
105
+ return JSONResponse({"ok": True})
106
  elif method == "tools/list":
 
107
  tools = []
108
+ for name, func in mcp._tool_manager._tools.items():
109
+ tools.append({
110
+ "name": name,
111
+ "description": (func.__doc__ or "").split("\n")[0].strip(),
112
  "inputSchema": {"type": "object", "properties": {}}
113
+ })
114
+ result = {"tools": tools}
 
 
 
 
 
 
115
  elif method == "tools/call":
116
  tool_name = params.get("name")
117
  tool_args = params.get("arguments", {})
118
 
119
+ if tool_name not in mcp._tool_manager._tools:
120
+ raise Exception(f"Unknown tool: {tool_name}")
121
+
122
+ tool_result = mcp._tool_manager._tools[tool_name](**tool_args)
123
+ result = {"content": [{"type": "text", "text": json.dumps(tool_result)}]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  else:
125
+ raise Exception(f"Unknown method: {method}")
 
 
 
 
 
 
 
126
 
127
+ response = {"jsonrpc": "2.0", "id": msg_id, "result": result}
128
 
129
  except Exception as e:
130
+ logger.error(f"Error: {e}")
131
+ response = {
132
+ "jsonrpc": "2.0",
133
+ "id": body.get("id"),
134
+ "error": {"code": -32000, "message": str(e)}
135
+ }
136
+
137
+ # Send via SSE
138
+ await connections[session_id].put(response)
139
+ return JSONResponse({"ok": True})
140
+
141
+ async def health(request: Request):
142
+ return JSONResponse({"status": "ok", "service": "MaTableGPT-MCP"})
143
+
144
+ async def home(request: Request):
145
+ html = """<!DOCTYPE html>
146
+ <html><head><title>MaTableGPT MCP</title></head>
147
+ <body>
148
+ <h1>🔬 MaTableGPT MCP Service</h1>
149
+ <p>SSE Endpoint: <code>/sse</code></p>
150
+ <p>Status: ✅ Running</p>
151
+ </body></html>"""
 
 
 
 
 
 
 
 
 
152
  return HTMLResponse(html)
153
 
154
+ app = Starlette(routes=[
155
+ Route("/", home),
156
+ Route("/health", health),
157
+ Route("/sse", sse_endpoint),
158
+ Route("/messages", messages_endpoint, methods=["POST"]),
159
+ ])
 
 
 
 
 
 
 
 
160
 
161
+ logger.info(f"Starting SSE server on {host}:{port}")
162
+ uvicorn.run(app, host=host, port=port, log_level="info")
 
 
 
 
 
163
 
164
 
165
  def run_stdio_server():
166
  """Run MCP server in stdio mode."""
167
  from mcp_service import mcp
168
+ logger.info("Starting stdio mode...")
169
  mcp.run()
170
 
171
 
172
  def main():
173
+ parser = argparse.ArgumentParser(description="MaTableGPT MCP Server")
174
+ parser.add_argument('--host', default=os.environ.get('MCP_HOST', '0.0.0.0'))
175
+ parser.add_argument('--port', type=int, default=int(os.environ.get('MCP_PORT', '7860')))
176
+ parser.add_argument('--mode', choices=['stdio', 'sse'], default='sse')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  args = parser.parse_args()
179
 
180
+ # Log API config
181
+ api_base = os.environ.get('LLM_API_BASE') or os.environ.get('OPENAI_API_BASE')
182
+ if api_base:
183
+ logger.info(f"API base: {api_base}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
 
185
+ if args.mode == 'stdio':
186
+ run_stdio_server()
187
+ else:
188
+ run_sse_server(args.host, args.port)
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
 
191
  if __name__ == "__main__":