Spaces:

SEUyishu
/

MatTableGPT

Sleeping

App Files Community

SEUyishu committed on Dec 4, 2025

Commit

268cef6

verified ·

1 Parent(s): b410b8e

Update start_mcp.py

Browse files

Files changed (1) hide show

start_mcp.py +109 -297

start_mcp.py CHANGED Viewed

@@ -1,31 +1,21 @@
 #!/usr/bin/env python3
 """
-MaTableGPT MCP Server Launcher
-==============================
-This script starts the MaTableGPT MCP service for extracting
-table data from materials science literature.
 Usage:
     python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
-Arguments:
-    --host      Host address (default: 0.0.0.0)
-    --port      Port number (default: 7860)
-    --mode      Run mode: 'stdio' or 'sse' (default: sse for HuggingFace Space)
-Environment Variables:
-    LLM_API_KEY / OPENAI_API_KEY     - API key for LLM service
-    LLM_API_BASE / OPENAI_API_BASE   - Custom API base URL (for third-party services)
-    LLM_MODEL / OPENAI_MODEL         - Model name (default: gpt-4-turbo-preview)
-    MCP_HOST                          - Server host (default: 0.0.0.0)
-    MCP_PORT                          - Server port (default: 7860)
 """
 import os
 import sys
 import argparse
 import logging
 # Add current directory to path for imports
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
@@ -35,345 +25,167 @@ logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
-logger = logging.getLogger("matablgpt-mcp-launcher")
-def check_environment():
-    """Check if required environment variables are set."""
-    warnings = []
-    # Check for API key (support both naming conventions)
-    api_key = os.environ.get('LLM_API_KEY') or os.environ.get('OPENAI_API_KEY')
-    if not api_key:
-        warnings.append(
-            "LLM_API_KEY/OPENAI_API_KEY not set. GPT extraction features will not work. "
-            "Set it in HuggingFace Space secrets or environment variables."
-        )
-    # Check for API base (for third-party services)
-    api_base = os.environ.get('LLM_API_BASE') or os.environ.get('OPENAI_API_BASE')
-    if api_base:
-        logger.info(f"Using custom API base: {api_base}")
-    return warnings
-def check_dependencies():
-    """Check if required packages are installed."""
-    missing = []
-    required = [
-        ('mcp', 'mcp[cli]'),
-        ('openai', 'openai'),
-        ('bs4', 'beautifulsoup4'),
-        ('pandas', 'pandas'),
-        ('lxml', 'lxml')
-    ]
-    for module, package in required:
-        try:
-            __import__(module)
-        except ImportError:
-            missing.append(package)
-    return missing
 def run_sse_server(host: str, port: int):
-    """Run MCP server in SSE mode using uvicorn with custom app."""
     import uvicorn
     from starlette.applications import Starlette
     from starlette.routing import Route
-    from starlette.responses import JSONResponse, HTMLResponse, Response
     from starlette.requests import Request
-    import asyncio
-    import json
-    # Import MCP components
     from mcp_service import mcp
-    # Store for SSE connections and their message queues
-    sse_connections = {}
-    async def handle_sse(request: Request):
-        """Handle SSE connections - returns event stream."""
-        import uuid
-        connection_id = str(uuid.uuid4())
-        message_queue = asyncio.Queue()
-        sse_connections[connection_id] = message_queue
-        logger.info(f"New SSE connection: {connection_id}")
-        async def event_generator():
-            """Generate SSE events."""
-            # Send initial connection event with endpoint info
-            endpoint_data = json.dumps({"url": f"/messages/?session_id={connection_id}"})
-            yield f"event: endpoint\ndata: {endpoint_data}\n\n"
-            # Keep connection alive and send messages
-            while True:
-                try:
-                    # Wait for messages with timeout for keepalive
-                    message = await asyncio.wait_for(message_queue.get(), timeout=30)
-                    message_data = json.dumps(message)
-                    yield f"event: message\ndata: {message_data}\n\n"
-                except asyncio.TimeoutError:
-                    # Send keepalive ping
-                    yield f": ping\n\n"
-                except asyncio.CancelledError:
-                    logger.info(f"SSE connection closed: {connection_id}")
-                    break
-                except Exception as e:
-                    logger.error(f"Error in SSE generator: {e}")
-                    break
-            # Cleanup
-            sse_connections.pop(connection_id, None)
-        from starlette.responses import StreamingResponse
         return StreamingResponse(
-            event_generator(),
             media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no",  # Disable nginx buffering
             }
         )
-    async def handle_messages(request: Request):
-        """Handle POST messages from MCP client."""
         try:
-            session_id = request.query_params.get("session_id")
-            if not session_id or session_id not in sse_connections:
-                return JSONResponse(
-                    {"error": "Invalid or missing session_id"},
-                    status_code=400
-                )
             body = await request.json()
-            logger.debug(f"Received message: {body}")
-            # Process the MCP request
-            # Get the method and params
             method = body.get("method", "")
             params = body.get("params", {})
             msg_id = body.get("id")
-            # Handle different MCP methods
             if method == "initialize":
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": msg_id,
-                    "result": {
-                        "protocolVersion": "2024-11-05",
-                        "serverInfo": {
-                            "name": "MaTableGPT-MCP",
-                            "version": "1.0.0"
-                        },
-                        "capabilities": {
-                            "tools": {"listChanged": True}
-                        }
-                    }
                 }
             elif method == "tools/list":
-                # Get tools from MCP server
                 tools = []
-                for tool_name, tool_func in mcp._tool_manager._tools.items():
-                    tool_info = {
-                        "name": tool_name,
-                        "description": tool_func.__doc__ or "",
                         "inputSchema": {"type": "object", "properties": {}}
-                    }
-                    tools.append(tool_info)
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": msg_id,
-                    "result": {"tools": tools}
-                }
             elif method == "tools/call":
                 tool_name = params.get("name")
                 tool_args = params.get("arguments", {})
-                try:
-                    # Call the tool
-                    if tool_name in mcp._tool_manager._tools:
-                        result = mcp._tool_manager._tools[tool_name](**tool_args)
-                        response = {
-                            "jsonrpc": "2.0",
-                            "id": msg_id,
-                            "result": {
-                                "content": [{"type": "text", "text": json.dumps(result)}]
-                            }
-                        }
-                    else:
-                        response = {
-                            "jsonrpc": "2.0",
-                            "id": msg_id,
-                            "error": {"code": -32601, "message": f"Tool not found: {tool_name}"}
-                        }
-                except Exception as e:
-                    response = {
-                        "jsonrpc": "2.0",
-                        "id": msg_id,
-                        "error": {"code": -32000, "message": str(e)}
-                    }
             else:
-                response = {
-                    "jsonrpc": "2.0",
-                    "id": msg_id,
-                    "error": {"code": -32601, "message": f"Method not found: {method}"}
-                }
-            # Send response through SSE
-            await sse_connections[session_id].put(response)
-            return JSONResponse({"status": "accepted"}, status_code=202)
         except Exception as e:
-            logger.error(f"Error handling message: {e}")
-            return JSONResponse({"error": str(e)}, status_code=500)
-    async def health_check(request: Request):
-        """Health check endpoint."""
-        return JSONResponse({
-            "status": "ok",
-            "service": "MaTableGPT MCP",
-            "connections": len(sse_connections)
-        })
-    async def homepage(request: Request):
-        """Homepage with service info."""
-        html = """
-        <!DOCTYPE html>
-        <html>
-        <head><title>MaTableGPT MCP Service</title></head>
-        <body>
-            <h1>🔬 MaTableGPT MCP Service</h1>
-            <p>GPT-based Table Data Extractor from Materials Science Literature</p>
-            <h2>Endpoints:</h2>
-            <ul>
-                <li><strong>SSE:</strong> <code>/sse</code></li>
-                <li><strong>Messages:</strong> <code>/messages/</code></li>
-                <li><strong>Health:</strong> <code>/health</code></li>
-            </ul>
-            <h2>Status: ✅ Running</h2>
-            <p>Connect with: <code>{"url": "https://your-space.hf.space/sse"}</code></p>
-        </body>
-        </html>
-        """
         return HTMLResponse(html)
-    # Create Starlette app with routes
-    app = Starlette(
-        debug=True,
-        routes=[
-            Route("/", homepage),
-            Route("/health", health_check),
-            Route("/sse", handle_sse),
-            Route("/messages/", handle_messages, methods=["POST"]),
-        ]
-    )
-    logger.info(f"Starting MCP SSE server on {host}:{port}")
-    logger.info(f"SSE Endpoint: http://{host}:{port}/sse")
-    logger.info(f"Messages Endpoint: http://{host}:{port}/messages/")
-    # Run with uvicorn
-    uvicorn.run(
-        app,
-        host=host,
-        port=port,
-        log_level="info"
-    )
 def run_stdio_server():
     """Run MCP server in stdio mode."""
     from mcp_service import mcp
-    logger.info("Starting MCP server in stdio mode...")
     mcp.run()
 def main():
-    """Main entry point."""
-    # Get default values from environment variables
-    default_host = os.environ.get('MCP_HOST', '0.0.0.0')
-    default_port = int(os.environ.get('MCP_PORT', '7860'))
-    parser = argparse.ArgumentParser(
-        description="MaTableGPT MCP Server - Table Data Extraction from Materials Science Literature"
-    )
-    parser.add_argument(
-        '--host',
-        default=default_host,
-        help=f'Host address (default: {default_host})'
-    )
-    parser.add_argument(
-        '--port',
-        type=int,
-        default=default_port,
-        help=f'Port number (default: {default_port})'
-    )
-    parser.add_argument(
-        '--mode',
-        choices=['stdio', 'sse'],
-        default='sse',
-        help='Run mode: stdio for standard I/O, sse for Server-Sent Events (default: sse)'
-    )
-    parser.add_argument(
-        '--debug',
-        action='store_true',
-        help='Enable debug logging'
-    )
     args = parser.parse_args()
-    if args.debug:
-        logging.getLogger().setLevel(logging.DEBUG)
-    # Check dependencies
-    missing = check_dependencies()
-    if missing:
-        logger.error(f"Missing required packages: {', '.join(missing)}")
-        logger.error(f"Install with: pip install {' '.join(missing)}")
-        sys.exit(1)
-    # Check environment
-    warnings = check_environment()
-    for warning in warnings:
-        logger.warning(warning)
-    # Display startup info
-    logger.info("=" * 60)
-    logger.info("MaTableGPT MCP Server")
-    logger.info("=" * 60)
-    logger.info(f"Mode: {args.mode}")
-    if args.mode == 'sse':
-        logger.info(f"Host: {args.host}")
-        logger.info(f"Port: {args.port}")
-        logger.info(f"SSE Endpoint: http://{args.host}:{args.port}/sse")
-    logger.info("=" * 60)
-    # Run the server
-    try:
-        if args.mode == 'stdio':
-            run_stdio_server()
-        else:
-            run_sse_server(args.host, args.port)
-    except ImportError as e:
-        logger.error(f"Failed to import required module: {e}")
-        logger.error("Make sure all dependencies are installed: pip install -r requirements.txt")
-        sys.exit(1)
-    except Exception as e:
-        logger.error(f"Error starting MCP server: {e}")
-        import traceback
-        traceback.print_exc()
-        sys.exit(1)
 if __name__ == "__main__":

 #!/usr/bin/env python3
 """
+MaTableGPT MCP Server Launcher (Simplified SSE)
+================================================
+A minimal MCP SSE server implementation for HuggingFace Space.
 Usage:
     python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
 """
 import os
 import sys
 import argparse
 import logging
+import json
+import asyncio
+import uuid
 # Add current directory to path for imports
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
     level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
+logger = logging.getLogger("matablgpt-mcp")
 def run_sse_server(host: str, port: int):
+    """Run MCP server in SSE mode."""
     import uvicorn
     from starlette.applications import Starlette
     from starlette.routing import Route
+    from starlette.responses import JSONResponse, HTMLResponse, StreamingResponse
     from starlette.requests import Request
+    # Import MCP service
     from mcp_service import mcp
+    # Store SSE connections
+    connections = {}
+    async def sse_endpoint(request: Request):
+        """SSE endpoint - client connects here first."""
+        conn_id = str(uuid.uuid4())
+        queue = asyncio.Queue()
+        connections[conn_id] = queue
+        logger.info(f"SSE connection: {conn_id}")
+        async def generate():
+            try:
+                # Send the message endpoint URL
+                # MCP expects: event: endpoint, data: /messages?sessionId=xxx
+                yield f"event: endpoint\ndata: /messages?sessionId={conn_id}\n\n"
+                while True:
+                    try:
+                        msg = await asyncio.wait_for(queue.get(), timeout=30)
+                        yield f"event: message\ndata: {json.dumps(msg)}\n\n"
+                    except asyncio.TimeoutError:
+                        yield ": keepalive\n\n"
+            except asyncio.CancelledError:
+                pass
+            finally:
+                connections.pop(conn_id, None)
+                logger.info(f"SSE closed: {conn_id}")
         return StreamingResponse(
+            generate(),
             media_type="text/event-stream",
             headers={
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
+                "X-Accel-Buffering": "no",
             }
         )
+    async def messages_endpoint(request: Request):
+        """Messages endpoint - client sends JSON-RPC here."""
+        session_id = request.query_params.get("sessionId")
+        if not session_id or session_id not in connections:
+            return JSONResponse({"error": "Invalid session"}, status_code=400)
         try:
             body = await request.json()
+            logger.info(f"Request: {body.get('method')}")
             method = body.get("method", "")
             params = body.get("params", {})
             msg_id = body.get("id")
+            # Process MCP methods
             if method == "initialize":
+                result = {
+                    "protocolVersion": "2024-11-05",
+                    "serverInfo": {"name": "MaTableGPT-MCP", "version": "1.0.0"},
+                    "capabilities": {"tools": {}}
                 }
+            elif method == "notifications/initialized":
+                # Just acknowledge, no response needed
+                return JSONResponse({"ok": True})
             elif method == "tools/list":
                 tools = []
+                for name, func in mcp._tool_manager._tools.items():
+                    tools.append({
+                        "name": name,
+                        "description": (func.__doc__ or "").split("\n")[0].strip(),
                         "inputSchema": {"type": "object", "properties": {}}
+                    })
+                result = {"tools": tools}
             elif method == "tools/call":
                 tool_name = params.get("name")
                 tool_args = params.get("arguments", {})
+                if tool_name not in mcp._tool_manager._tools:
+                    raise Exception(f"Unknown tool: {tool_name}")
+                tool_result = mcp._tool_manager._tools[tool_name](**tool_args)
+                result = {"content": [{"type": "text", "text": json.dumps(tool_result)}]}
             else:
+                raise Exception(f"Unknown method: {method}")
+            response = {"jsonrpc": "2.0", "id": msg_id, "result": result}
         except Exception as e:
+            logger.error(f"Error: {e}")
+            response = {
+                "jsonrpc": "2.0",
+                "id": body.get("id"),
+                "error": {"code": -32000, "message": str(e)}
+            }
+        # Send via SSE
+        await connections[session_id].put(response)
+        return JSONResponse({"ok": True})
+    async def health(request: Request):
+        return JSONResponse({"status": "ok", "service": "MaTableGPT-MCP"})
+    async def home(request: Request):
+        html = """<!DOCTYPE html>
+<html><head><title>MaTableGPT MCP</title></head>
+<body>
+<h1>🔬 MaTableGPT MCP Service</h1>
+<p>SSE Endpoint: <code>/sse</code></p>
+<p>Status: ✅ Running</p>
+</body></html>"""
         return HTMLResponse(html)
+    app = Starlette(routes=[
+        Route("/", home),
+        Route("/health", health),
+        Route("/sse", sse_endpoint),
+        Route("/messages", messages_endpoint, methods=["POST"]),
+    ])
+    logger.info(f"Starting SSE server on {host}:{port}")
+    uvicorn.run(app, host=host, port=port, log_level="info")
 def run_stdio_server():
     """Run MCP server in stdio mode."""
     from mcp_service import mcp
+    logger.info("Starting stdio mode...")
     mcp.run()
 def main():
+    parser = argparse.ArgumentParser(description="MaTableGPT MCP Server")
+    parser.add_argument('--host', default=os.environ.get('MCP_HOST', '0.0.0.0'))
+    parser.add_argument('--port', type=int, default=int(os.environ.get('MCP_PORT', '7860')))
+    parser.add_argument('--mode', choices=['stdio', 'sse'], default='sse')
     args = parser.parse_args()
+    # Log API config
+    api_base = os.environ.get('LLM_API_BASE') or os.environ.get('OPENAI_API_BASE')
+    if api_base:
+        logger.info(f"API base: {api_base}")
+    if args.mode == 'stdio':
+        run_stdio_server()
+    else:
+        run_sse_server(args.host, args.port)
 if __name__ == "__main__":