SEUyishu committed on
Commit
c2d9a42
·
verified ·
1 Parent(s): b62550b

Upload 6 files

Browse files
Files changed (3) hide show
  1. Dockerfile +6 -2
  2. mcp_service.py +12 -2
  3. start_mcp.py +368 -188
Dockerfile CHANGED
@@ -16,6 +16,10 @@ ENV PYTHONUNBUFFERED=1
16
  ENV MCP_HOST=0.0.0.0
17
  ENV MCP_PORT=7860
18
 
 
 
 
 
19
  # Install system dependencies
20
  RUN apt-get update && apt-get install -y --no-install-recommends \
21
  build-essential \
@@ -45,9 +49,9 @@ RUN chmod -R 777 /app/sessions /app/temp
45
  # Expose MCP SSE port (HuggingFace Spaces uses 7860)
46
  EXPOSE 7860
47
 
48
- # Health check for MCP SSE endpoint
49
  HEALTHCHECK --interval=30s --timeout=30s --start-period=10s --retries=3 \
50
- CMD curl -f http://localhost:7860/sse || exit 1
51
 
52
  # Run MCP service in SSE mode
53
  CMD ["python", "start_mcp.py", "--mode", "sse", "--host", "0.0.0.0", "--port", "7860"]
 
16
  ENV MCP_HOST=0.0.0.0
17
  ENV MCP_PORT=7860
18
 
19
# Disable MCP transport security for reverse proxy (HuggingFace Space)
# NOTE(review): FASTMCP_ALLOWED_HOSTS=* accepts any Host header. That is
# acceptable only behind the Space's reverse proxy — do not reuse this image
# on a directly exposed host without tightening this.
ENV MCP_TRANSPORT_SECURITY_ENABLED=false
ENV FASTMCP_ALLOWED_HOSTS=*
22
+
23
  # Install system dependencies
24
  RUN apt-get update && apt-get install -y --no-install-recommends \
25
  build-essential \
 
49
  # Expose MCP SSE port (HuggingFace Spaces uses 7860)
50
  EXPOSE 7860
51
 
52
# Health check endpoint
# NOTE(review): this requires `curl` inside the image — confirm it is
# installed in the apt-get layer, otherwise the container is permanently
# reported unhealthy even when the server is fine.
HEALTHCHECK --interval=30s --timeout=30s --start-period=10s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Run MCP service in SSE mode
CMD ["python", "start_mcp.py", "--mode", "sse", "--host", "0.0.0.0", "--port", "7860"]
mcp_service.py CHANGED
@@ -858,8 +858,18 @@ def get_extractor() -> GPTExtractor:
858
  gpt_extractor = GPTExtractor()
859
  return gpt_extractor
860
 
861
- # Create MCP server
862
- mcp = FastMCP("MaTableGPT-MCP")
 
 
 
 
 
 
 
 
 
 
863
 
864
  # =============================================================================
865
  # MCP Tools
 
858
  gpt_extractor = GPTExtractor()
859
  return gpt_extractor
860
 
861
# Create MCP server with permissive settings for HuggingFace Space.
# host/port are passed so FastMCP's built-in transports bind correctly
# behind the Space's reverse proxy.
mcp = FastMCP(
    "MaTableGPT-MCP",
    host="0.0.0.0",
    port=7860
)

# Best-effort attempt to relax request validation on the underlying
# low-level server (connections arrive via the HF reverse proxy).
# NOTE(review): in recent `mcp` SDK versions `request_context` is a
# read-only property backed by a ContextVar, so plain assignment raises
# AttributeError even though hasattr() returns True for the property.
# Guard the assignment so a failed override degrades gracefully instead
# of crashing the whole module at import time; transport security is
# additionally disabled via env vars in the Dockerfile.
if hasattr(mcp, '_mcp_server'):
    try:
        mcp._mcp_server.request_context = None  # Disable request validation
    except (AttributeError, TypeError):
        # Property without a setter (or incompatible SDK) — skip silently.
        pass
873
 
874
  # =============================================================================
875
  # MCP Tools
start_mcp.py CHANGED
@@ -1,188 +1,368 @@
1
- #!/usr/bin/env python3
2
- """
3
- MaTableGPT MCP Server Launcher
4
- ==============================
5
-
6
- This script starts the MaTableGPT MCP service for extracting
7
- table data from materials science literature.
8
-
9
- Usage:
10
- python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
11
-
12
- Arguments:
13
- --host Host address (default: 0.0.0.0)
14
- --port Port number (default: 7860)
15
- --mode Run mode: 'stdio' or 'sse' (default: sse for HuggingFace Space)
16
-
17
- Environment Variables:
18
- LLM_API_KEY / OPENAI_API_KEY - API key for LLM service
19
- LLM_API_BASE / OPENAI_API_BASE - Custom API base URL (for third-party services)
20
- LLM_MODEL / OPENAI_MODEL - Model name (default: gpt-4-turbo-preview)
21
- MCP_HOST - Server host (default: 0.0.0.0)
22
- MCP_PORT - Server port (default: 7860)
23
- """
24
-
25
- import os
26
- import sys
27
- import argparse
28
- import logging
29
-
30
- # Add current directory to path for imports
31
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
32
-
33
- # Configure logging
34
- logging.basicConfig(
35
- level=logging.INFO,
36
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
37
- )
38
- logger = logging.getLogger("matablgpt-mcp-launcher")
39
-
40
-
41
- def check_environment():
42
- """Check if required environment variables are set."""
43
- warnings = []
44
-
45
- # Check for API key (support both naming conventions)
46
- api_key = os.environ.get('LLM_API_KEY') or os.environ.get('OPENAI_API_KEY')
47
- if not api_key:
48
- warnings.append(
49
- "LLM_API_KEY/OPENAI_API_KEY not set. GPT extraction features will not work. "
50
- "Set it in HuggingFace Space secrets or environment variables."
51
- )
52
-
53
- # Check for API base (for third-party services)
54
- api_base = os.environ.get('LLM_API_BASE') or os.environ.get('OPENAI_API_BASE')
55
- if api_base:
56
- logger.info(f"Using custom API base: {api_base}")
57
-
58
- return warnings
59
-
60
-
61
- def check_dependencies():
62
- """Check if required packages are installed."""
63
- missing = []
64
-
65
- required = [
66
- ('mcp', 'mcp[cli]'),
67
- ('openai', 'openai'),
68
- ('bs4', 'beautifulsoup4'),
69
- ('pandas', 'pandas'),
70
- ('lxml', 'lxml')
71
- ]
72
-
73
- for module, package in required:
74
- try:
75
- __import__(module)
76
- except ImportError:
77
- missing.append(package)
78
-
79
- return missing
80
-
81
-
82
- def run_sse_server(host: str, port: int):
83
- """Run MCP server in SSE mode using uvicorn."""
84
- import uvicorn
85
- from mcp_service import mcp
86
-
87
- # Get the SSE app from FastMCP
88
- sse_app = mcp.sse_app()
89
-
90
- logger.info(f"Starting MCP SSE server on {host}:{port}")
91
- logger.info(f"SSE Endpoint: http://{host}:{port}/sse")
92
-
93
- # Run with uvicorn
94
- uvicorn.run(
95
- sse_app,
96
- host=host,
97
- port=port,
98
- log_level="info"
99
- )
100
-
101
-
102
- def run_stdio_server():
103
- """Run MCP server in stdio mode."""
104
- from mcp_service import mcp
105
- logger.info("Starting MCP server in stdio mode...")
106
- mcp.run()
107
-
108
-
109
- def main():
110
- """Main entry point."""
111
- # Get default values from environment variables
112
- default_host = os.environ.get('MCP_HOST', '0.0.0.0')
113
- default_port = int(os.environ.get('MCP_PORT', '7860'))
114
-
115
- parser = argparse.ArgumentParser(
116
- description="MaTableGPT MCP Server - Table Data Extraction from Materials Science Literature"
117
- )
118
- parser.add_argument(
119
- '--host',
120
- default=default_host,
121
- help=f'Host address (default: {default_host})'
122
- )
123
- parser.add_argument(
124
- '--port',
125
- type=int,
126
- default=default_port,
127
- help=f'Port number (default: {default_port})'
128
- )
129
- parser.add_argument(
130
- '--mode',
131
- choices=['stdio', 'sse'],
132
- default='sse',
133
- help='Run mode: stdio for standard I/O, sse for Server-Sent Events (default: sse)'
134
- )
135
- parser.add_argument(
136
- '--debug',
137
- action='store_true',
138
- help='Enable debug logging'
139
- )
140
-
141
- args = parser.parse_args()
142
-
143
- if args.debug:
144
- logging.getLogger().setLevel(logging.DEBUG)
145
-
146
- # Check dependencies
147
- missing = check_dependencies()
148
- if missing:
149
- logger.error(f"Missing required packages: {', '.join(missing)}")
150
- logger.error(f"Install with: pip install {' '.join(missing)}")
151
- sys.exit(1)
152
-
153
- # Check environment
154
- warnings = check_environment()
155
- for warning in warnings:
156
- logger.warning(warning)
157
-
158
- # Display startup info
159
- logger.info("=" * 60)
160
- logger.info("MaTableGPT MCP Server")
161
- logger.info("=" * 60)
162
- logger.info(f"Mode: {args.mode}")
163
- if args.mode == 'sse':
164
- logger.info(f"Host: {args.host}")
165
- logger.info(f"Port: {args.port}")
166
- logger.info(f"SSE Endpoint: http://{args.host}:{args.port}/sse")
167
- logger.info("=" * 60)
168
-
169
- # Run the server
170
- try:
171
- if args.mode == 'stdio':
172
- run_stdio_server()
173
- else:
174
- run_sse_server(args.host, args.port)
175
-
176
- except ImportError as e:
177
- logger.error(f"Failed to import required module: {e}")
178
- logger.error("Make sure all dependencies are installed: pip install -r requirements.txt")
179
- sys.exit(1)
180
- except Exception as e:
181
- logger.error(f"Error starting MCP server: {e}")
182
- import traceback
183
- traceback.print_exc()
184
- sys.exit(1)
185
-
186
-
187
- if __name__ == "__main__":
188
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ MaTableGPT MCP Server Launcher
4
+ ==============================
5
+
6
+ This script starts the MaTableGPT MCP service for extracting
7
+ table data from materials science literature.
8
+
9
+ Usage:
10
+ python start_mcp.py [--host HOST] [--port PORT] [--mode MODE]
11
+
12
+ Arguments:
13
+ --host Host address (default: 0.0.0.0)
14
+ --port Port number (default: 7860)
15
+ --mode Run mode: 'stdio' or 'sse' (default: sse for HuggingFace Space)
16
+
17
+ Environment Variables:
18
+ LLM_API_KEY / OPENAI_API_KEY - API key for LLM service
19
+ LLM_API_BASE / OPENAI_API_BASE - Custom API base URL (for third-party services)
20
+ LLM_MODEL / OPENAI_MODEL - Model name (default: gpt-4-turbo-preview)
21
+ MCP_HOST - Server host (default: 0.0.0.0)
22
+ MCP_PORT - Server port (default: 7860)
23
+ """
24
+
25
+ import os
26
+ import sys
27
+ import argparse
28
+ import logging
29
+
30
+ # Add current directory to path for imports
31
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
32
+
33
+ # Configure logging
34
+ logging.basicConfig(
35
+ level=logging.INFO,
36
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
37
+ )
38
+ logger = logging.getLogger("matablgpt-mcp-launcher")
39
+
40
+
41
def check_environment():
    """Check if required environment variables are set."""
    env = os.environ
    issues = []

    # Either naming convention is accepted for the API key.
    has_key = bool(env.get('LLM_API_KEY') or env.get('OPENAI_API_KEY'))
    if not has_key:
        issues.append(
            "LLM_API_KEY/OPENAI_API_KEY not set. GPT extraction features will not work. "
            "Set it in HuggingFace Space secrets or environment variables."
        )

    # A custom base URL (OpenAI-compatible third-party services) is optional;
    # just log it when present.
    custom_base = env.get('LLM_API_BASE') or env.get('OPENAI_API_BASE')
    if custom_base:
        logger.info(f"Using custom API base: {custom_base}")

    return issues
59
+
60
+
61
def check_dependencies():
    """Check if required packages are installed.

    Returns:
        List of pip package names (in declaration order) that failed to import.
    """
    def _importable(module_name):
        # Import by module name; any ImportError means the package is absent.
        try:
            __import__(module_name)
            return True
        except ImportError:
            return False

    required = [
        ('mcp', 'mcp[cli]'),
        ('openai', 'openai'),
        ('bs4', 'beautifulsoup4'),
        ('pandas', 'pandas'),
        ('lxml', 'lxml'),
    ]
    return [pip_name for module_name, pip_name in required if not _importable(module_name)]
80
+
81
+
82
def run_sse_server(host: str, port: int):
    """Run the MCP server in SSE mode behind a hand-rolled Starlette app.

    Exposes:
      GET  /          - human-readable service info page
      GET  /health    - JSON health check (used by the Docker HEALTHCHECK)
      GET  /sse       - SSE event stream; first event announces the message endpoint
      POST /messages/ - JSON-RPC messages from the MCP client

    Args:
        host: Interface for uvicorn to bind.
        port: TCP port to listen on.
    """
    import uvicorn
    from starlette.applications import Starlette
    from starlette.routing import Route
    from starlette.responses import JSONResponse, HTMLResponse
    from starlette.requests import Request
    import asyncio
    import json

    # Import MCP components
    from mcp_service import mcp

    # Per-connection outgoing message queues, keyed by session id.
    sse_connections = {}

    def _list_tools():
        """Build the tools/list payload from FastMCP's tool registry.

        NOTE(review): FastMCP's private registry maps names to Tool objects
        (with .description / .parameters), not raw functions — using
        __doc__ on the registry value yields the Tool class docstring, not
        the tool's. Fall back gracefully for older SDK layouts; confirm
        against the installed `mcp` version.
        """
        tools = []
        for tool_name, tool in mcp._tool_manager._tools.items():
            description = getattr(tool, "description", None) or (tool.__doc__ or "")
            schema = (
                getattr(tool, "parameters", None)
                or getattr(tool, "inputSchema", None)
                or {"type": "object", "properties": {}}
            )
            tools.append({
                "name": tool_name,
                "description": description,
                "inputSchema": schema,
            })
        return tools

    async def _call_tool(tool_name, tool_args):
        """Invoke a registered tool, supporting both sync and async tools."""
        tool = mcp._tool_manager._tools[tool_name]
        # Tool objects wrap the callable in .fn; older registries may store
        # the function directly.
        fn = getattr(tool, "fn", tool)
        result = fn(**tool_args)
        if asyncio.iscoroutine(result):
            # BUG FIX: async tools previously returned an un-awaited
            # coroutine, which json.dumps() cannot serialize.
            result = await result
        return result

    async def handle_sse(request: Request):
        """Open an SSE stream and announce the POST endpoint for this session."""
        import uuid
        from sse_starlette.sse import EventSourceResponse

        connection_id = str(uuid.uuid4())
        message_queue = asyncio.Queue()
        sse_connections[connection_id] = message_queue

        logger.info(f"New SSE connection: {connection_id}")

        async def event_generator():
            try:
                # BUG FIX: per the MCP HTTP+SSE transport, the `endpoint`
                # event's data is the endpoint URI itself (a plain string),
                # not a JSON object — standard clients could not parse the
                # previous {"url": ...} payload.
                yield {
                    "event": "endpoint",
                    "data": f"/messages/?session_id={connection_id}",
                }

                # Pump queued responses to the client; emit a keepalive ping
                # when the queue stays empty for 30 seconds.
                while True:
                    try:
                        message = await asyncio.wait_for(message_queue.get(), timeout=30)
                        yield {"event": "message", "data": json.dumps(message)}
                    except asyncio.TimeoutError:
                        yield {"event": "ping", "data": ""}

            except asyncio.CancelledError:
                logger.info(f"SSE connection closed: {connection_id}")
            finally:
                # Drop the queue so handle_messages rejects stale sessions.
                sse_connections.pop(connection_id, None)

        return EventSourceResponse(event_generator())

    async def handle_messages(request: Request):
        """Dispatch a JSON-RPC message from the client; reply over the SSE stream."""
        try:
            session_id = request.query_params.get("session_id")
            if not session_id or session_id not in sse_connections:
                return JSONResponse(
                    {"error": "Invalid or missing session_id"},
                    status_code=400
                )

            body = await request.json()
            logger.debug(f"Received message: {body}")

            method = body.get("method", "")
            params = body.get("params", {})
            msg_id = body.get("id")

            # BUG FIX: JSON-RPC notifications (no "id", e.g.
            # notifications/initialized) must not receive a response at all;
            # previously they were answered with a bogus "Method not found".
            if msg_id is None:
                return JSONResponse({"status": "accepted"}, status_code=202)

            if method == "initialize":
                response = {
                    "jsonrpc": "2.0",
                    "id": msg_id,
                    "result": {
                        "protocolVersion": "2024-11-05",
                        "serverInfo": {
                            "name": "MaTableGPT-MCP",
                            "version": "1.0.0"
                        },
                        "capabilities": {
                            "tools": {"listChanged": True}
                        }
                    }
                }
            elif method == "tools/list":
                response = {
                    "jsonrpc": "2.0",
                    "id": msg_id,
                    "result": {"tools": _list_tools()}
                }
            elif method == "tools/call":
                tool_name = params.get("name")
                tool_args = params.get("arguments", {})

                if tool_name in mcp._tool_manager._tools:
                    try:
                        result = await _call_tool(tool_name, tool_args)
                        response = {
                            "jsonrpc": "2.0",
                            "id": msg_id,
                            "result": {
                                "content": [{"type": "text", "text": json.dumps(result)}]
                            }
                        }
                    except Exception as e:
                        response = {
                            "jsonrpc": "2.0",
                            "id": msg_id,
                            "error": {"code": -32000, "message": str(e)}
                        }
                else:
                    response = {
                        "jsonrpc": "2.0",
                        "id": msg_id,
                        "error": {"code": -32601, "message": f"Tool not found: {tool_name}"}
                    }
            else:
                response = {
                    "jsonrpc": "2.0",
                    "id": msg_id,
                    "error": {"code": -32601, "message": f"Method not found: {method}"}
                }

            # Responses travel back over the session's SSE stream.
            await sse_connections[session_id].put(response)

            return JSONResponse({"status": "accepted"}, status_code=202)

        except Exception as e:
            logger.error(f"Error handling message: {e}")
            return JSONResponse({"error": str(e)}, status_code=500)

    async def health_check(request: Request):
        """Health check endpoint."""
        return JSONResponse({
            "status": "ok",
            "service": "MaTableGPT MCP",
            "connections": len(sse_connections)
        })

    async def homepage(request: Request):
        """Homepage with service info."""
        html = """
        <!DOCTYPE html>
        <html>
        <head><title>MaTableGPT MCP Service</title></head>
        <body>
            <h1>🔬 MaTableGPT MCP Service</h1>
            <p>GPT-based Table Data Extractor from Materials Science Literature</p>
            <h2>Endpoints:</h2>
            <ul>
                <li><strong>SSE:</strong> <code>/sse</code></li>
                <li><strong>Messages:</strong> <code>/messages/</code></li>
                <li><strong>Health:</strong> <code>/health</code></li>
            </ul>
            <h2>Status: ✅ Running</h2>
            <p>Connect with: <code>{"url": "https://your-space.hf.space/sse"}</code></p>
        </body>
        </html>
        """
        return HTMLResponse(html)

    # Create Starlette app with routes.
    # NOTE(review): debug=True returns tracebacks to clients — consider
    # turning it off for a public Space.
    app = Starlette(
        debug=True,
        routes=[
            Route("/", homepage),
            Route("/health", health_check),
            Route("/sse", handle_sse),
            Route("/messages/", handle_messages, methods=["POST"]),
        ]
    )

    logger.info(f"Starting MCP SSE server on {host}:{port}")
    logger.info(f"SSE Endpoint: http://{host}:{port}/sse")
    logger.info(f"Messages Endpoint: http://{host}:{port}/messages/")

    # Run with uvicorn
    uvicorn.run(
        app,
        host=host,
        port=port,
        log_level="info"
    )
280
+
281
+
282
def run_stdio_server():
    """Run the MCP server over standard input/output (local MCP clients)."""
    from mcp_service import mcp as server
    logger.info("Starting MCP server in stdio mode...")
    server.run()
287
+
288
+
289
def main():
    """Main entry point: parse CLI args, validate the environment, launch the server.

    Exits with status 1 on missing dependencies or server startup failure.
    """
    # Get default values from environment variables.
    default_host = os.environ.get('MCP_HOST', '0.0.0.0')
    try:
        default_port = int(os.environ.get('MCP_PORT', '7860'))
    except ValueError:
        # BUG FIX: a malformed MCP_PORT previously raised an unhandled
        # ValueError before argparse could even print usage; fall back to
        # the documented default instead.
        logger.warning("Invalid MCP_PORT value; falling back to 7860")
        default_port = 7860

    parser = argparse.ArgumentParser(
        description="MaTableGPT MCP Server - Table Data Extraction from Materials Science Literature"
    )
    parser.add_argument(
        '--host',
        default=default_host,
        help=f'Host address (default: {default_host})'
    )
    parser.add_argument(
        '--port',
        type=int,
        default=default_port,
        help=f'Port number (default: {default_port})'
    )
    parser.add_argument(
        '--mode',
        choices=['stdio', 'sse'],
        default='sse',
        help='Run mode: stdio for standard I/O, sse for Server-Sent Events (default: sse)'
    )
    parser.add_argument(
        '--debug',
        action='store_true',
        help='Enable debug logging'
    )

    args = parser.parse_args()

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # Fail fast if any required package is absent.
    missing = check_dependencies()
    if missing:
        logger.error(f"Missing required packages: {', '.join(missing)}")
        logger.error(f"Install with: pip install {' '.join(missing)}")
        sys.exit(1)

    # Environment problems are non-fatal: the server can start without an
    # API key, but GPT-backed tools will fail at call time.
    for warning in check_environment():
        logger.warning(warning)

    # Display startup info.
    logger.info("=" * 60)
    logger.info("MaTableGPT MCP Server")
    logger.info("=" * 60)
    logger.info(f"Mode: {args.mode}")
    if args.mode == 'sse':
        logger.info(f"Host: {args.host}")
        logger.info(f"Port: {args.port}")
        logger.info(f"SSE Endpoint: http://{args.host}:{args.port}/sse")
    logger.info("=" * 60)

    # Run the server.
    try:
        if args.mode == 'stdio':
            run_stdio_server()
        else:
            run_sse_server(args.host, args.port)

    except ImportError as e:
        logger.error(f"Failed to import required module: {e}")
        logger.error("Make sure all dependencies are installed: pip install -r requirements.txt")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Error starting MCP server: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
365
+
366
+
367
+ if __name__ == "__main__":
368
+ main()