darkfire514 commited on
Commit
399b80c
·
verified ·
1 Parent(s): 06c5a6b

Upload 160 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. openspace/.env.example +53 -0
  2. openspace/__init__.py +71 -0
  3. openspace/__main__.py +473 -0
  4. openspace/agents/__init__.py +9 -0
  5. openspace/agents/base.py +194 -0
  6. openspace/agents/grounding_agent.py +1212 -0
  7. openspace/cloud/__init__.py +31 -0
  8. openspace/cloud/auth.py +102 -0
  9. openspace/cloud/cli/__init__.py +0 -0
  10. openspace/cloud/cli/download_skill.py +63 -0
  11. openspace/cloud/cli/upload_skill.py +83 -0
  12. openspace/cloud/client.py +497 -0
  13. openspace/cloud/embedding.py +129 -0
  14. openspace/cloud/search.py +393 -0
  15. openspace/config/README.md +115 -0
  16. openspace/config/__init__.py +32 -0
  17. openspace/config/config_agents.json +11 -0
  18. openspace/config/config_dev.json.example +12 -0
  19. openspace/config/config_grounding.json +82 -0
  20. openspace/config/config_mcp.json.example +11 -0
  21. openspace/config/config_security.json +68 -0
  22. openspace/config/constants.py +23 -0
  23. openspace/config/grounding.py +311 -0
  24. openspace/config/loader.py +177 -0
  25. openspace/config/utils.py +30 -0
  26. openspace/dashboard_server.py +639 -0
  27. openspace/grounding/backends/__init__.py +34 -0
  28. openspace/grounding/backends/gui/__init__.py +25 -0
  29. openspace/grounding/backends/gui/anthropic_client.py +575 -0
  30. openspace/grounding/backends/gui/anthropic_utils.py +241 -0
  31. openspace/grounding/backends/gui/config.py +76 -0
  32. openspace/grounding/backends/gui/provider.py +143 -0
  33. openspace/grounding/backends/gui/session.py +188 -0
  34. openspace/grounding/backends/gui/tool.py +712 -0
  35. openspace/grounding/backends/gui/transport/actions.py +232 -0
  36. openspace/grounding/backends/gui/transport/connector.py +389 -0
  37. openspace/grounding/backends/gui/transport/local_connector.py +364 -0
  38. openspace/grounding/backends/mcp/__init__.py +41 -0
  39. openspace/grounding/backends/mcp/client.py +409 -0
  40. openspace/grounding/backends/mcp/config.py +132 -0
  41. openspace/grounding/backends/mcp/installer.py +697 -0
  42. openspace/grounding/backends/mcp/provider.py +473 -0
  43. openspace/grounding/backends/mcp/session.py +75 -0
  44. openspace/grounding/backends/mcp/tool_cache.py +254 -0
  45. openspace/grounding/backends/mcp/tool_converter.py +194 -0
  46. openspace/grounding/backends/mcp/transport/connectors/__init__.py +20 -0
  47. openspace/grounding/backends/mcp/transport/connectors/base.py +374 -0
  48. openspace/grounding/backends/mcp/transport/connectors/http.py +705 -0
  49. openspace/grounding/backends/mcp/transport/connectors/sandbox.py +251 -0
  50. openspace/grounding/backends/mcp/transport/connectors/stdio.py +76 -0
openspace/.env.example ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================
2
+ # OpenSpace Environment Variables
3
+ # Copy this file to .env and fill in your keys
4
+ # ============================================
5
+
6
+ # ---- LLM API Keys ----
7
+ # At least one LLM API key is required for OpenSpace to function.
8
+ # OpenSpace uses LiteLLM for model routing, so the key you need depends on your chosen model.
9
+ # See https://docs.litellm.ai/docs/providers for supported providers.
10
+
11
+ # Anthropic (for anthropic/claude-* models)
12
+ # ANTHROPIC_API_KEY=
13
+
14
+ # OpenAI (for openai/gpt-* models)
15
+ # OPENAI_API_KEY=
16
+
17
+ # OpenRouter (for openrouter/* models, e.g. openrouter/anthropic/claude-sonnet-4.5)
18
+ OPENROUTER_API_KEY=
19
+
20
+ # ── OpenSpace Cloud (optional) ──────────────────────────────
21
+ # Register at https://open-space.cloud to get your key.
22
+ # Enables cloud skill search & upload; local features work without it.
23
+
24
+ # OPENSPACE_API_KEY=sk_xxxxxxxxxxxxxxxx
25
+
26
+ # ---- GUI Backend (Anthropic Computer Use) ----
27
+ # Required only if using the GUI backend. Uses the same ANTHROPIC_API_KEY above.
28
+ # Optional backup key for rate limit fallback:
29
+ # ANTHROPIC_API_KEY_BACKUP=
30
+
31
+ # ---- Web Backend (Deep Research) ----
32
+ # Required only if using the Web backend for deep research.
33
+ # Uses OpenRouter API by default:
34
+ # OPENROUTER_API_KEY=
35
+
36
+ # ---- Embedding (Optional) ----
37
+ # For remote embedding API instead of local model.
38
+ # If not set, OpenSpace uses a local embedding model (BAAI/bge-small-en-v1.5).
39
+ # EMBEDDING_BASE_URL=
40
+ # EMBEDDING_API_KEY=
41
+ # EMBEDDING_MODEL=openai/text-embedding-3-small
42
+
43
+ # ---- E2B Sandbox (Optional) ----
44
+ # Required only if sandbox mode is enabled in security config.
45
+ # E2B_API_KEY=
46
+
47
+ # ---- Local Server (Optional) ----
48
+ # Override the default local server URL (default: http://127.0.0.1:5000)
49
+ # Useful for remote VM integration (e.g., OSWorld).
50
+ # LOCAL_SERVER_URL=http://127.0.0.1:5000
51
+
52
+ # ---- Debug (Optional) ----
53
+ # OPENSPACE_DEBUG=true
openspace/__init__.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from importlib import import_module as _imp
2
+ from typing import Dict as _Dict, Any as _Any, TYPE_CHECKING as _TYPE_CHECKING
3
+
4
+ if _TYPE_CHECKING:
5
+ from openspace.tool_layer import OpenSpace as OpenSpace, OpenSpaceConfig as OpenSpaceConfig
6
+ from openspace.agents import GroundingAgent as GroundingAgent
7
+ from openspace.llm import LLMClient as LLMClient
8
+ from openspace.recording import RecordingManager as RecordingManager
9
+
10
+ __version__ = "0.1.0"
11
+
12
+ __all__ = [
13
+ # Version
14
+ "__version__",
15
+
16
+ # Main API
17
+ "OpenSpace",
18
+ "OpenSpaceConfig",
19
+
20
+ # Core Components
21
+ "GroundingAgent",
22
+ "GroundingClient",
23
+ "LLMClient",
24
+ "BaseTool",
25
+ "ToolResult",
26
+ "BackendType",
27
+
28
+ # Recording System
29
+ "RecordingManager",
30
+ "RecordingViewer",
31
+ ]
32
+
33
+ # Map attribute → sub-module that provides it
34
+ _attr_to_module: _Dict[str, str] = {
35
+ # Main API
36
+ "OpenSpace": "openspace.tool_layer",
37
+ "OpenSpaceConfig": "openspace.tool_layer",
38
+
39
+ # Core Components
40
+ "GroundingAgent": "openspace.agents",
41
+ "GroundingClient": "openspace.grounding.core.grounding_client",
42
+ "LLMClient": "openspace.llm",
43
+ "BaseTool": "openspace.grounding.core.tool.base",
44
+ "ToolResult": "openspace.grounding.core.types",
45
+ "BackendType": "openspace.grounding.core.types",
46
+
47
+ # Recording System
48
+ "RecordingManager": "openspace.recording",
49
+ "RecordingViewer": "openspace.recording.viewer",
50
+ }
51
+
52
+
53
def __getattr__(name: str) -> _Any:
    """Resolve a lazily-exported attribute on first access.

    Keeps the initial ``import openspace`` lightweight: the sub-module that
    actually provides *name* is imported only when the attribute is first
    touched, so optional / heavy dependencies do not raise
    ``ModuleNotFoundError`` until their functionality is explicitly used.
    """
    target = _attr_to_module.get(name)
    if target is None:
        raise AttributeError(f"module 'openspace' has no attribute '{name}'")
    resolved = getattr(_imp(target), name)
    # Cache on the module so subsequent lookups bypass __getattr__ entirely.
    globals()[name] = resolved
    return resolved
68
+
69
+
70
def __dir__():
    """Return the module's attribute names, including lazy exports.

    Uses a set union instead of list concatenation: once a lazy attribute has
    been materialized into ``globals()`` by ``__getattr__``, its name appears
    in both sources, and plain concatenation would list it twice in ``dir()``.
    """
    return sorted(set(globals()) | set(_attr_to_module))
openspace/__main__.py ADDED
@@ -0,0 +1,473 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import argparse
3
+ import sys
4
+ import logging
5
+ from typing import Optional
6
+
7
+ from openspace.tool_layer import OpenSpace, OpenSpaceConfig
8
+ from openspace.utils.logging import Logger
9
+ from openspace.utils.ui import create_ui, OpenSpaceUI
10
+ from openspace.utils.ui_integration import UIIntegration
11
+ from openspace.utils.cli_display import CLIDisplay
12
+ from openspace.utils.display import colorize
13
+
14
+ logger = Logger.get_logger(__name__)
15
+
16
+
17
class UIManager:
    """Coordinates the live terminal UI and its background monitoring for a run."""

    def __init__(self, ui: Optional[OpenSpaceUI], ui_integration: Optional[UIIntegration]):
        self.ui = ui
        self.ui_integration = ui_integration
        self._original_log_levels = {}

    async def start_live_display(self):
        """Announce and start the live display plus periodic monitoring.

        No-op when the UI is disabled. Project loggers are silenced first so
        log output cannot corrupt the live rendering.
        """
        if not (self.ui and self.ui_integration):
            return

        print()
        print(colorize(" ▣ Starting real-time visualization...", 'c'))
        print()
        await asyncio.sleep(1)

        self._suppress_logs()

        await self.ui.start_live_display()
        await self.ui_integration.start_monitoring(poll_interval=2.0)

    async def stop_live_display(self):
        """Stop monitoring and the live display, then restore logger levels."""
        if not (self.ui and self.ui_integration):
            return

        await self.ui_integration.stop_monitoring()
        await self.ui.stop_live_display()

        self._restore_logs()

    def print_summary(self, result: dict):
        """Render the execution summary via the UI, or plain CLI output without one."""
        if self.ui:
            self.ui.print_summary(result)
        else:
            CLIDisplay.print_result_summary(result)

    def _suppress_logs(self):
        """Raise project loggers to CRITICAL, remembering each previous level."""
        for logger_name in ("openspace", "openspace.grounding", "openspace.agents"):
            target = logging.getLogger(logger_name)
            self._original_log_levels[logger_name] = target.level
            target.setLevel(logging.CRITICAL)

    def _restore_logs(self):
        """Restore every logger level recorded by :meth:`_suppress_logs`."""
        while self._original_log_levels:
            logger_name, level = self._original_log_levels.popitem()
            logging.getLogger(logger_name).setLevel(level)
63
+
64
+
65
async def _execute_task(openspace: OpenSpace, query: str, ui_manager: UIManager):
    """Execute *query* with the live display running; print and return the result.

    The display is stopped in a ``finally`` block so that a failing task
    cannot leave the terminal in live-display mode with project logging
    still suppressed (the original code skipped the stop call on error).
    """
    await ui_manager.start_live_display()
    try:
        result = await openspace.execute(query)
    finally:
        await ui_manager.stop_live_display()
    ui_manager.print_summary(result)
    return result
71
+
72
+
73
async def interactive_mode(openspace: OpenSpace, ui_manager: UIManager):
    """Interactive read-eval loop: prompt for queries until the user exits.

    Recognizes the built-in commands ``exit``/``quit``/``q``, ``status`` and
    ``help`` (case-insensitive); anything else is executed as a task.
    """
    CLIDisplay.print_interactive_header()

    while True:
        try:
            prompt = colorize(">>> ", 'c', bold=True)
            query = input(f"\n{prompt}").strip()

            if not query:
                continue

            lowered = query.lower()
            if lowered in ('exit', 'quit', 'q'):
                print("\nExiting...")
                break
            if lowered == 'status':
                _print_status(openspace)
                continue
            if lowered == 'help':
                CLIDisplay.print_help()
                continue

            CLIDisplay.print_task_header(query)
            await _execute_task(openspace, query, ui_manager)

        except KeyboardInterrupt:
            print("\n\nInterrupt signal detected, exiting...")
            break
        except Exception as e:
            logger.error(f"Error: {e}", exc_info=True)
            print(f"\nError: {e}")
105
+
106
+
107
async def single_query_mode(openspace: OpenSpace, query: str, ui_manager: UIManager):
    """Execute exactly one query (given on the command line), then return."""
    CLIDisplay.print_task_header(query, title="▶ Single Query Execution")
    await _execute_task(openspace, query, ui_manager)
110
+
111
+
112
def _print_status(openspace: OpenSpace):
    """Print system status"""
    from openspace.utils.display import Box, BoxStyle

    box = Box(width=70, style=BoxStyle.ROUNDED, color='bl')
    print()
    print(box.text_line(colorize("System Status", 'bl', bold=True),
                        align='center', indent=4, text_color=''))
    print(box.separator_line(indent=4))

    initialized = openspace.is_initialized()
    running = openspace.is_running()
    rows = [
        f"Initialized: {colorize('Yes' if initialized else 'No', 'g' if initialized else 'rd')}",
        f"Running: {colorize('Yes' if running else 'No', 'y' if running else 'g')}",
        f"Model: {colorize(openspace.config.llm_model, 'c')}",
    ]

    # Backend / session details are only available after initialization.
    if initialized:
        rows.append(f"Backends: {colorize(', '.join(openspace.list_backends()), 'c')}")
        rows.append(f"Active Sessions: {colorize(str(len(openspace.list_sessions())), 'y')}")

    for row in rows:
        print(box.text_line(f" {row}", indent=4, text_color=''))

    print(box.bottom_line(indent=4))
    print()
140
+
141
+
142
+ def _create_argument_parser() -> argparse.ArgumentParser:
143
+ """Create command-line argument parser"""
144
+ parser = argparse.ArgumentParser(
145
+ description='OpenSpace - Self-Evolving Skill Worker & Community',
146
+ formatter_class=argparse.RawDescriptionHelpFormatter,
147
+ )
148
+
149
+ # Subcommands
150
+ subparsers = parser.add_subparsers(dest='command', help='Available commands')
151
+
152
+ # refresh-cache subcommand
153
+ cache_parser = subparsers.add_parser(
154
+ 'refresh-cache',
155
+ help='Refresh MCP tool cache (starts all servers once)'
156
+ )
157
+ cache_parser.add_argument(
158
+ '--config', '-c', type=str,
159
+ help='MCP configuration file path'
160
+ )
161
+
162
+ # Basic arguments (for run mode)
163
+ parser.add_argument('--config', '-c', type=str, help='Configuration file path (JSON format)')
164
+ parser.add_argument('--query', '-q', type=str, help='Single query mode: execute query directly')
165
+
166
+ # LLM arguments
167
+ parser.add_argument('--model', '-m', type=str, help='LLM model name')
168
+
169
+ # Logging arguments
170
+ parser.add_argument('--log-level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], help='Log level')
171
+
172
+ # Execution arguments
173
+ parser.add_argument('--max-iterations', type=int, help='Maximum iteration count')
174
+ parser.add_argument('--timeout', type=float, help='LLM API call timeout (seconds)')
175
+
176
+ # UI arguments
177
+ parser.add_argument('--interactive', '-i', action='store_true', help='Force interactive mode')
178
+ parser.add_argument('--no-ui', action='store_true', help='Disable visualization UI')
179
+ parser.add_argument('--ui-compact', action='store_true', help='Use compact UI layout')
180
+
181
+ return parser
182
+
183
+
184
async def refresh_mcp_cache(config_path: Optional[str] = None):
    """Refresh the MCP tool cache by starting each configured server once.

    Each server is started, its tools are listed and written to the cache
    incrementally, and the session is closed immediately to free resources.
    Servers already present in the cache are skipped so an interrupted run
    can resume. Failures — including per-server timeouts — are recorded in
    the cache and summarized at the end.

    Args:
        config_path: Optional MCP configuration file path; when omitted the
            globally loaded configuration is used.
    """
    from openspace.grounding.backends.mcp import MCPProvider, get_tool_cache
    from openspace.grounding.core.types import SessionConfig, BackendType
    from openspace.config import load_config, get_config

    print("Refreshing MCP tool cache...")
    print("Servers will be started one by one (start -> get tools -> close).")
    print()

    # Load config
    config = load_config(config_path) if config_path else get_config()

    # Get MCP config as a plain dict (pydantic models expose model_dump()).
    mcp_config = getattr(config, 'mcp', None) or {}
    if hasattr(mcp_config, 'model_dump'):
        mcp_config = mcp_config.model_dump()

    # Skip dependency checks for refresh-cache (servers are pre-validated)
    mcp_config["check_dependencies"] = False

    # Create provider
    provider = MCPProvider(config=mcp_config)
    await provider.initialize()

    servers = provider.list_servers()
    total = len(servers)
    print(f"Found {total} MCP servers configured")
    print()

    cache = get_tool_cache()
    cache.set_server_order(servers)  # Preserve config order when saving
    total_tools = 0
    success_count = 0
    skipped_count = 0
    failed_servers = []

    # Load existing cache to skip already processed servers (resume support).
    existing_cache = cache.get_all_tools()

    # Timeout for each server (in seconds)
    SERVER_TIMEOUT = 60

    # Process servers one by one
    for i, server_name in enumerate(servers, 1):
        if server_name in existing_cache:
            cached_tools = existing_cache[server_name]
            total_tools += len(cached_tools)
            skipped_count += 1
            print(f"[{i}/{total}] {server_name}... ⏭ cached ({len(cached_tools)} tools)")
            continue

        print(f"[{i}/{total}] {server_name}...", end=" ", flush=True)
        session_id = f"mcp-{server_name}"

        try:
            # Create session and get tools with timeout protection.
            async with asyncio.timeout(SERVER_TIMEOUT):
                cfg = SessionConfig(
                    session_name=session_id,
                    backend_type=BackendType.MCP,
                    connection_params={"server": server_name},
                )
                session = await provider.create_session(cfg)

                tools = await session.list_tools()

                # Convert to the cache's metadata format.
                tool_metadata = [
                    {
                        "name": tool.schema.name,
                        "description": tool.schema.description or "",
                        "parameters": tool.schema.parameters or {},
                    }
                    for tool in tools
                ]

                # Save to cache (incremental)
                cache.save_server(server_name, tool_metadata)

                # Close session immediately to free resources
                await provider.close_session(session_id)

            total_tools += len(tools)
            success_count += 1
            print(f"✓ {len(tools)} tools")

        except Exception as e:
            # Single failure path for both timeouts and server errors
            # (the original duplicated this cleanup in two except blocks).
            if isinstance(e, asyncio.TimeoutError):
                error_msg = f"Timeout after {SERVER_TIMEOUT}s"
                print(f"✗ {error_msg}")
            else:
                error_msg = str(e)
                print(f"✗ {error_msg[:50]}")

            failed_servers.append((server_name, error_msg))

            # Save failed server info to cache
            cache.save_failed_server(server_name, error_msg)

            # Best-effort: the session may or may not have been created.
            try:
                await provider.close_session(session_id)
            except Exception:
                pass

    print()
    print(f"{'='*50}")
    print(f"✓ Collected {total_tools} tools from {success_count + skipped_count}/{total} servers")
    if skipped_count > 0:
        print(f"  (skipped {skipped_count} cached, processed {success_count} new)")
    print(f"✓ Cache saved to: {cache.cache_path}")

    if failed_servers:
        print(f"✗ Failed servers ({len(failed_servers)}):")
        for name, err in failed_servers[:10]:
            print(f"  - {name}: {err[:60]}")
        if len(failed_servers) > 10:
            print(f"  ... and {len(failed_servers) - 10} more (see cache file for details)")

    print()
    print("Done! Future list_tools() calls will use cache (no server startup).")
320
+
321
+
322
def _load_config(args) -> OpenSpaceConfig:
    """Build the OpenSpaceConfig from an optional JSON file plus CLI overrides.

    CLI flags always win over file values. Exits the process with status 1
    when loading or validation fails.
    """
    cli_overrides = {}
    if args.model:
        cli_overrides['llm_model'] = args.model
    if args.max_iterations is not None:
        cli_overrides['grounding_max_iterations'] = args.max_iterations
    if args.timeout is not None:
        cli_overrides['llm_timeout'] = args.timeout
    if args.log_level:
        cli_overrides['log_level'] = args.log_level

    try:
        if args.config:
            import json
            with open(args.config, 'r', encoding='utf-8') as f:
                file_settings = json.load(f)
            # CLI flags take precedence over file values.
            file_settings.update(cli_overrides)
            config = OpenSpaceConfig(**file_settings)
            print(f"✓ Loaded from config file: {args.config}")
        else:
            config = OpenSpaceConfig(**cli_overrides)
            print("✓ Using default configuration")

        if cli_overrides:
            print(f"✓ CLI overrides: {', '.join(cli_overrides.keys())}")

        if args.log_level:
            Logger.set_level(args.log_level)

        return config

    except Exception as e:
        logger.error(f"Failed to load configuration: {e}")
        sys.exit(1)
362
+
363
+
364
def _setup_ui(args) -> tuple[Optional[OpenSpaceUI], Optional[UIIntegration]]:
    """Create the live UI and its integration, or (None, None) when --no-ui is set."""
    if args.no_ui:
        # Plain CLI banner only — no live display components.
        CLIDisplay.print_banner()
        return None, None

    ui = create_ui(enable_live=True, compact=args.ui_compact)
    ui.print_banner()
    return ui, UIIntegration(ui)
373
+
374
+
375
async def _initialize_openspace(config: OpenSpaceConfig, args) -> OpenSpace:
    """Create and initialize the OpenSpace instance, printing progress steps.

    When no config file was given, project loggers are raised to WARNING for
    the duration of initialization to hide startup noise. Each logger's own
    previous level is recorded and restored individually (restoring all of
    them to a single captured level would clobber loggers that differed),
    and restoration happens even if initialization raises.
    """
    openspace = OpenSpace(config)

    CLIDisplay.print_initialization_progress(
        [("Initializing OpenSpace...", "loading")], show_header=False
    )

    # Temporarily quiet project loggers, remembering each one's own level.
    saved_levels = {}
    if not args.config:
        for log_name in ("openspace", "openspace.grounding", "openspace.agents"):
            target = Logger.get_logger(log_name)
            saved_levels[log_name] = target.level
            target.setLevel(logging.WARNING)

    try:
        await openspace.initialize()
    finally:
        # Restore every logger to the exact level it had before.
        for log_name, level in saved_levels.items():
            Logger.get_logger(log_name).setLevel(level)

    # Print initialization results
    backends = openspace.list_backends()
    init_steps = [
        ("LLM Client", "ok"),
        (f"Grounding Backends ({len(backends)} available)", "ok"),
        ("Grounding Agent", "ok"),
    ]

    if config.enable_recording:
        init_steps.append(("Recording Manager", "ok"))

    CLIDisplay.print_initialization_progress(init_steps, show_header=True)

    return openspace
407
+
408
+
409
async def main():
    """Async CLI entry point: parse args, dispatch subcommands, run a session.

    Returns 0 on normal completion (including Ctrl-C), 1 on error.
    """
    args = _create_argument_parser().parse_args()

    # Subcommand dispatch happens before any heavy initialization.
    if args.command == 'refresh-cache':
        await refresh_mcp_cache(args.config)
        return 0

    config = _load_config(args)
    ui, ui_integration = _setup_ui(args)
    CLIDisplay.print_configuration(config)

    openspace = None

    try:
        openspace = await _initialize_openspace(config, args)

        # Wire the UI into the live clients (only when UI is enabled).
        if ui_integration:
            ui_integration.attach_llm_client(openspace._llm_client)
            ui_integration.attach_grounding_client(openspace._grounding_client)
            CLIDisplay.print_system_ready()

        ui_manager = UIManager(ui, ui_integration)

        # Single-shot query vs. interactive REPL.
        if args.query:
            await single_query_mode(openspace, args.query, ui_manager)
        else:
            await interactive_mode(openspace, ui_manager)

    except KeyboardInterrupt:
        print("\n\nInterrupt signal detected")
    except Exception as e:
        logger.error(f"Error: {e}", exc_info=True)
        print(f"\nError: {e}")
        return 1
    finally:
        if openspace:
            print("\nCleaning up resources...")
            await openspace.cleanup()

    print("\nGoodbye!")
    return 0
460
+
461
+
462
def run_main():
    """Synchronous wrapper: drive main() on a fresh event loop, exit with its code."""
    try:
        sys.exit(asyncio.run(main()))
    except KeyboardInterrupt:
        print("\n\nProgram interrupted")
        sys.exit(0)
470
+
471
+
472
+ if __name__ == "__main__":
473
+ run_main()
openspace/agents/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from openspace.agents.base import BaseAgent, AgentStatus, AgentRegistry
2
+ from openspace.agents.grounding_agent import GroundingAgent
3
+
4
+ __all__ = [
5
+ "BaseAgent",
6
+ "AgentStatus",
7
+ "AgentRegistry",
8
+ "GroundingAgent",
9
+ ]
openspace/agents/base.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from abc import ABC, abstractmethod
5
+ from typing import TYPE_CHECKING, Dict, List, Optional, Type, Any
6
+
7
+ from openspace.utils.logging import Logger
8
+
9
+ if TYPE_CHECKING:
10
+ from openspace.llm import LLMClient
11
+ from openspace.grounding.core.grounding_client import GroundingClient
12
+ from openspace.recording import RecordingManager
13
+
14
+ logger = Logger.get_logger(__name__)
15
+
16
+
17
class BaseAgent(ABC):
    """Abstract base for agents that reason via an LLM and act through grounded tools.

    Subclasses implement :meth:`process` (the main work loop for one context)
    and :meth:`construct_messages` (prompt assembly). Concrete subclasses are
    auto-registered in :class:`AgentRegistry` the first time one is
    instantiated (see :meth:`_register_self`).
    """

    def __init__(
        self,
        name: str,
        backend_scope: Optional[List[str]] = None,
        llm_client: Optional[LLMClient] = None,
        grounding_client: Optional[GroundingClient] = None,
        recording_manager: Optional[RecordingManager] = None,
    ) -> None:
        """
        Initialize the BaseAgent.

        Args:
            name: Unique name for the agent
            backend_scope: List of backend types this agent can access (e.g., ["gui", "shell", "mcp", "web", "system"])
            llm_client: LLM client for agent reasoning (optional, can be set later)
            grounding_client: Reference to GroundingClient for tool execution
            recording_manager: RecordingManager for recording execution
        """
        self._name = name
        self._grounding_client: Optional[GroundingClient] = grounding_client
        self._backend_scope = backend_scope or []
        self._llm_client = llm_client
        self._recording_manager: Optional[RecordingManager] = recording_manager
        self._step = 0  # completed steps; advanced via increment_step()
        self._status = AgentStatus.ACTIVE

        # Auto-register the concrete class in AgentRegistry (no-op for BaseAgent).
        self._register_self()
        logger.info(f"Initialized {self.__class__.__name__}: {name}")

    @property
    def name(self) -> str:
        """Unique agent name given at construction."""
        return self._name

    @property
    def grounding_client(self) -> Optional[GroundingClient]:
        """Get the grounding client."""
        return self._grounding_client

    @property
    def backend_scope(self) -> List[str]:
        """Backend types this agent can access; empty list when none were given."""
        return self._backend_scope

    @property
    def llm_client(self) -> Optional[LLMClient]:
        """LLM client used for reasoning; may be None until assigned."""
        return self._llm_client

    @llm_client.setter
    def llm_client(self, client: LLMClient) -> None:
        self._llm_client = client

    @property
    def recording_manager(self) -> Optional[RecordingManager]:
        """Get the recording manager."""
        return self._recording_manager

    @property
    def step(self) -> int:
        """Number of steps taken so far (see :meth:`increment_step`)."""
        return self._step

    @property
    def status(self) -> str:
        """Current status string (one of the AgentStatus constants)."""
        return self._status

    @abstractmethod
    async def process(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """Run the agent on *context* and return a result dict (subclass-defined)."""
        pass

    @abstractmethod
    def construct_messages(self, context: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Construct messages for LLM reasoning.
        Context must contain 'instruction' key.
        """
        pass

    async def get_llm_response(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """Call the configured LLM client with *messages* (and optional tools).

        Raises:
            ValueError: if no LLM client has been set on this agent.
            Exception: re-raises any failure from the underlying client
                after logging it with a traceback.
        """
        if not self._llm_client:
            raise ValueError(f"LLM client not initialized for agent {self.name}")

        try:
            response = await self._llm_client.complete(
                messages=messages,
                tools=tools,
                **kwargs
            )
            return response
        except Exception as e:
            logger.error(f"{self.name}: LLM call failed: {e}", exc_info=True)
            raise

    def response_to_dict(self, response: str) -> Dict[str, Any]:
        """Parse an LLM text response as JSON, tolerating common formatting noise.

        Strips a leading markdown code fence (``` or ```json) if present; on an
        "Extra data" JSON error, falls back to decoding just the first JSON
        value in the string. Never raises: returns
        ``{"error": ..., "raw": response}`` when parsing fails entirely.
        """
        try:
            if response.strip().startswith("```json") or response.strip().startswith("```"):
                # Drop the opening fence line and everything from the closing fence on.
                lines = response.strip().split('\n')
                if lines and lines[0].startswith('```'):
                    lines = lines[1:]
                end_idx = len(lines)
                for i, line in enumerate(lines):
                    if line.strip() == '```':
                        end_idx = i
                        break
                response = '\n'.join(lines[:end_idx])

            return json.loads(response)
        except json.JSONDecodeError as e:
            # If parsing fails, try to find and extract just the JSON object/array
            if "Extra data" in str(e):
                try:
                    decoder = json.JSONDecoder()
                    obj, idx = decoder.raw_decode(response)
                    logger.warning(
                        f"{self.name}: Successfully extracted JSON but found extra text after position {idx}. "
                        f"Extra text: {response[idx:idx+100]}..."
                    )
                    return obj
                except Exception as e2:
                    logger.error(f"{self.name}: Failed to extract JSON even with raw_decode: {e2}")

            logger.error(f"{self.name}: Failed to parse response: {e}")
            logger.error(f"{self.name}: Response content: {response[:500]}")
            return {"error": "Failed to parse response", "raw": response}

    def increment_step(self) -> None:
        """Advance the step counter by one."""
        self._step += 1

    @classmethod
    def _register_self(cls) -> None:
        """Register the agent class in the registry upon instantiation."""
        # Get the actual instance class, not BaseAgent
        if cls.__name__ != "BaseAgent" and cls.__name__ not in AgentRegistry._registry:
            AgentRegistry.register(cls.__name__, cls)

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}(name={self.name}, step={self.step}, status={self.status})>"
157
+
158
+
159
class AgentStatus:
    """String constants naming the possible agent status values.

    Plain class attributes (not an Enum) so the values compare directly
    against the status strings stored on agents.
    """

    ACTIVE = "active"
    IDLE = "idle"
    WAITING = "waiting"
164
+
165
+
166
class AgentRegistry:
    """Class-level registry mapping agent class names to agent classes.

    Used by BaseAgent's auto-registration on instantiation; also supports
    explicit registration and lookup of agent types by name.
    """

    _registry: Dict[str, Type[BaseAgent]] = {}

    @classmethod
    def register(cls, name: str, agent_cls: Type[BaseAgent]) -> None:
        """Register *agent_cls* under *name*, overwriting (with a warning) any existing entry."""
        if name in cls._registry:
            logger.warning(f"Agent class '{name}' already registered, overwriting")
        cls._registry[name] = agent_cls
        logger.debug(f"Registered agent class: {name}")

    @classmethod
    def get_cls(cls, name: str) -> Type[BaseAgent]:
        """Return the class registered under *name*; raise ValueError if absent."""
        registered = cls._registry.get(name)
        if registered is None:
            raise ValueError(f"No agent class registered under '{name}'")
        return registered

    @classmethod
    def list_registered(cls) -> List[str]:
        """Return the names of all registered agent classes."""
        return [*cls._registry]

    @classmethod
    def clear(cls) -> None:
        """Remove every registered agent class."""
        cls._registry.clear()
        logger.debug("Agent registry cleared")
openspace/agents/grounding_agent.py ADDED
@@ -0,0 +1,1212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import copy
4
+ import json
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
6
+
7
+ from openspace.agents.base import BaseAgent
8
+ from openspace.grounding.core.types import BackendType, ToolResult
9
+ from openspace.platforms.screenshot import ScreenshotClient
10
+ from openspace.prompts import GroundingAgentPrompts
11
+ from openspace.utils.logging import Logger
12
+
13
+ if TYPE_CHECKING:
14
+ from openspace.llm import LLMClient
15
+ from openspace.grounding.core.grounding_client import GroundingClient
16
+ from openspace.recording import RecordingManager
17
+ from openspace.skill_engine import SkillRegistry
18
+
19
+ logger = Logger.get_logger(__name__)
20
+
21
+
22
+ class GroundingAgent(BaseAgent):
23
def __init__(
    self,
    name: str = "GroundingAgent",
    backend_scope: Optional[List[str]] = None,
    llm_client: Optional[LLMClient] = None,
    grounding_client: Optional[GroundingClient] = None,
    recording_manager: Optional[RecordingManager] = None,
    system_prompt: Optional[str] = None,
    max_iterations: int = 15,
    visual_analysis_timeout: float = 30.0,
    tool_retrieval_llm: Optional[LLMClient] = None,
    visual_analysis_model: Optional[str] = None,
) -> None:
    """
    Initialize the Grounding Agent.

    Args:
        name: Agent name
        backend_scope: List of backends this agent can access (None = all available)
        llm_client: LLM client for reasoning
        grounding_client: GroundingClient for tool execution
        recording_manager: RecordingManager for recording execution
        system_prompt: Custom system prompt
        max_iterations: Maximum LLM reasoning iterations for self-correction
        visual_analysis_timeout: Timeout for visual analysis LLM calls in seconds
        tool_retrieval_llm: LLM client for tool retrieval filter (None = use llm_client)
        visual_analysis_model: Model name for visual analysis (None = use llm_client.model)
    """
    # Base init first: it stores the backend scope that
    # _default_system_prompt() reads below.
    super().__init__(
        name=name,
        backend_scope=backend_scope or ["gui", "shell", "mcp", "web", "system"],
        llm_client=llm_client,
        grounding_client=grounding_client,
        recording_manager=recording_manager
    )

    # Prompt/iteration knobs; falsy system_prompt falls back to the default.
    self._system_prompt: str = system_prompt or self._default_system_prompt()
    self._max_iterations: int = max_iterations
    self._visual_analysis_timeout: float = visual_analysis_timeout
    self._tool_retrieval_llm: Optional[LLMClient] = tool_retrieval_llm
    self._visual_analysis_model: Optional[str] = visual_analysis_model

    # Skill context injection (set externally before process())
    self._skill_context: Optional[str] = None
    self._active_skill_ids: List[str] = []

    # Skill registry for mid-iteration retrieve_skill tool
    self._skill_registry: Optional["SkillRegistry"] = None

    # Tools from the last execution (available for post-execution analysis)
    self._last_tools: List = []

    logger.info(f"Grounding Agent initialized: {name}")
    logger.info(f"Backend scope: {self._backend_scope}")
    logger.info(f"Max iterations: {self._max_iterations}")
    logger.info(f"Visual analysis timeout: {self._visual_analysis_timeout}s")
    if tool_retrieval_llm:
        logger.info(f"Tool retrieval model: {tool_retrieval_llm.model}")
    if visual_analysis_model:
        logger.info(f"Visual analysis model: {visual_analysis_model}")
83
+
84
def set_skill_context(
    self,
    context: str,
    skill_ids: Optional[List[str]] = None,
) -> None:
    """Inject skill guidance into the agent's system prompt.

    Called by ``OpenSpace.execute()`` before ``process()`` when skills
    are matched. The context is a formatted string built by
    ``SkillRegistry.build_context_injection()``.

    Args:
        context: Formatted skill content for system prompt injection.
        skill_ids: skill_id values of injected skills.
    """
    # Empty/falsy context is normalized to None so has_skill_context stays accurate.
    self._skill_context = context or None
    self._active_skill_ids = skill_ids or []
    if self._skill_context is not None:
        label = ', '.join(self._active_skill_ids) or '(unnamed)'
        logger.info(f"Skill context set: {label}")
103
+
104
def clear_skill_context(self) -> None:
    """Drop any injected skill guidance (used before fallback execution)."""
    had_context = bool(self._skill_context)
    if had_context:
        names = ', '.join(self._active_skill_ids)
        logger.info(f"Skill context cleared (was: {names})")
    self._skill_context = None
    self._active_skill_ids = []
110
+
111
@property
def has_skill_context(self) -> bool:
    """True when skill guidance has been injected and not yet cleared."""
    if self._skill_context is None:
        return False
    return True
114
+
115
def set_skill_registry(self, registry: Optional["SkillRegistry"]) -> None:
    """Attach a SkillRegistry so the agent can offer ``retrieve_skill`` as a tool."""
    self._skill_registry = registry
    if not registry:
        return
    skill_count = len(registry.list_skills())
    logger.info(f"Skill registry attached ({skill_count} skill(s) available for mid-iteration retrieval)")
121
+
122
# Hard ceiling (in characters) for any single message's content before
# _cap_message_content truncates it head+tail around a marker.
_MAX_SINGLE_CONTENT_CHARS = 30_000
123
+
124
@classmethod
def _cap_message_content(cls, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Truncate oversized individual message contents in-place.

    Non-system messages whose string content exceeds
    ``_MAX_SINGLE_CONTENT_CHARS`` are rewritten as head + truncation
    marker + tail, so one huge tool result (e.g. read_file on a large
    CSV) cannot dominate the context window. System messages and
    non-string contents are left untouched. Returns the same list for
    chaining.
    """
    cap = cls._MAX_SINGLE_CONTENT_CHARS
    half = cap // 2
    capped_count = 0
    for message in messages:
        if message.get("role") == "system":
            continue
        body = message.get("content")
        if not isinstance(body, str):
            continue
        total = len(body)
        if total <= cap:
            continue
        marker = f"\n\n... [truncated {total - cap:,} chars] ...\n\n"
        message["content"] = body[:half] + marker + body[-half:]
        capped_count += 1
    if capped_count:
        logger.info(f"Capped {capped_count} oversized message(s) to {cap:,} chars each")
    return messages
150
+
151
def _truncate_messages(
    self,
    messages: List[Dict[str, Any]],
    keep_recent: int = 8,
    max_tokens_estimate: int = 120000
) -> List[Dict[str, Any]]:
    """Shrink an over-long conversation while keeping its scaffolding.

    Keeps all system messages and the first user message (the task
    instruction), then only the most recent conversation turns.
    Token counts are estimated as len(JSON)/4, not via a tokenizer.

    Args:
        messages: Full chat history (OpenAI-style role/content dicts).
        keep_recent: Number of recent "rounds" to keep; each round is
            assumed to be 2 messages (assistant + tool result) — TODO
            confirm; a cut mid-pair could orphan a tool result.
        max_tokens_estimate: Estimated-token threshold below which no
            truncation happens.

    Returns:
        The (possibly truncated) message list.
    """
    # First: cap any single oversized message to prevent one huge
    # tool-result from dominating the context window.
    messages = self._cap_message_content(messages)

    if len(messages) <= keep_recent + 2: # +2 for system and initial user
        return messages

    # Cheap token estimate: ~4 chars per token over the serialized history.
    total_text = json.dumps(messages, ensure_ascii=False)
    estimated_tokens = len(total_text) // 4

    if estimated_tokens < max_tokens_estimate:
        return messages

    logger.info(f"Truncating message history: {len(messages)} messages, "
                f"~{estimated_tokens:,} tokens -> keeping recent {keep_recent} rounds")

    # Partition: system messages, the first user message (task instruction),
    # and everything else (the running conversation).
    system_messages = []
    user_instruction = None
    conversation_messages = []

    for msg in messages:
        role = msg.get("role")
        if role == "system":
            system_messages.append(msg)
        elif role == "user" and user_instruction is None:
            user_instruction = msg
        else:
            conversation_messages.append(msg)

    # keep_recent rounds * 2 messages per round (assistant + tool result).
    recent_messages = conversation_messages[-(keep_recent * 2):] if conversation_messages else []

    truncated = system_messages.copy()
    if user_instruction:
        truncated.append(user_instruction)
    truncated.extend(recent_messages)

    logger.info(f"After truncation: {len(truncated)} messages, "
                f"~{len(json.dumps(truncated, ensure_ascii=False))//4:,} tokens (estimated)")

    return truncated
197
+
198
async def process(self, context: Dict[str, Any]) -> Dict[str, Any]:
    """
    Process a task execution request with multi-round iteration control.

    Runs up to ``max_iterations`` LLM rounds. Each round may execute
    tools (GUI results are enhanced via the visual-analysis callback),
    and the loop ends when the assistant emits the TASK_COMPLETE token,
    after too many consecutive empty responses, or when the iteration
    budget is exhausted.

    Args:
        context: Must contain "instruction"; may override
            "max_iterations" and "auto_execute", and may carry
            workspace information used by construct_messages().

    Returns:
        Result dict from ``_build_final_result``, or
        ``{"error": ..., "status": "error", ...}`` on failure.
    """
    instruction = context.get("instruction", "")
    if not instruction:
        logger.error("Grounding Agent: No instruction provided")
        return {"error": "No instruction provided", "status": "error"}

    # Store current instruction for visual analysis context
    self._current_instruction = instruction

    logger.info(f"Grounding Agent: Processing instruction at step {self.step}")

    # Check for pre-existing workspace files so the LLM knows about them.
    workspace_info = await self._check_workspace_artifacts(context)
    if workspace_info["has_files"]:
        context["workspace_artifacts"] = workspace_info
        logger.info(f"Workspace has {len(workspace_info['files'])} existing files: {workspace_info['files']}")

    # Get available tools (auto-search with cap)
    tools = await self._get_available_tools(instruction)
    self._last_tools = tools # expose for post-execution analysis

    # Get search debug info (similarity scores, LLM selections)
    search_debug_info = None
    if self.grounding_client:
        search_debug_info = self.grounding_client.get_last_search_debug_info()

    # Build retrieved tools list for return value
    retrieved_tools_list = []
    for tool in tools:
        tool_info = {
            "name": getattr(tool, "name", str(tool)),
            "description": getattr(tool, "description", ""),
        }
        # Prefer runtime_info.backend
        # over backend_type (may be NOT_SET for cached RemoteTools)
        runtime_info = getattr(tool, "_runtime_info", None)
        if runtime_info and hasattr(runtime_info, "backend"):
            tool_info["backend"] = runtime_info.backend.value if hasattr(runtime_info.backend, "value") else str(runtime_info.backend)
            tool_info["server_name"] = runtime_info.server_name
        elif hasattr(tool, "backend_type"):
            tool_info["backend"] = tool.backend_type.value if hasattr(tool.backend_type, "value") else str(tool.backend_type)

        # Add similarity score if available
        if search_debug_info and search_debug_info.get("tool_scores"):
            for score_info in search_debug_info["tool_scores"]:
                if score_info["name"] == tool_info["name"]:
                    tool_info["similarity_score"] = score_info["score"]
                    break

        retrieved_tools_list.append(tool_info)

    # Record retrieved tools
    if self._recording_manager:
        from openspace.recording import RecordingManager
        await RecordingManager.record_retrieved_tools(
            task_instruction=instruction,
            tools=tools,
            search_debug_info=search_debug_info,
        )

    # Initialize iteration state
    max_iterations = context.get("max_iterations", self._max_iterations)
    current_iteration = 0
    all_tool_results = []
    iteration_contexts = []
    consecutive_empty_responses = 0  # Track consecutive empty LLM responses
    MAX_CONSECUTIVE_EMPTY = 5  # Exit after this many empty responses

    # Build initial messages
    messages = self.construct_messages(context)

    # Record initial conversation setup once (system prompts + user instruction + tool definitions)
    from openspace.recording import RecordingManager
    await RecordingManager.record_conversation_setup(
        setup_messages=copy.deepcopy(messages),
        tools=tools,
    )

    try:
        while current_iteration < max_iterations:
            current_iteration += 1
            logger.info(f"Grounding Agent: Iteration {current_iteration}/{max_iterations}")

            # Strip skill context after the first iteration to save prompt tokens.
            # Skills only need to guide the first LLM call; subsequent iterations
            # already have the plan and tool results in context.
            if current_iteration == 2 and self._skill_context:
                skill_ctx = self._skill_context
                messages = [
                    m for m in messages
                    if not (m.get("role") == "system" and m.get("content") == skill_ctx)
                ]
                logger.info("Skill context removed from messages after first iteration")

            # Cap oversized individual messages every iteration to prevent
            # a single huge tool result from ballooning all subsequent calls.
            if current_iteration >= 2:
                messages = self._cap_message_content(messages)

            # Truncate message history to prevent context length issues
            # Start truncating after 5 iterations to keep context manageable
            if current_iteration >= 5:
                messages = self._truncate_messages(
                    messages,
                    keep_recent=8,
                    max_tokens_estimate=120000
                )

            # Snapshot of the prompt before this round, used below to
            # compute the per-iteration delta for recording.
            messages_input_snapshot = copy.deepcopy(messages)

            # NOTE: iteration-summary generation is intentionally disabled —
            # tool results (incl. visual analysis) are already in context, so
            # the LLM decides directly without a separate summary step.

            # Call LLMClient for single round
            # LLM will decide whether to call tools or finish with <COMPLETE>
            llm_response = await self._llm_client.complete(
                messages=messages,
                tools=tools if context.get("auto_execute", True) else None,
                execute_tools=context.get("auto_execute", True),
                summary_prompt=None,  # Disabled
                tool_result_callback=self._visual_analysis_callback
            )

            # Update messages with LLM response
            messages = llm_response["messages"]

            # Collect tool results
            tool_results_this_iteration = llm_response.get("tool_results", [])
            if tool_results_this_iteration:
                all_tool_results.extend(tool_results_this_iteration)

            assistant_message = llm_response.get("message", {})
            assistant_content = assistant_message.get("content", "")

            has_tool_calls = llm_response.get('has_tool_calls', False)
            logger.info(f"Iteration {current_iteration} - Has tool calls: {has_tool_calls}, "
                        f"Tool results: {len(tool_results_this_iteration)}, "
                        f"Content length: {len(assistant_content)} chars")

            # Empty-response watchdog: a response with neither content nor
            # tool calls counts toward MAX_CONSECUTIVE_EMPTY.
            if len(assistant_content) > 0:
                logger.info(f"Iteration {current_iteration} - Assistant content preview: {repr(assistant_content[:300])}")
                consecutive_empty_responses = 0  # Reset counter on valid response
            else:
                if not has_tool_calls:
                    consecutive_empty_responses += 1
                    logger.warning(f"Iteration {current_iteration} - NO tool calls and NO content "
                                   f"(empty response {consecutive_empty_responses}/{MAX_CONSECUTIVE_EMPTY})")

                    if consecutive_empty_responses >= MAX_CONSECUTIVE_EMPTY:
                        logger.error(f"Exiting due to {MAX_CONSECUTIVE_EMPTY} consecutive empty LLM responses. "
                                     "This may indicate API issues, rate limiting, or context too long.")
                        break
                else:
                    consecutive_empty_responses = 0  # Reset if we have tool calls

            # Snapshot messages after LLM call (accumulated context)
            messages_output_snapshot = copy.deepcopy(messages)

            # Delta messages: only the messages produced in this iteration
            # (avoids repeating system prompts / initial user instruction each time)
            delta_messages = messages[len(messages_input_snapshot):]

            # Response metadata (lightweight; full content lives in delta_messages)
            response_metadata = {
                "has_tool_calls": has_tool_calls,
                "tool_calls_count": len(tool_results_this_iteration),
            }
            iteration_context = {
                "iteration": current_iteration,
                "messages_input": messages_input_snapshot,
                "messages_output": messages_output_snapshot,
                "response_metadata": response_metadata,
            }
            iteration_contexts.append(iteration_context)

            # Real-time save to conversations.jsonl (delta only, no redundancy)
            await RecordingManager.record_iteration_context(
                iteration=current_iteration,
                delta_messages=copy.deepcopy(delta_messages),
                response_metadata=response_metadata,
            )

            # Check for completion token in assistant content.
            is_complete = GroundingAgentPrompts.TASK_COMPLETE in assistant_content

            if is_complete:
                # Task is complete - LLM generated completion token
                logger.info(f"Task completed at iteration {current_iteration} (found {GroundingAgentPrompts.TASK_COMPLETE})")
                break

            else:
                # LLM didn't generate <COMPLETE>, continue to next iteration
                if tool_results_this_iteration:
                    logger.debug(f"Task in progress, LLM called {len(tool_results_this_iteration)} tools")
                else:
                    logger.debug(f"Task in progress, LLM did not generate <COMPLETE>")

                # Remove previous iteration guidance to avoid accumulation
                messages = [
                    msg for msg in messages
                    if not (msg.get("role") == "system" and "Iteration" in msg.get("content", "") and "complete" in msg.get("content", ""))
                ]

                guidance_msg = {
                    "role": "system",
                    "content": f"Iteration {current_iteration} complete. "
                               f"Check if task is finished - if yes, output {GroundingAgentPrompts.TASK_COMPLETE}. "
                               f"If not, continue with next action."
                }
                messages.append(guidance_msg)

                continue

        # Build final result
        result = await self._build_final_result(
            instruction=instruction,
            messages=messages,
            all_tool_results=all_tool_results,
            iterations=current_iteration,
            max_iterations=max_iterations,
            iteration_contexts=iteration_contexts,
            retrieved_tools_list=retrieved_tools_list,
            search_debug_info=search_debug_info,
        )

        # Record agent action to recording manager
        if self._recording_manager:
            await self._record_agent_execution(result, instruction)

        # Increment step
        self.increment_step()

        logger.info(f"Grounding Agent: Execution completed with status: {result.get('status')}")
        return result

    except Exception as e:
        logger.error(f"Grounding Agent: Execution failed: {e}")
        result = {
            "error": str(e),
            "status": "error",
            "instruction": instruction,
            "iteration": current_iteration
        }
        self.increment_step()
        return result
474
+
475
def _default_system_prompt(self) -> str:
    """Build the default system prompt tailored to this agent's backend scope."""
    scope = self._backend_scope
    return GroundingAgentPrompts.build_system_prompt(scope)
478
+
479
def construct_messages(
    self,
    context: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Assemble the initial chat history for a task.

    Message order: base system prompt, optional workspace-directory
    note, optional workspace-artifact summary, optional active skill
    context, then the user instruction.

    Raises:
        ValueError: if ``context`` lacks a non-empty "instruction".
    """
    instruction = context.get("instruction", "")
    if not instruction:
        raise ValueError("context must contain 'instruction' field")

    messages: List[Dict[str, Any]] = [{"role": "system", "content": self._system_prompt}]

    def add_system(content: str) -> None:
        messages.append({"role": "system", "content": content})

    workspace_dir = context.get("workspace_dir")
    if workspace_dir:
        add_system(GroundingAgentPrompts.workspace_directory(workspace_dir))

    # Summarize pre-existing workspace files, preferring instruction-matching
    # files, then a recent-files digest, then a plain listing.
    artifacts = context.get("workspace_artifacts")
    if artifacts and artifacts.get("has_files"):
        files = artifacts.get("files", [])
        matching = artifacts.get("matching_files", [])
        recent = artifacts.get("recent_files", [])

        if matching:
            artifact_note = GroundingAgentPrompts.workspace_matching_files(matching)
        elif len(recent) >= 2:
            artifact_note = GroundingAgentPrompts.workspace_recent_files(
                total_files=len(files),
                recent_files=recent
            )
        else:
            artifact_note = GroundingAgentPrompts.workspace_file_list(files)

        add_system(artifact_note)

    # Skill injection — only active (selected) skills, full content.
    if self._skill_context:
        add_system(self._skill_context)
        logger.info(f"Injected active skill context ({len(self._active_skill_ids)} skill(s))")

    # User instruction always comes last.
    messages.append({"role": "user", "content": instruction})

    return messages
532
+
533
async def _get_available_tools(self, task_description: Optional[str]) -> List:
    """
    Retrieve tools for the current execution phase.

    Both skill-augmented and normal modes use the same
    ``get_tools_with_auto_search`` pipeline:
    - Non-MCP tools (shell, gui, web, system) are always included.
    - MCP tools are filtered by relevance only when their count
      exceeds ``max_tools``.

    When skills are active, the shell backend is guaranteed to be in
    scope (skills commonly reference ``shell_agent``).

    Falls back to returning all tools if anything fails.

    Args:
        task_description: Text used for relevance search; typically the
            user instruction.

    Returns:
        List of tool objects (empty if no grounding client is attached).
    """
    grounding_client = self.grounding_client
    if not grounding_client:
        return []

    backends = [BackendType(name) for name in self._backend_scope]

    # Ensure shell backend is available when skills are active
    # (skills commonly reference shell_agent, read_file, etc.)
    if self.has_skill_context:
        shell_bt = BackendType.SHELL
        if shell_bt not in backends:
            # Copy before appending so the original scope list is not mutated.
            backends = list(backends) + [shell_bt]
            logger.info("Added Shell backend to scope for skill file I/O")

    try:
        # Dedicated retrieval model if configured, else the main LLM.
        retrieval_llm = self._tool_retrieval_llm or self._llm_client
        tools = await grounding_client.get_tools_with_auto_search(
            task_description=task_description,
            backend=backends,
            use_cache=True,
            llm_callable=retrieval_llm,
        )
        logger.info(
            f"GroundingAgent selected {len(tools)} tools (auto-search) "
            f"from {len(backends)} backends"
            + (f" [skill-augmented]" if self.has_skill_context else "")
        )
    except Exception as e:
        # Best-effort fallback: load everything rather than fail the task.
        logger.warning(f"Auto-search tools failed, falling back to full list: {e}")
        tools = await self._load_all_tools(grounding_client)

    # Append retrieve_skill tool when skill registry is available
    # (local import avoids a module-level dependency cycle).
    if self._skill_registry and self._skill_registry.list_skills():
        from openspace.skill_engine.retrieve_tool import RetrieveSkillTool
        retrieve_llm = self._tool_retrieval_llm or self._llm_client
        retrieve_tool = RetrieveSkillTool(
            self._skill_registry,
            backends=[b.value for b in backends],
            llm_client=retrieve_llm,
            # _skill_store may be set externally; tolerate its absence.
            skill_store=getattr(self, "_skill_store", None),
        )
        retrieve_tool.bind_runtime_info(
            backend=BackendType.SYSTEM,
            session_name="internal",
        )
        tools.append(retrieve_tool)
        logger.info("Added retrieve_skill tool for mid-iteration skill retrieval")

    return tools
597
+
598
async def _load_all_tools(self, grounding_client: "GroundingClient") -> List:
    """Fallback: load every tool from every in-scope backend, skipping failures."""
    collected: List = []
    for scope_name in self._backend_scope:
        try:
            backend = BackendType(scope_name)
            backend_tools = await grounding_client.list_tools(backend=backend)
            collected.extend(backend_tools)
            logger.debug(f"Retrieved {len(backend_tools)} tools from backend: {scope_name}")
        except Exception as e:
            # A single failing backend must not abort the whole fallback.
            logger.debug(f"Could not get tools from {scope_name}: {e}")

    logger.info(
        f"GroundingAgent fallback retrieved {len(collected)} tools "
        f"from {len(self._backend_scope)} backends"
    )
    return collected
615
+
616
async def _visual_analysis_callback(
    self,
    result: ToolResult,
    tool_name: str,
    tool_call: Dict,
    backend: str
) -> ToolResult:
    """
    Callback for LLMClient to handle visual analysis after tool execution.

    Only GUI-backend results are analyzed. The LLM can opt out per call
    via a ``skip_visual_analysis`` meta-parameter in the tool arguments.
    If the tool produced no screenshot, one is captured best-effort.

    Args:
        result: Raw ToolResult from the executed tool.
        tool_name: Name of the executed tool (for logging).
        tool_call: Tool-call object from the LLM response.
            NOTE(review): annotated as Dict but accessed as
            ``tool_call.function.arguments`` (attribute access) —
            presumably an SDK object; a mismatch is absorbed by the
            try/except below.
        backend: Backend that ran the tool; anything other than "gui"
            is returned unchanged.

    Returns:
        The original result, or a copy enhanced with visual analysis.
    """
    # 1. Check if LLM requested to skip visual analysis
    skip_visual_analysis = False
    try:
        arguments = tool_call.function.arguments
        if isinstance(arguments, str):
            # Empty-string arguments parse as an empty object.
            args = json.loads(arguments.strip() or "{}")
        else:
            args = arguments

        if isinstance(args, dict) and args.get("skip_visual_analysis"):
            skip_visual_analysis = True
            logger.info(f"Visual analysis skipped for {tool_name} (meta-parameter set by LLM)")
    except Exception as e:
        # Unparseable arguments: proceed as if no skip was requested.
        logger.debug(f"Could not parse tool arguments: {e}")

    # 2. If skip requested, return original result
    if skip_visual_analysis:
        return result

    # 3. Check if this backend needs visual analysis
    if backend != "gui":
        return result

    # 4. Check if tool has visual data
    metadata = getattr(result, 'metadata', None)
    has_screenshots = metadata and (metadata.get("screenshot") or metadata.get("screenshots"))

    # 5. If no visual data, try to capture a screenshot
    if not has_screenshots:
        try:
            logger.info(f"No visual data from {tool_name}, capturing screenshot...")
            screenshot_client = ScreenshotClient()
            screenshot_bytes = await screenshot_client.capture()

            if screenshot_bytes:
                # Add screenshot to result metadata (mutates the result in place).
                if metadata is None:
                    result.metadata = {}
                    metadata = result.metadata
                metadata["screenshot"] = screenshot_bytes
                has_screenshots = True
                logger.info(f"Screenshot captured for visual analysis")
            else:
                logger.warning("Failed to capture screenshot")
        except Exception as e:
            # Best-effort only: analysis is skipped if capture fails.
            logger.warning(f"Error capturing screenshot: {e}")

    # 6. If still no screenshots, return original result
    if not has_screenshots:
        logger.debug(f"No visual data available for {tool_name}")
        return result

    # 7. Perform visual analysis
    return await self._enhance_result_with_visual_context(result, tool_name)
680
+
681
async def _enhance_result_with_visual_context(
    self,
    result: ToolResult,
    tool_name: str
) -> ToolResult:
    """
    Enhance tool result with visual analysis for grounding agent workflows.

    Sends up to three key screenshots from the result's metadata to a
    vision-capable model and appends the textual analysis to the
    result's content. On any failure or timeout the original result is
    returned unchanged.

    Args:
        result: ToolResult whose metadata may carry "screenshot"
            (single) or "screenshots" (list).
        tool_name: Name of the tool, used for prompting and logging.

    Returns:
        A new ToolResult with analysis appended, or the original result.
    """
    # Local imports keep litellm (heavy, third-party) off the module
    # import path until visual analysis is actually needed.
    import asyncio
    import base64
    import litellm

    try:
        metadata = getattr(result, 'metadata', None)
        if not metadata:
            return result

        # Collect all screenshots
        screenshots_bytes = []

        # Check for multiple screenshots first
        if metadata.get("screenshots"):
            screenshots_list = metadata["screenshots"]
            if isinstance(screenshots_list, list):
                screenshots_bytes = [s for s in screenshots_list if s]
        # Fall back to single screenshot
        elif metadata.get("screenshot"):
            screenshots_bytes = [metadata["screenshot"]]

        if not screenshots_bytes:
            return result

        # Select key screenshots if there are too many
        selected_screenshots = self._select_key_screenshots(screenshots_bytes, max_count=3)

        # Convert to base64 (entries may already be base64 strings).
        visual_b64_list = []
        for visual_data in selected_screenshots:
            if isinstance(visual_data, bytes):
                visual_b64_list.append(base64.b64encode(visual_data).decode('utf-8'))
            else:
                visual_b64_list.append(visual_data)  # Already base64

        # Build prompt based on number of screenshots
        num_screenshots = len(visual_b64_list)

        prompt = GroundingAgentPrompts.visual_analysis(
            tool_name=tool_name,
            num_screenshots=num_screenshots,
            task_description=getattr(self, '_current_instruction', '')
        )

        # Build content with text prompt + all images
        content = [{"type": "text", "text": prompt}]
        for visual_b64 in visual_b64_list:
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{visual_b64}"
                }
            })

        # Use dedicated visual analysis model if configured, otherwise use main LLM model
        visual_model = self._visual_analysis_model or (self._llm_client.model if self._llm_client else "openrouter/anthropic/claude-sonnet-4.5")
        # Double timeout: litellm's own timeout plus a slightly larger
        # asyncio.wait_for guard in case the client-side timeout hangs.
        response = await asyncio.wait_for(
            litellm.acompletion(
                model=visual_model,
                messages=[{
                    "role": "user",
                    "content": content
                }],
                timeout=self._visual_analysis_timeout
            ),
            timeout=self._visual_analysis_timeout + 5
        )

        analysis = response.choices[0].message.content.strip()

        # Inject visual analysis into content
        original_content = result.content or "(no text output)"
        enhanced_content = f"{original_content}\n\n**Visual content**: {analysis}"

        # Create enhanced result (original ToolResult is left untouched).
        enhanced_result = ToolResult(
            status=result.status,
            content=enhanced_content,
            error=result.error,
            metadata={**metadata, "visual_analyzed": True, "visual_analysis": analysis},
            execution_time=result.execution_time
        )

        logger.info(f"Enhanced {tool_name} result with visual analysis ({num_screenshots} screenshot(s))")
        return enhanced_result

    except asyncio.TimeoutError:
        logger.warning(f"Visual analysis timed out for {tool_name}, returning original result")
        return result
    except Exception as e:
        # Analysis is optional; never let it break the tool result flow.
        logger.warning(f"Failed to analyze visual content for {tool_name}: {e}")
        return result
781
+
782
+ def _select_key_screenshots(
783
+ self,
784
+ screenshots: List[bytes],
785
+ max_count: int = 3
786
+ ) -> List[bytes]:
787
+ """
788
+ Select key screenshots if there are too many.
789
+ """
790
+ if len(screenshots) <= max_count:
791
+ return screenshots
792
+
793
+ selected_indices = set()
794
+
795
+ # Always include last (final state)
796
+ selected_indices.add(len(screenshots) - 1)
797
+
798
+ # If room, include first (initial state)
799
+ if max_count >= 2:
800
+ selected_indices.add(0)
801
+
802
+ # Fill remaining slots with evenly spaced middle screenshots
803
+ remaining_slots = max_count - len(selected_indices)
804
+ if remaining_slots > 0:
805
+ # Calculate spacing
806
+ available_indices = [
807
+ i for i in range(1, len(screenshots) - 1)
808
+ if i not in selected_indices
809
+ ]
810
+
811
+ if available_indices:
812
+ step = max(1, len(available_indices) // (remaining_slots + 1))
813
+ for i in range(remaining_slots):
814
+ idx = min((i + 1) * step, len(available_indices) - 1)
815
+ if idx < len(available_indices):
816
+ selected_indices.add(available_indices[idx])
817
+
818
+ # Return screenshots in original order
819
+ selected = [screenshots[i] for i in sorted(selected_indices)]
820
+
821
+ logger.debug(
822
+ f"Selected {len(selected)} screenshots at indices {sorted(selected_indices)} "
823
+ f"from total of {len(screenshots)}"
824
+ )
825
+
826
+ return selected
827
+
828
+ def _get_workspace_path(self, context: Dict[str, Any]) -> Optional[str]:
829
+ """
830
+ Get workspace directory path from context.
831
+ """
832
+ return context.get("workspace_dir")
833
+
834
+ def _scan_workspace_files(
835
+ self,
836
+ workspace_path: str,
837
+ recent_threshold: int = 600 # seconds
838
+ ) -> Dict[str, Any]:
839
+ """
840
+ Scan workspace directory and collect file information.
841
+
842
+ Args:
843
+ workspace_path: Path to workspace directory
844
+ recent_threshold: Threshold in seconds for recent files
845
+
846
+ Returns:
847
+ Dictionary with file information:
848
+ - files: List of all filenames
849
+ - file_details: Dict mapping filename to file info (size, modified, age_seconds)
850
+ - recent_files: List of recently modified filenames
851
+ """
852
+ import os
853
+ import time
854
+
855
+ result = {
856
+ "files": [],
857
+ "file_details": {},
858
+ "recent_files": []
859
+ }
860
+
861
+ if not workspace_path or not os.path.exists(workspace_path):
862
+ return result
863
+
864
+ # Recording system files to exclude from workspace scanning
865
+ excluded_files = {"metadata.json", "traj.jsonl"}
866
+
867
+ try:
868
+ current_time = time.time()
869
+
870
+ for filename in os.listdir(workspace_path):
871
+ filepath = os.path.join(workspace_path, filename)
872
+ if os.path.isfile(filepath) and filename not in excluded_files:
873
+ result["files"].append(filename)
874
+
875
+ # Get file stats
876
+ stat = os.stat(filepath)
877
+ file_info = {
878
+ "size": stat.st_size,
879
+ "modified": stat.st_mtime,
880
+ "age_seconds": current_time - stat.st_mtime
881
+ }
882
+ result["file_details"][filename] = file_info
883
+
884
+ # Track recently created/modified files
885
+ if file_info["age_seconds"] < recent_threshold:
886
+ result["recent_files"].append(filename)
887
+
888
+ result["files"] = sorted(result["files"])
889
+
890
+ except Exception as e:
891
+ logger.debug(f"Error scanning workspace files: {e}")
892
+
893
+ return result
894
+
895
+ async def _check_workspace_artifacts(self, context: Dict[str, Any]) -> Dict[str, Any]:
896
+ """
897
+ Check workspace directory for existing artifacts that might be relevant to the task.
898
+ Enhanced to detect if task might already be completed.
899
+ """
900
+ import re
901
+
902
+ workspace_info = {"has_files": False, "files": [], "file_details": {}, "recent_files": []}
903
+
904
+ try:
905
+ # Get workspace path
906
+ workspace_path = self._get_workspace_path(context)
907
+
908
+ # Scan workspace files
909
+ scan_result = self._scan_workspace_files(workspace_path, recent_threshold=600)
910
+
911
+ if scan_result["files"]:
912
+ workspace_info["has_files"] = True
913
+ workspace_info["files"] = scan_result["files"]
914
+ workspace_info["file_details"] = scan_result["file_details"]
915
+ workspace_info["recent_files"] = scan_result["recent_files"]
916
+
917
+ logger.info(f"Grounding Agent: Found {len(scan_result['files'])} existing files in workspace "
918
+ f"({len(scan_result['recent_files'])} recent)")
919
+
920
+ # Check if instruction mentions specific filenames
921
+ instruction = context.get("instruction", "")
922
+ if instruction:
923
+ # Look for potential file references in instruction
924
+ potential_outputs = []
925
+ # Match common file patterns: filename.ext, "filename", 'filename'
926
+ file_patterns = re.findall(r'["\']?([a-zA-Z0-9_\-]+\.[a-zA-Z0-9]+)["\']?', instruction)
927
+ for pattern in file_patterns:
928
+ if pattern in scan_result["files"]:
929
+ potential_outputs.append(pattern)
930
+
931
+ if potential_outputs:
932
+ workspace_info["matching_files"] = potential_outputs
933
+ logger.info(f"Grounding Agent: Found {len(potential_outputs)} files matching task: {potential_outputs}")
934
+
935
+ except Exception as e:
936
+ logger.debug(f"Could not check workspace artifacts: {e}")
937
+
938
+ return workspace_info
939
+
940
+ def _build_iteration_feedback(
941
+ self,
942
+ iteration: int,
943
+ llm_summary: Optional[str] = None,
944
+ add_guidance: bool = True
945
+ ) -> Optional[Dict[str, str]]:
946
+ """
947
+ Build feedback message to add to next iteration.
948
+ """
949
+ if not llm_summary:
950
+ return None
951
+
952
+ feedback_content = GroundingAgentPrompts.iteration_feedback(
953
+ iteration=iteration,
954
+ llm_summary=llm_summary,
955
+ add_guidance=add_guidance
956
+ )
957
+
958
+ return {
959
+ "role": "system",
960
+ "content": feedback_content
961
+ }
962
+
963
+ def _remove_previous_guidance(self, messages: List[Dict[str, Any]]) -> None:
964
+ """
965
+ Remove guidance section from previous iteration feedback messages.
966
+ """
967
+ for msg in messages:
968
+ if msg.get("role") == "system":
969
+ content = msg.get("content", "")
970
+ # Check if this is an iteration feedback message with guidance
971
+ if "## Iteration" in content and "Summary" in content and "---" in content:
972
+ # Remove everything from "---" onwards (the guidance part)
973
+ summary_only = content.split("---")[0].strip()
974
+ msg["content"] = summary_only
975
+
976
+ async def _generate_final_summary(
977
+ self,
978
+ instruction: str,
979
+ messages: List[Dict],
980
+ iterations: int
981
+ ) -> tuple[str, bool, List[Dict]]:
982
+ """
983
+ Generate final summary across all iterations for reporting to upper layer.
984
+
985
+ Returns:
986
+ tuple[str, bool, List[Dict]]: (summary_text, success_flag, context_used)
987
+ - summary_text: The generated summary or error message
988
+ - success_flag: True if summary was generated successfully, False otherwise
989
+ - context_used: The cleaned messages used for generating summary
990
+ """
991
+ final_summary_prompt = {
992
+ "role": "user",
993
+ "content": GroundingAgentPrompts.final_summary(
994
+ instruction=instruction,
995
+ iterations=iterations
996
+ )
997
+ }
998
+
999
+ clean_messages = []
1000
+ for msg in messages:
1001
+ # Skip tool result messages
1002
+ if msg.get("role") == "tool":
1003
+ continue
1004
+ # Copy message and remove tool_calls if present
1005
+ clean_msg = msg.copy()
1006
+ if "tool_calls" in clean_msg:
1007
+ del clean_msg["tool_calls"]
1008
+ clean_messages.append(clean_msg)
1009
+
1010
+ clean_messages.append(final_summary_prompt)
1011
+
1012
+ # Save context for return
1013
+ context_for_return = copy.deepcopy(clean_messages)
1014
+
1015
+ try:
1016
+ # Call LLMClient to generate final summary (without tools)
1017
+ summary_response = await self._llm_client.complete(
1018
+ messages=clean_messages,
1019
+ tools=None,
1020
+ execute_tools=False
1021
+ )
1022
+
1023
+ final_summary = summary_response.get("message", {}).get("content", "")
1024
+
1025
+ if final_summary:
1026
+ logger.info(f"Generated final summary: {final_summary[:200]}...")
1027
+ return final_summary, True, context_for_return
1028
+ else:
1029
+ logger.warning("LLM returned empty final summary")
1030
+ return f"Task completed after {iterations} iteration(s). Check execution history for details.", True, context_for_return
1031
+
1032
+ except Exception as e:
1033
+ logger.error(f"Error generating final summary: {e}")
1034
+ return f"Task completed after {iterations} iteration(s), but failed to generate summary: {str(e)}", False, context_for_return
1035
+
1036
+
1037
    async def _build_final_result(
        self,
        instruction: str,
        messages: List[Dict],
        all_tool_results: List[Dict],
        iterations: int,
        max_iterations: int,
        iteration_contexts: Optional[List[Dict]] = None,
        retrieved_tools_list: Optional[List[Dict]] = None,
        search_debug_info: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Build final execution result.

        Completion is detected via the TASK_COMPLETE marker in the last
        assistant message; when present, the marker is stripped from the
        reported response and status is "success", otherwise status is
        "incomplete" with a max-iterations warning.

        Args:
            instruction: Original instruction
            messages: Complete conversation history (including all iteration summaries)
            all_tool_results: All tool execution results
            iterations: Number of iterations performed
            max_iterations: Maximum allowed iterations
            iteration_contexts: Context snapshots for each iteration
            retrieved_tools_list: List of tools retrieved for this task
            search_debug_info: Debug info from tool search (similarity scores, LLM selections)

        Returns:
            Result dict consumed by the upper layer (status, response,
            tool_executions, messages, plus debug/metadata fields).
        """
        is_complete = self._check_task_completion(messages)

        tool_executions = self._format_tool_executions(all_tool_results)

        result = {
            "instruction": instruction,
            "step": self.step,
            "iterations": iterations,
            "tool_executions": tool_executions,
            "messages": messages,
            "iteration_contexts": iteration_contexts or [],
            "retrieved_tools_list": retrieved_tools_list or [],
            "search_debug_info": search_debug_info,
            "active_skills": list(self._active_skill_ids),
            # Keep the backend session alive for potential follow-up steps.
            "keep_session": True
        }

        if is_complete:
            logger.info("Task completed with <COMPLETE> marker")
            # Use LLM's own completion response directly (no extra LLM call needed)
            # LLM already generates a summary before outputting <COMPLETE>
            last_response = self._extract_last_assistant_message(messages)
            # Remove the <COMPLETE> token from response for cleaner output
            result["response"] = last_response.replace(GroundingAgentPrompts.TASK_COMPLETE, "").strip()
            result["status"] = "success"

            # [DISABLED] Extra LLM call to generate final summary
            # final_summary, summary_success, final_summary_context = await self._generate_final_summary(
            #     instruction=instruction,
            #     messages=messages,
            #     iterations=iterations
            # )
            # result["response"] = final_summary
            # result["final_summary_context"] = final_summary_context
        else:
            result["response"] = self._extract_last_assistant_message(messages)
            result["status"] = "incomplete"
            result["warning"] = (
                f"Task reached max iterations ({max_iterations}) without completion. "
                f"This may indicate the task needs more steps or clarification."
            )

        return result
1104
+
1105
+ def _format_tool_executions(self, all_tool_results: List[Dict]) -> List[Dict]:
1106
+ executions = []
1107
+ for tr in all_tool_results:
1108
+ tool_result_obj = tr.get("result")
1109
+ tool_call = tr.get("tool_call")
1110
+
1111
+ status = "unknown"
1112
+ if hasattr(tool_result_obj, 'status'):
1113
+ status_obj = tool_result_obj.status
1114
+ status = getattr(status_obj, 'value', status_obj)
1115
+
1116
+ # Extract tool_name and arguments from tool_call object (litellm format)
1117
+ tool_name = "unknown"
1118
+ arguments = {}
1119
+ if tool_call is not None:
1120
+ if hasattr(tool_call, 'function'):
1121
+ # tool_call is an object with .function attribute
1122
+ tool_name = getattr(tool_call.function, 'name', 'unknown')
1123
+ args_raw = getattr(tool_call.function, 'arguments', '{}')
1124
+ if isinstance(args_raw, str):
1125
+ try:
1126
+ arguments = json.loads(args_raw) if args_raw.strip() else {}
1127
+ except json.JSONDecodeError:
1128
+ arguments = {}
1129
+ else:
1130
+ arguments = args_raw if isinstance(args_raw, dict) else {}
1131
+ elif isinstance(tool_call, dict):
1132
+ # Fallback: tool_call is a dict
1133
+ func = tool_call.get("function", {})
1134
+ tool_name = func.get("name", "unknown")
1135
+ args_raw = func.get("arguments", "{}")
1136
+ if isinstance(args_raw, str):
1137
+ try:
1138
+ arguments = json.loads(args_raw) if args_raw.strip() else {}
1139
+ except json.JSONDecodeError:
1140
+ arguments = {}
1141
+ else:
1142
+ arguments = args_raw if isinstance(args_raw, dict) else {}
1143
+
1144
+ executions.append({
1145
+ "tool_name": tool_name,
1146
+ "arguments": arguments,
1147
+ "backend": tr.get("backend"),
1148
+ "server_name": tr.get("server_name"),
1149
+ "status": status,
1150
+ "content": tool_result_obj.content if hasattr(tool_result_obj, 'content') else None,
1151
+ "error": tool_result_obj.error if hasattr(tool_result_obj, 'error') else None,
1152
+ "execution_time": tool_result_obj.execution_time if hasattr(tool_result_obj, 'execution_time') else None,
1153
+ "metadata": tool_result_obj.metadata if hasattr(tool_result_obj, 'metadata') else {},
1154
+ })
1155
+ return executions
1156
+
1157
+ def _check_task_completion(self, messages: List[Dict]) -> bool:
1158
+ for msg in reversed(messages):
1159
+ if msg.get("role") == "assistant":
1160
+ content = msg.get("content", "")
1161
+ return GroundingAgentPrompts.TASK_COMPLETE in content
1162
+ return False
1163
+
1164
+ def _extract_last_assistant_message(self, messages: List[Dict]) -> str:
1165
+ for msg in reversed(messages):
1166
+ if msg.get("role") == "assistant":
1167
+ return msg.get("content", "")
1168
+ return ""
1169
+
1170
+ async def _record_agent_execution(
1171
+ self,
1172
+ result: Dict[str, Any],
1173
+ instruction: str
1174
+ ) -> None:
1175
+ """
1176
+ Record agent execution to recording manager.
1177
+
1178
+ Args:
1179
+ result: Execution result
1180
+ instruction: Original instruction
1181
+ """
1182
+ if not self._recording_manager:
1183
+ return
1184
+
1185
+ # Extract tool execution summary
1186
+ tool_summary = []
1187
+ if result.get("tool_executions"):
1188
+ for exec_info in result["tool_executions"]:
1189
+ tool_summary.append({
1190
+ "tool": exec_info.get("tool_name", "unknown"),
1191
+ "backend": exec_info.get("backend", "unknown"),
1192
+ "status": exec_info.get("status", "unknown"),
1193
+ })
1194
+
1195
+ await self._recording_manager.record_agent_action(
1196
+ agent_name=self.name,
1197
+ action_type="execute",
1198
+ input_data={"instruction": instruction},
1199
+ reasoning={
1200
+ "response": result.get("response", ""),
1201
+ "tools_selected": tool_summary,
1202
+ },
1203
+ output_data={
1204
+ "status": result.get("status", "unknown"),
1205
+ "iterations": result.get("iterations", 0),
1206
+ "num_tool_executions": len(result.get("tool_executions", [])),
1207
+ },
1208
+ metadata={
1209
+ "step": self.step,
1210
+ "instruction": instruction,
1211
+ }
1212
+ )
openspace/cloud/__init__.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cloud platform integration.
2
+
3
+ Provides:
4
+ - ``OpenSpaceClient`` — HTTP client for the cloud API
5
+ - ``get_openspace_auth`` — credential resolution
6
+ - ``SkillSearchEngine`` — hybrid BM25 + embedding search
7
+ - ``generate_embedding`` — OpenAI embedding generation
8
+ """
9
+
10
+ from openspace.cloud.auth import get_openspace_auth
11
+
12
+
13
def __getattr__(name: str):
    """Module-level lazy attribute hook (PEP 562).

    Heavy submodules are imported only on first access of their exported
    name; unknown names raise AttributeError as usual.
    """
    if name == "OpenSpaceClient":
        from openspace.cloud.client import OpenSpaceClient as client_cls
        return client_cls
    if name == "SkillSearchEngine":
        from openspace.cloud.search import SkillSearchEngine as search_cls
        return search_cls
    if name == "generate_embedding":
        from openspace.cloud.embedding import generate_embedding as embed_fn
        return embed_fn
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
24
+
25
+
26
# Public API; the class/engine/function entries are resolved lazily by the
# module-level __getattr__ above.
__all__ = [
    "OpenSpaceClient",
    "get_openspace_auth",
    "SkillSearchEngine",
    "generate_embedding",
]
openspace/cloud/auth.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenSpace cloud platform authentication.
2
+
3
+ Resolution order for OPENSPACE_API_KEY:
4
+ 1. ``OPENSPACE_API_KEY`` env var
5
+ 2. Auto-detect from host agent config (MCP env block)
6
+ 3. Empty (caller treats as "not configured").
7
+
8
+ Base URL resolution:
9
+ 1. ``OPENSPACE_API_BASE`` env var
10
+ 2. Default: ``https://open-space.cloud/api/v1``
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import os
17
+ from typing import Dict, Optional
18
+
19
# Shared logger for cloud auth resolution messages.
logger = logging.getLogger("openspace.cloud")

# Fallback API base used when neither env vars nor host config provide one.
OPENSPACE_DEFAULT_BASE = "https://open-space.cloud/api/v1"
22
+
23
+
24
def get_openspace_auth() -> tuple[Dict[str, str], str]:
    """Resolve OpenSpace credentials and base URL.

    The API key is resolved from the ``OPENSPACE_API_KEY`` env var first,
    then from the host agent config MCP env block. The base URL is resolved
    independently of which tier supplied the key: ``OPENSPACE_API_BASE``
    env var, then host config, then ``OPENSPACE_DEFAULT_BASE``.
    (Previously a base-URL override was only honored when the key came from
    the same tier, silently ignoring e.g. an env base with a config key.)

    Returns:
        ``(auth_headers, api_base)`` — headers dict ready for HTTP requests
        and the API base URL. If no credentials are found, ``auth_headers``
        is empty.
    """
    auth_headers: Dict[str, str] = {}

    # Tier 1: env vars
    env_key = os.environ.get("OPENSPACE_API_KEY", "").strip()
    env_base = os.environ.get("OPENSPACE_API_BASE", "").strip()

    if env_key:
        # Fast path: everything comes from the environment, so the host
        # agent config never needs to be read.
        auth_headers["X-API-Key"] = env_key
        logger.info("OpenSpace auth: using OPENSPACE_API_KEY env var")
        api_base = env_base.rstrip("/") if env_base else OPENSPACE_DEFAULT_BASE
        return auth_headers, api_base

    # Tier 2: host agent config MCP env block (imported lazily so the
    # env-var path above never touches host detection).
    from openspace.host_detection import read_host_mcp_env

    mcp_env = read_host_mcp_env()
    cfg_key = str(mcp_env.get("OPENSPACE_API_KEY", "")).strip()
    cfg_base = str(mcp_env.get("OPENSPACE_API_BASE", "")).strip()

    if cfg_key:
        auth_headers["X-API-Key"] = cfg_key
        logger.info("OpenSpace auth: using OPENSPACE_API_KEY from host agent MCP env config")

    # Base URL: env var beats config, regardless of where the key came from.
    if env_base:
        api_base = env_base.rstrip("/")
    elif cfg_base:
        api_base = cfg_base.rstrip("/")
    else:
        api_base = OPENSPACE_DEFAULT_BASE

    return auth_headers, api_base
61
+
62
+
63
def get_api_base(cli_override: Optional[str] = None) -> str:
    """Resolve OpenSpace API base URL (for CLI scripts).

    Priority: ``cli_override`` → ``OPENSPACE_API_BASE`` env var → host agent
    config → ``OPENSPACE_DEFAULT_BASE``. Trailing slashes are stripped.
    """
    if cli_override:
        return cli_override.rstrip("/")
    env_base = os.environ.get("OPENSPACE_API_BASE", "").strip()
    if env_base:
        return env_base.rstrip("/")
    # Imported lazily: host detection is only needed when neither the CLI
    # flag nor the environment provides a base URL.
    from openspace.host_detection import read_host_mcp_env
    cfg_base = str(read_host_mcp_env().get("OPENSPACE_API_BASE", "")).strip()
    if cfg_base:
        return cfg_base.rstrip("/")
    return OPENSPACE_DEFAULT_BASE
80
+
81
+
82
def get_auth_headers_or_exit() -> Dict[str, str]:
    """Resolve auth headers for CLI scripts. Exits with status 1 on failure.

    Checks the ``OPENSPACE_API_KEY`` env var first, then the host agent's
    MCP env config; prints a registration hint to stderr and exits when
    neither is set.
    """
    import sys

    env_key = os.environ.get("OPENSPACE_API_KEY", "").strip()
    if env_key:
        return {"X-API-Key": env_key}

    # Imported lazily so the env-var fast path stays dependency-free.
    from openspace.host_detection import read_host_mcp_env

    cfg_key = str(read_host_mcp_env().get("OPENSPACE_API_KEY", "")).strip()
    if cfg_key:
        return {"X-API-Key": cfg_key}

    print(
        "ERROR: No OPENSPACE_API_KEY configured.\n"
        " Register at https://open-space.cloud to obtain a key, then add it to\n"
        " your host agent config in the OpenSpace MCP env block.",
        file=sys.stderr,
    )
    sys.exit(1)
openspace/cloud/cli/__init__.py ADDED
File without changes
openspace/cloud/cli/download_skill.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Download a skill from the OpenSpace cloud platform.
3
+
4
+ Usage:
5
+ openspace-download-skill --skill-id "weather__imp_abc12345" --output-dir ./skills/
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import json
12
+ import sys
13
+ from pathlib import Path
14
+
15
+ from openspace.cloud.auth import get_api_base, get_auth_headers_or_exit
16
+ from openspace.cloud.client import OpenSpaceClient, CloudError
17
+
18
+
19
def main() -> None:
    """CLI entry point: fetch a skill by record ID and extract it locally."""
    arg_parser = argparse.ArgumentParser(
        prog="openspace-download-skill",
        description="Download a skill from OpenSpace's cloud community",
    )
    arg_parser.add_argument("--skill-id", required=True, help="Cloud skill record ID")
    arg_parser.add_argument("--output-dir", required=True, help="Target directory for extraction")
    arg_parser.add_argument("--api-base", default=None, help="Override API base URL")
    arg_parser.add_argument("--force", action="store_true", help="Overwrite existing skill directory")
    opts = arg_parser.parse_args()

    base_url = get_api_base(opts.api_base)
    auth = get_auth_headers_or_exit()
    destination = Path(opts.output_dir).resolve()

    print(f"Fetching skill: {opts.skill_id} ...", file=sys.stderr)

    try:
        result = OpenSpaceClient(auth, base_url).import_skill(opts.skill_id, destination)
    except CloudError as err:
        print(f"ERROR: {err}", file=sys.stderr)
        sys.exit(1)

    # Refuse to clobber an existing skill directory unless --force was given.
    if result.get("status") == "already_exists" and not opts.force:
        print(
            f"ERROR: Skill directory already exists: {result.get('local_path')}\n"
            f" Use --force to overwrite.",
            file=sys.stderr,
        )
        sys.exit(1)

    extracted = result.get("files", [])
    local_path = result.get("local_path", "")
    print(f" Extracted {len(extracted)} file(s) to {local_path}", file=sys.stderr)
    for name in extracted:
        print(f" {name}", file=sys.stderr)

    # Machine-readable result on stdout; human-readable notes on stderr.
    print(json.dumps(result, indent=2, ensure_ascii=False))
    print(f"\nSkill downloaded to: {local_path}", file=sys.stderr)
60
+
61
+
62
# Script entry point (also exposed as the openspace-download-skill console script).
if __name__ == "__main__":
    main()
openspace/cloud/cli/upload_skill.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Upload a skill to the OpenSpace cloud platform.
3
+
4
+ Usage:
5
+ openspace-upload-skill --skill-dir ./my-skill --visibility public --origin imported
6
+ openspace-upload-skill --skill-dir ./my-skill --visibility private --origin fixed --parent-ids "parent_id"
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import json
13
+ import sys
14
+ from pathlib import Path
15
+
16
+ from openspace.cloud.auth import get_api_base, get_auth_headers_or_exit
17
+ from openspace.cloud.client import OpenSpaceClient, CloudError
18
+
19
+
20
def main() -> None:
    """CLI entry point: validate args, optionally dry-run, then upload.

    Exits with status 1 on invalid input or CloudError. Machine-readable
    output (the server response JSON) goes to stdout; progress text goes
    to stderr.
    """
    parser = argparse.ArgumentParser(
        prog="openspace-upload-skill",
        description="Upload a skill to OpenSpace's cloud community",
    )
    parser.add_argument("--skill-dir", required=True, help="Path to skill directory (must contain SKILL.md)")
    parser.add_argument("--visibility", required=True, choices=["public", "private"])
    parser.add_argument("--origin", default="imported", choices=["imported", "captured", "derived", "fixed"])
    parser.add_argument("--parent-ids", default="", help="Comma-separated parent skill IDs")
    parser.add_argument("--tags", default="", help="Comma-separated tags")
    parser.add_argument("--created-by", default="", help="Creator display name")
    parser.add_argument("--change-summary", default="", help="Change summary text")
    parser.add_argument("--api-base", default=None, help="Override API base URL")
    parser.add_argument("--dry-run", action="store_true", help="List files without uploading")

    args = parser.parse_args()

    skill_dir = Path(args.skill_dir).resolve()
    if not skill_dir.is_dir():
        print(f"ERROR: Not a directory: {skill_dir}", file=sys.stderr)
        sys.exit(1)

    api_base = get_api_base(args.api_base)

    if args.dry_run:
        # NOTE(review): relies on OpenSpaceClient._collect_files (a private
        # helper) so the dry run enumerates exactly what an upload would send.
        files = OpenSpaceClient._collect_files(skill_dir)
        print(f"Dry run — would upload {len(files)} file(s):", file=sys.stderr)
        for f in files:
            print(f" {f.relative_to(skill_dir)}", file=sys.stderr)
        sys.exit(0)

    headers = get_auth_headers_or_exit()

    # Comma-separated CLI values → cleaned lists (empty entries dropped).
    parent_ids = [p.strip() for p in args.parent_ids.split(",") if p.strip()]
    tags = [t.strip() for t in args.tags.split(",") if t.strip()]

    print(f"\n{'='*60}", file=sys.stderr)
    print(f"Upload Skill: {skill_dir.name}", file=sys.stderr)
    print(f" Visibility: {args.visibility}", file=sys.stderr)
    print(f" Origin: {args.origin}", file=sys.stderr)
    print(f" API Base: {api_base}", file=sys.stderr)
    print(f"{'='*60}\n", file=sys.stderr)

    try:
        client = OpenSpaceClient(headers, api_base)
        result = client.upload_skill(
            skill_dir,
            visibility=args.visibility,
            origin=args.origin,
            parent_skill_ids=parent_ids,
            tags=tags,
            created_by=args.created_by,
            change_summary=args.change_summary,
        )
    except CloudError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        sys.exit(1)

    # Plain string — the original used an f-prefix with no placeholders (F541).
    print("\nUpload complete!", file=sys.stderr)
    print(json.dumps(result, indent=2, ensure_ascii=False))
80
+
81
+
82
# Script entry point (also exposed as the openspace-upload-skill console script).
if __name__ == "__main__":
    main()
openspace/cloud/client.py ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """OpenSpace cloud platform HTTP client.
2
+
3
+ All methods are **synchronous** (use ``urllib``). In async contexts
4
+ (MCP server), wrap calls with ``asyncio.to_thread()``.
5
+
6
+ Provides both low-level HTTP operations and higher-level workflows:
7
+ - ``fetch_record`` / ``download_artifact`` / ``fetch_metadata``
8
+ - ``stage_artifact`` / ``create_record``
9
+ - ``upload_skill`` (stage → diff → create — full workflow)
10
+ - ``import_skill`` (fetch → download → extract — full workflow)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import difflib
16
+ import io
17
+ import json
18
+ import logging
19
+ import os
20
+ import uuid
21
+ import urllib.error
22
+ import urllib.parse
23
+ import urllib.request
24
+ import zipfile
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional
27
+
28
# Module logger shared by the cloud client.
logger = logging.getLogger("openspace.cloud")

# Canonical file names used by the skill packaging convention.
SKILL_FILENAME = "SKILL.md"
SKILL_ID_FILENAME = ".skill_id"

# Extensions uploaded with a text/plain content type; everything else is
# sent as application/octet-stream (see stage_artifact).
_TEXT_EXTENSIONS = frozenset({
    ".md", ".txt", ".yaml", ".yml", ".json", ".py", ".sh", ".toml",
})
36
+
37
+
38
class CloudError(Exception):
    """Raised when a cloud API call fails.

    Attributes:
        status_code: HTTP status code, or 0 when the failure happened
            before an HTTP response was received.
        body: Raw response body text, when available.
    """

    def __init__(self, message: str, status_code: int = 0, body: str = ""):
        # Keep diagnostics alongside the human-readable message.
        self.status_code = status_code
        self.body = body
        super().__init__(message)
45
+
46
+
47
+ class OpenSpaceClient:
48
+ """HTTP client for the OpenSpace cloud API.
49
+
50
+ Args:
51
+ auth_headers: Pre-resolved auth headers (from ``get_openspace_auth``).
52
+ api_base: API base URL (e.g. ``https://open-space.cloud/api/v1``).
53
+ """
54
+
55
+ _DEFAULT_UA = "OpenSpace-Client/1.0"
56
+
57
+ def __init__(self, auth_headers: Dict[str, str], api_base: str):
58
+ if not auth_headers:
59
+ raise CloudError(
60
+ "No OPENSPACE_API_KEY configured. "
61
+ "Register at https://open-space.cloud to obtain a key."
62
+ )
63
+ self._headers = {
64
+ "User-Agent": self._DEFAULT_UA,
65
+ **auth_headers,
66
+ }
67
+ self._base = api_base.rstrip("/")
68
+
69
+ def _request(
70
+ self,
71
+ method: str,
72
+ path: str,
73
+ *,
74
+ body: Optional[bytes] = None,
75
+ extra_headers: Optional[Dict[str, str]] = None,
76
+ timeout: int = 30,
77
+ ) -> tuple[int, bytes]:
78
+ """Execute HTTP request. Returns ``(status_code, response_body)``."""
79
+ url = f"{self._base}{path}"
80
+ headers = {**self._headers}
81
+ if extra_headers:
82
+ headers.update(extra_headers)
83
+
84
+ req = urllib.request.Request(url, data=body, headers=headers, method=method)
85
+ try:
86
+ with urllib.request.urlopen(req, timeout=timeout) as resp:
87
+ return resp.status, resp.read()
88
+ except urllib.error.HTTPError as e:
89
+ resp_body = e.read().decode("utf-8", errors="replace")
90
+ raise CloudError(
91
+ f"HTTP {e.code}: {resp_body[:500]}",
92
+ status_code=e.code,
93
+ body=resp_body,
94
+ )
95
+ except urllib.error.URLError as e:
96
+ raise CloudError(f"Connection failed: {e.reason}")
97
+
98
+ def _get_json(self, path: str, timeout: int = 30) -> Dict[str, Any]:
99
+ _, data = self._request("GET", path, timeout=timeout)
100
+ return json.loads(data.decode("utf-8"))
101
+
102
+ def fetch_record(self, record_id: str) -> Dict[str, Any]:
103
+ """GET /records/{record_id} — fetch record metadata."""
104
+ return self._get_json(f"/records/{urllib.parse.quote(record_id)}")
105
+
106
+ def download_artifact(self, record_id: str) -> bytes:
107
+ """GET /records/{record_id}/download — download artifact zip bytes."""
108
+ _, data = self._request(
109
+ "GET",
110
+ f"/records/{urllib.parse.quote(record_id)}/download",
111
+ timeout=120,
112
+ )
113
+ return data
114
+
115
+ def fetch_metadata(
116
+ self,
117
+ *,
118
+ include_embedding: bool = False,
119
+ limit: int = 200,
120
+ ) -> List[Dict[str, Any]]:
121
+ """GET /records/metadata — fetch all visible records with pagination."""
122
+ all_items: List[Dict[str, Any]] = []
123
+ cursor: Optional[str] = None
124
+
125
+ while True:
126
+ params: Dict[str, str] = {"limit": str(limit)}
127
+ if include_embedding:
128
+ params["include_embedding"] = "true"
129
+ if cursor:
130
+ params["cursor"] = cursor
131
+
132
+ path = f"/records/metadata?{urllib.parse.urlencode(params)}"
133
+ data = self._get_json(path, timeout=15)
134
+
135
+ all_items.extend(data.get("items", []))
136
+
137
+ if not data.get("has_more"):
138
+ break
139
+ cursor = data.get("next_cursor")
140
+ if not cursor:
141
+ break
142
+
143
+ return all_items
144
+
145
    def stage_artifact(self, skill_dir: Path) -> tuple[str, int]:
        """POST /artifacts/stage — upload skill files as one multipart request.

        Builds the multipart/form-data body by hand (urllib has no native
        multipart support). Each file is sent under the ``files`` form field
        with its path relative to *skill_dir* as the filename.

        Args:
            skill_dir: Skill directory whose files are uploaded.

        Returns:
            ``(artifact_id, file_count)`` as reported by the server.

        Raises:
            CloudError: If the directory contains no files or the response
                lacks an ``artifact_id``.
        """
        file_paths = self._collect_files(skill_dir)
        if not file_paths:
            raise CloudError("No files found in skill directory")

        # Random boundary — os.urandom makes a collision with file content
        # practically impossible.
        boundary = f"----OpenSpaceUpload{os.urandom(8).hex()}"
        body_parts: list[bytes] = []
        for fp in file_paths:
            rel_path = str(fp.relative_to(skill_dir))
            body_parts.append(f"--{boundary}\r\n".encode())
            body_parts.append(
                f'Content-Disposition: form-data; name="files"; '
                f'filename="{rel_path}"\r\n'.encode()
            )
            # Known text extensions are labelled text/plain; everything else
            # is sent as an opaque binary blob.
            ctype = "text/plain" if fp.suffix in _TEXT_EXTENSIONS else "application/octet-stream"
            body_parts.append(f"Content-Type: {ctype}\r\n\r\n".encode())
            body_parts.append(fp.read_bytes())
            body_parts.append(b"\r\n")
        # Closing boundary terminates the multipart payload.
        body_parts.append(f"--{boundary}--\r\n".encode())

        _, resp_data = self._request(
            "POST",
            "/artifacts/stage",
            body=b"".join(body_parts),
            extra_headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
            timeout=60,
        )
        stage = json.loads(resp_data.decode("utf-8"))
        artifact_id = stage.get("artifact_id")
        if not artifact_id:
            raise CloudError("No artifact_id in stage response")
        file_count = stage.get("stats", {}).get("file_count", 0)
        return artifact_id, file_count
182
+
183
+ def create_record(self, payload: Dict[str, Any]) -> tuple[Dict[str, Any], int]:
184
+ """POST /records — create skill record with 409 conflict handling.
185
+
186
+ Returns ``(response_data, status_code)``.
187
+ """
188
+ body = json.dumps(payload).encode("utf-8")
189
+ try:
190
+ status, resp_data = self._request(
191
+ "POST",
192
+ "/records",
193
+ body=body,
194
+ extra_headers={"Content-Type": "application/json"},
195
+ )
196
+ return json.loads(resp_data.decode("utf-8")), status
197
+ except CloudError as e:
198
+ if e.status_code == 409:
199
+ return self._handle_409(e.body, payload)
200
+ raise
201
+
202
    def _handle_409(
        self, body_text: str, payload: Dict[str, Any],
    ) -> tuple[Dict[str, Any], int]:
        """Handle 409 conflict responses from ``POST /records``.

        Two recoverable conflict shapes are recognised:

        - ``fingerprint_record_id_conflict``: identical content already
          exists under another record id → reported as a duplicate.
        - ``record_id_fingerprint_conflict``: the chosen record id is taken
          by different content → retried once with a fresh id.

        Args:
            body_text: Raw response body of the 409 error.
            payload: The original POST payload; mutated with a new
                ``record_id`` on retry.

        Returns:
            ``(response_data, status_code)``, same shape as ``create_record``.

        Raises:
            CloudError: For unrecognised conflict types or an unparseable
                error body.
        """
        try:
            err_data = json.loads(body_text)
        except json.JSONDecodeError:
            raise CloudError(f"409 conflict: {body_text}", status_code=409, body=body_text)

        err_type = err_data.get("error", "")

        if err_type == "fingerprint_record_id_conflict":
            # Same content under a different id — surface as duplicate,
            # preserving the 409 status for the caller.
            existing_id = err_data.get("existing_record_id", "")
            return {
                "record_id": existing_id,
                "status": "duplicate",
                "existing_record_id": existing_id,
            }, 409

        if err_type == "record_id_fingerprint_conflict":
            # Retry with a new UUID
            name = payload.get("name", "skill")
            payload["record_id"] = f"{name}__clo_{uuid.uuid4().hex[:8]}"
            retry_body = json.dumps(payload).encode("utf-8")
            status, resp_data = self._request(
                "POST",
                "/records",
                body=retry_body,
                extra_headers={"Content-Type": "application/json"},
            )
            return json.loads(resp_data.decode("utf-8")), status

        raise CloudError(f"409 conflict: {body_text}", status_code=409, body=body_text)
235
+
236
    def upload_skill(
        self,
        skill_dir: Path,
        *,
        visibility: str = "public",
        origin: str = "imported",
        parent_skill_ids: Optional[List[str]] = None,
        tags: Optional[List[str]] = None,
        created_by: str = "",
        change_summary: str = "",
    ) -> Dict[str, Any]:
        """Upload a local skill to the cloud (stage → diff → create record).

        Args:
            skill_dir: Directory containing SKILL.md and supporting files.
            visibility: ``"public"`` or ``"private"`` (``"private"`` maps to
                the API's ``"group_only"``).
            origin: One of ``"imported"`` / ``"captured"`` / ``"derived"`` /
                ``"fixed"``; validated against *parent_skill_ids*.
            parent_skill_ids: Ancestor record ids (required/forbidden
                depending on *origin*).
            tags: Optional tag list for the record.
            created_by: Optional author identifier.
            change_summary: Optional human-readable change note.

        Returns:
            A result dict with status (``"success"`` or ``"duplicate"``),
            record_id, name, artifact_id, file_count, etc.

        Raises:
            CloudError: On missing SKILL.md, missing frontmatter name,
                invalid origin/parents combination, or any API failure.
        """
        from openspace.skill_engine.skill_utils import parse_frontmatter

        skill_path = Path(skill_dir)
        skill_file = skill_path / SKILL_FILENAME
        if not skill_file.exists():
            raise CloudError(f"SKILL.md not found in {skill_dir}")

        content = skill_file.read_text(encoding="utf-8")
        fm = parse_frontmatter(content)
        name = fm.get("name", skill_path.name)
        description = fm.get("description", "")

        if not name:
            raise CloudError("SKILL.md frontmatter missing 'name' field")

        parents = parent_skill_ids or []
        self._validate_origin_parents(origin, parents)

        # "private" is a caller-facing alias for the API's "group_only".
        api_visibility = "group_only" if visibility == "private" else "public"

        # Step 1: Stage
        logger.info(f"upload_skill: staging files for '{name}'")
        artifact_id, file_count = self.stage_artifact(skill_path)
        logger.info(f"upload_skill: staged {file_count} file(s), artifact_id={artifact_id}")

        # Step 2: Content diff
        content_diff = self._compute_content_diff(skill_path, api_visibility, parents)

        # Step 3: Create record
        record_id = f"{name}__clo_{uuid.uuid4().hex[:8]}"
        payload: Dict[str, Any] = {
            "record_id": record_id,
            "artifact_id": artifact_id,
            # name/description are NOT sent — the server extracts them
            # from SKILL.md YAML frontmatter (Task 4+F4 change).
            "origin": origin,
            "visibility": api_visibility,
            "parent_skill_ids": parents,
            "tags": tags or [],
            "level": "workflow",
        }
        if created_by:
            payload["created_by"] = created_by
        if change_summary:
            payload["change_summary"] = change_summary
        if content_diff is not None:
            payload["content_diff"] = content_diff

        record_data, status_code = self.create_record(payload)
        # 201 = fresh record; any other success status means the server
        # already had this content (idempotent create).
        action = "created" if status_code == 201 else "exists (idempotent)"
        final_record_id = record_data.get("record_id", record_id)

        logger.info(
            f"upload_skill: {name} [{final_record_id}] — {action} "
            f"(visibility={api_visibility}, origin={origin})"
        )

        # Check for duplicate status from 409 handling
        if record_data.get("status") == "duplicate":
            return {
                "status": "duplicate",
                "message": f"Same content already exists as record '{record_data.get('existing_record_id', '')}'",
                "existing_record_id": record_data.get("existing_record_id", ""),
            }

        return {
            "status": "success",
            "action": action,
            "record_id": final_record_id,
            "name": name,
            "description": description,
            "visibility": api_visibility,
            "origin": origin,
            "parent_skill_ids": parents,
            "artifact_id": artifact_id,
            "file_count": file_count,
        }
328
+
329
    def import_skill(
        self,
        skill_id: str,
        target_dir: Path,
    ) -> Dict[str, Any]:
        """Download a cloud skill and extract it into a local directory.

        Steps: fetch record metadata → short-circuit if the skill already
        exists locally → download the artifact zip → extract → write the
        ``.skill_id`` sidecar so later operations can map the directory
        back to the cloud record.

        Args:
            skill_id: Cloud record id to import.
            target_dir: Parent directory; the skill lands in a
                subdirectory named after the skill.

        Returns:
            Result dict with status (``"success"`` or ``"already_exists"``),
            local_path, and the extracted file list on success.

        Raises:
            CloudError: On any API failure or an invalid artifact zip.
        """
        # 1. Fetch metadata
        logger.info(f"import_skill: fetching metadata for {skill_id}")
        record_data = self.fetch_record(skill_id)
        skill_name = record_data.get("name", skill_id)

        skill_dir = target_dir / skill_name

        # Check if already exists locally
        if skill_dir.exists() and (skill_dir / SKILL_FILENAME).exists():
            return {
                "status": "already_exists",
                "skill_id": skill_id,
                "name": skill_name,
                "local_path": str(skill_dir),
            }

        # 2. Download artifact
        logger.info(f"import_skill: downloading artifact for {skill_id}")
        zip_data = self.download_artifact(skill_id)

        # 3. Extract
        skill_dir.mkdir(parents=True, exist_ok=True)
        extracted = self._extract_zip(zip_data, skill_dir)

        # 4. Write .skill_id sidecar
        (skill_dir / SKILL_ID_FILENAME).write_text(skill_id + "\n", encoding="utf-8")

        logger.info(
            f"import_skill: {skill_name} [{skill_id}] → {skill_dir} "
            f"({len(extracted)} files)"
        )

        return {
            "status": "success",
            "skill_id": skill_id,
            "name": skill_name,
            "description": record_data.get("description", ""),
            "local_path": str(skill_dir),
            "files": extracted,
        }
378
+
379
+ @staticmethod
380
+ def _collect_files(skill_dir: Path) -> List[Path]:
381
+ """Collect all files in skill directory (skip .skill_id sidecar)."""
382
+ return [
383
+ p for p in sorted(skill_dir.rglob("*"))
384
+ if p.is_file() and p.name != SKILL_ID_FILENAME
385
+ ]
386
+
387
+ @staticmethod
388
+ def _collect_text_files(skill_dir: Path) -> Dict[str, str]:
389
+ """Collect text files as ``{relative_path: content}``."""
390
+ files: Dict[str, str] = {}
391
+ for p in sorted(skill_dir.rglob("*")):
392
+ if p.is_file() and p.name != SKILL_ID_FILENAME:
393
+ rel = str(p.relative_to(skill_dir))
394
+ try:
395
+ files[rel] = p.read_text(encoding="utf-8")
396
+ except (UnicodeDecodeError, OSError):
397
+ pass
398
+ return files
399
+
400
+ @staticmethod
401
+ def _extract_zip(zip_data: bytes, target_dir: Path) -> List[str]:
402
+ """Extract zip bytes to target directory with path traversal protection."""
403
+ extracted: List[str] = []
404
+ try:
405
+ with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
406
+ for info in zf.infolist():
407
+ if info.is_dir():
408
+ continue
409
+ clean_name = Path(info.filename).as_posix()
410
+ if clean_name.startswith("..") or clean_name.startswith("/"):
411
+ continue
412
+ target_path = target_dir / clean_name
413
+ target_path.parent.mkdir(parents=True, exist_ok=True)
414
+ target_path.write_bytes(zf.read(info))
415
+ extracted.append(clean_name)
416
+ except zipfile.BadZipFile:
417
+ raise CloudError("Downloaded artifact is not a valid zip file")
418
+ return extracted
419
+
420
+ @staticmethod
421
+ def _extract_zip_text_files(zip_data: bytes) -> Dict[str, str]:
422
+ """Extract text files from zip as ``{filename: content}``."""
423
+ files: Dict[str, str] = {}
424
+ try:
425
+ with zipfile.ZipFile(io.BytesIO(zip_data)) as zf:
426
+ for info in zf.infolist():
427
+ if info.is_dir() or info.filename == SKILL_ID_FILENAME:
428
+ continue
429
+ try:
430
+ files[info.filename] = zf.read(info).decode("utf-8")
431
+ except (UnicodeDecodeError, KeyError):
432
+ pass
433
+ except zipfile.BadZipFile:
434
+ pass
435
+ return files
436
+
437
+ @staticmethod
438
+ def _validate_origin_parents(origin: str, parents: List[str]) -> None:
439
+ if origin in ("imported", "captured") and parents:
440
+ raise CloudError(f"origin='{origin}' must not have parent_skill_ids")
441
+ if origin == "derived" and not parents:
442
+ raise CloudError("origin='derived' requires at least 1 parent_skill_id")
443
+ if origin == "fixed" and len(parents) != 1:
444
+ raise CloudError("origin='fixed' requires exactly 1 parent_skill_id")
445
+
446
    def _compute_content_diff(
        self,
        skill_dir: Path,
        api_visibility: str,
        parents: List[str],
    ) -> Optional[str]:
        """Compute the ``content_diff`` payload field for an upload.

        Policy:
        - public + single parent → unified diff vs that ancestor's artifact
        - public + no parent → "add-all" diff (everything vs empty snapshot)
        - non-public, multiple parents, or diff failure → ``None``

        Args:
            skill_dir: Local skill directory being uploaded.
            api_visibility: Resolved API visibility ("public"/"group_only").
            parents: Parent record ids (already validated by the caller).

        Returns:
            Combined unified diff text, or ``None`` when not applicable.
        """
        if api_visibility != "public":
            return None

        cur_files = self._collect_text_files(skill_dir)

        if len(parents) == 1:
            try:
                anc_zip = self.download_artifact(parents[0])
                anc_files = self._extract_zip_text_files(anc_zip)
                diff = self._unified_diff(anc_files, cur_files)
                if diff:
                    logger.info(f"Computed diff vs ancestor {parents[0]}")
                    return diff
            except Exception as e:
                # Best-effort: a failed ancestor download or diff must not
                # block the upload itself.
                logger.warning(f"Diff computation failed: {e}")
            return None

        if not parents:
            return self._unified_diff({}, cur_files)

        return None  # multiple parents
479
+
480
+ @staticmethod
481
+ def _unified_diff(old_files: Dict[str, str], new_files: Dict[str, str]) -> Optional[str]:
482
+ """Compute combined unified diff between two file snapshots."""
483
+ all_names = sorted(set(old_files) | set(new_files))
484
+ parts: List[str] = []
485
+ for fname in all_names:
486
+ old = old_files.get(fname, "")
487
+ new = new_files.get(fname, "")
488
+ d = "".join(difflib.unified_diff(
489
+ old.splitlines(keepends=True),
490
+ new.splitlines(keepends=True),
491
+ fromfile=f"a/{fname}",
492
+ tofile=f"b/{fname}",
493
+ n=3,
494
+ ))
495
+ if d:
496
+ parts.append(d)
497
+ return "\n".join(parts) if parts else None
openspace/cloud/embedding.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Embedding generation via OpenAI-compatible API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import math
8
+ import os
9
+ import urllib.request
10
+ from typing import List, Optional, Tuple
11
+
12
logger = logging.getLogger("openspace.cloud")

# Embedding constants (duplicated here to avoid a top-level import of
# skill_ranker).
SKILL_EMBEDDING_MODEL = "openai/text-embedding-3-small"
SKILL_EMBEDDING_MAX_CHARS = 12_000
SKILL_EMBEDDING_DIMENSIONS = 1536

_OPENROUTER_BASE = "https://openrouter.ai/api/v1"
_OPENAI_BASE = "https://api.openai.com/v1"


def resolve_embedding_api() -> Tuple[Optional[str], str]:
    """Pick the API key and base URL to use for embedding requests.

    Resolution order:
    1. ``OPENROUTER_API_KEY`` → OpenRouter endpoint.
    2. ``OPENAI_API_KEY`` (honouring ``OPENAI_BASE_URL``, default
       ``api.openai.com``).
    3. Host-agent config (nanobot / openclaw) via ``host_detection``.

    Returns:
        ``(api_key, base_url)``; *api_key* is ``None`` when nothing matched.
    """
    openrouter_key = os.environ.get("OPENROUTER_API_KEY")
    if openrouter_key:
        return openrouter_key, _OPENROUTER_BASE

    def _openai_base() -> str:
        return os.environ.get("OPENAI_BASE_URL", _OPENAI_BASE).rstrip("/")

    openai_key = os.environ.get("OPENAI_API_KEY")
    if openai_key:
        return openai_key, _openai_base()

    # Last resort: ask the host agent's config for a key; any failure
    # (missing package, broken config) degrades to "no key found".
    try:
        from openspace.host_detection import get_openai_api_key
        agent_key = get_openai_api_key()
    except Exception:
        agent_key = None
    if agent_key:
        return agent_key, _openai_base()

    return None, _OPENAI_BASE
53
+
54
+
55
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Cosine similarity of two equal-length vectors.

    Returns 0.0 for mismatched lengths, empty input, or a zero-norm
    vector (instead of raising / dividing by zero).
    """
    if not a or len(a) != len(b):
        return 0.0
    dot = 0.0
    sq_a = 0.0
    sq_b = 0.0
    for x, y in zip(a, b):
        dot += x * y
        sq_a += x * x
        sq_b += y * y
    norm_a = math.sqrt(sq_a)
    norm_b = math.sqrt(sq_b)
    if norm_a == 0 or norm_b == 0:
        return 0.0
    return dot / (norm_a * norm_b)
65
+
66
+
67
def build_skill_embedding_text(
    name: str,
    description: str,
    readme_body: str,
    max_chars: int = SKILL_EMBEDDING_MAX_CHARS,
) -> str:
    """Assemble the text embedded for a skill: name, description, body.

    Matches the unified strategy used by MCP search_skills and the
    clawhub platform; empty segments are dropped and the result is
    truncated to *max_chars* characters.
    """
    segments = [part for part in (name, description) if part]
    header = "\n".join(segments)
    pieces = [part for part in (header, readme_body) if part]
    combined = "\n\n".join(pieces)
    if len(combined) <= max_chars:
        return combined
    return combined[:max_chars]
82
+
83
+
84
def generate_embedding(text: str, api_key: Optional[str] = None) -> Optional[List[float]]:
    """Request an embedding vector from an OpenAI-compatible endpoint.

    When *api_key* is ``None``, credentials are resolved automatically via
    :func:`resolve_embedding_api` (``OPENROUTER_API_KEY`` → ``OPENAI_API_KEY``
    → host-agent config). The base URL always comes from that resolver,
    even when an explicit key is supplied.

    This is a **synchronous** call (uses urllib). In async contexts,
    wrap with ``asyncio.to_thread()``.

    Args:
        text: The text to embed.
        api_key: Explicit API key override.

    Returns:
        Embedding vector, or ``None`` on any failure (no key, network
        error, malformed response).
    """
    resolved_key, base_url = resolve_embedding_api()
    key = api_key if api_key is not None else resolved_key
    if not key:
        return None

    payload = json.dumps({
        "model": SKILL_EMBEDDING_MODEL,
        "input": text,
    }).encode("utf-8")

    request = urllib.request.Request(
        f"{base_url}/embeddings",
        data=payload,
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {key}",
        },
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=15) as response:
            parsed = json.loads(response.read().decode("utf-8"))
        return parsed.get("data", [{}])[0].get("embedding")
    except Exception as exc:
        logger.warning("Embedding generation failed: %s", exc)
        return None
openspace/cloud/search.py ADDED
@@ -0,0 +1,393 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Hybrid skill search engine (BM25 + embedding + lexical boost).
2
+
3
+ Implements the search pipeline:
4
+ Phase 1: BM25 rough-rank over all candidates
5
+ Phase 2: Vector scoring (embedding cosine similarity)
6
+ Phase 3: Hybrid score = vector_score + lexical_boost
7
+ Phase 4: Deduplication + limit
8
+
9
+ Used by MCP ``search_skills`` tool, ``retrieve_skill`` agent tool,
10
+ and potentially other search interfaces.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import logging
17
+ import re
18
+ from typing import Any, Dict, List, Optional
19
+
20
+ logger = logging.getLogger("openspace.cloud")
21
+
22
+
23
def _check_safety(text: str) -> list[str]:
    """Run the skill-safety scanner; import deferred to call time."""
    from openspace.skill_engine.skill_utils import check_skill_safety
    flags = check_skill_safety(text)
    return flags
27
+
28
+
29
def _is_safe(flags: list[str]) -> bool:
    """Delegate to ``skill_utils.is_skill_safe`` (lazy import)."""
    from openspace.skill_engine.skill_utils import is_skill_safe
    verdict = is_skill_safe(flags)
    return verdict
32
+
33
+ _WORD_RE = re.compile(r"[a-z0-9]+")
34
+
35
+
36
+ def _tokenize(value: str) -> list[str]:
37
+ return _WORD_RE.findall(value.lower()) if value else []
38
+
39
+
40
+ def _lexical_boost(query_tokens: list[str], name: str, slug: str) -> float:
41
+ """Compute lexical boost score based on exact/prefix token matching."""
42
+ slug_tokens = _tokenize(slug)
43
+ name_tokens = _tokenize(name)
44
+ boost = 0.0
45
+
46
+ # Slug exact / prefix
47
+ if slug_tokens and all(
48
+ any(ct == qt for ct in slug_tokens) for qt in query_tokens
49
+ ):
50
+ boost += 1.4
51
+ elif slug_tokens and all(
52
+ any(ct.startswith(qt) for ct in slug_tokens) for qt in query_tokens
53
+ ):
54
+ boost += 0.8
55
+
56
+ # Name exact / prefix
57
+ if name_tokens and all(
58
+ any(ct == qt for ct in name_tokens) for qt in query_tokens
59
+ ):
60
+ boost += 1.1
61
+ elif name_tokens and all(
62
+ any(ct.startswith(qt) for ct in name_tokens) for qt in query_tokens
63
+ ):
64
+ boost += 0.6
65
+
66
+ return boost
67
+
68
+
69
class SkillSearchEngine:
    """Hybrid BM25 + embedding search engine for skills.

    Pipeline: BM25 rough-rank → vector scoring (cosine similarity) →
    hybrid score (vector + lexical boost) → dedup by name + limit.

    Usage::

        engine = SkillSearchEngine()
        results = engine.search(
            query="weather forecast",
            candidates=candidates,
            query_embedding=[...],  # optional
            limit=20,
        )
    """

    def search(
        self,
        query: str,
        candidates: List[Dict[str, Any]],
        *,
        query_embedding: Optional[List[float]] = None,
        limit: int = 20,
    ) -> List[Dict[str, Any]]:
        """Run the full search pipeline on candidates.

        Each candidate dict should have at minimum:
        - ``skill_id``, ``name``, ``description``
        - ``_embedding`` (optional): pre-computed embedding vector
        - ``source``: "openspace-local" | "cloud"

        Args:
            query: Search query text.
            candidates: Candidate dicts to rank.
            query_embedding: Pre-computed query embedding (if available).
            limit: Max results to return.

        Returns:
            Sorted list of result dicts (highest score first).
        """
        q = query.strip()
        if not q or not candidates:
            return []

        query_tokens = _tokenize(q)
        if not query_tokens:
            return []

        # Phase 1: BM25 rough-rank
        filtered = self._bm25_phase(q, candidates, limit)

        # Phase 2+3: Vector + lexical scoring
        scored = self._score_phase(filtered, query_tokens, query_embedding)

        # Phase 4: Deduplicate and limit
        return self._dedup_and_limit(scored, limit)

    def _bm25_phase(
        self,
        query: str,
        candidates: List[Dict[str, Any]],
        limit: int,
    ) -> List[Dict[str, Any]]:
        """BM25 rough-rank to keep top candidates for embedding stage."""
        # Lazy import keeps skill_engine out of the module import path.
        from openspace.skill_engine.skill_ranker import SkillRanker, SkillCandidate

        ranker = SkillRanker(enable_cache=True)
        bm25_candidates = [
            SkillCandidate(
                skill_id=c.get("skill_id", ""),
                name=c.get("name", ""),
                description=c.get("description", ""),
                body="",
                metadata=c,
            )
            for c in candidates
        ]
        # Keep up to 3x the requested limit so later phases can re-order.
        ranked = ranker.bm25_only(query, bm25_candidates, top_k=min(limit * 3, len(candidates)))

        ranked_ids = {sc.skill_id for sc in ranked}
        filtered = [c for c in candidates if c.get("skill_id") in ranked_ids]

        # If BM25 found nothing, fall back to all candidates
        return filtered if filtered else candidates

    def _score_phase(
        self,
        candidates: List[Dict[str, Any]],
        query_tokens: list[str],
        query_embedding: Optional[List[float]],
    ) -> List[Dict[str, Any]]:
        """Compute hybrid score = vector_score + lexical_boost."""
        from openspace.cloud.embedding import cosine_similarity

        scored = []
        for c in candidates:
            name = c.get("name", "")
            # Record ids look like "<name>__clo_<hash>" (see upload_skill);
            # the part before "__" is treated as the slug.
            slug = c.get("skill_id", name).split("__")[0].replace(":", "-")

            # Vector score
            vector_score = 0.0
            if query_embedding:
                skill_emb = c.get("_embedding")
                if skill_emb and isinstance(skill_emb, list):
                    vector_score = cosine_similarity(query_embedding, skill_emb)

            # Lexical boost
            lexical = _lexical_boost(query_tokens, name, slug)

            final_score = vector_score + lexical

            entry: Dict[str, Any] = {
                "skill_id": c.get("skill_id", ""),
                "name": name,
                "description": c.get("description", ""),
                "source": c.get("source", ""),
                "score": round(final_score, 4),
            }
            if vector_score > 0:
                entry["vector_score"] = round(vector_score, 4)
            # Include optional fields
            for key in ("path", "visibility", "created_by", "origin", "tags", "quality", "safety_flags"):
                if c.get(key):
                    entry[key] = c[key]
            scored.append(entry)

        # Stable sort: ties keep BM25 order from the previous phase.
        scored.sort(key=lambda x: -x["score"])
        return scored

    @staticmethod
    def _dedup_and_limit(
        scored: List[Dict[str, Any]],
        limit: int,
    ) -> List[Dict[str, Any]]:
        """Deduplicate by name (first/highest-scored wins) and apply limit."""
        seen: set[str] = set()
        deduped = []
        for item in scored:
            name = item["name"]
            if name in seen:
                continue
            seen.add(name)
            deduped.append(item)
        return deduped[:limit]
211
+
212
+
213
def build_local_candidates(
    skills: list,
    store: Any = None,
) -> List[Dict[str, Any]]:
    """Build search candidate dicts from SkillRegistry skills.

    Reads each skill's SKILL.md, strips YAML frontmatter, runs the safety
    scanner (unsafe skills are dropped with a log line), and optionally
    enriches candidates with quality stats from *store*.

    Args:
        skills: List of ``SkillMeta`` from ``registry.list_skills()``.
        store: Optional ``SkillStore`` instance for quality data enrichment.

    Returns:
        List of candidate dicts ready for ``SkillSearchEngine.search()``.
    """
    from openspace.cloud.embedding import build_skill_embedding_text

    candidates: List[Dict[str, Any]] = []
    for s in skills:
        # Read SKILL.md body (frontmatter stripped so only prose is embedded)
        readme_body = ""
        try:
            raw = s.path.read_text(encoding="utf-8")
            m = re.match(r"^---\n.*?\n---\n?", raw, re.DOTALL)
            readme_body = raw[m.end():].strip() if m else raw
        except Exception:
            pass

        embedding_text = build_skill_embedding_text(s.name, s.description, readme_body)

        # Safety check
        flags = _check_safety(embedding_text)
        if not _is_safe(flags):
            logger.info(f"BLOCKED local skill {s.skill_id} — {flags}")
            continue

        candidates.append({
            "skill_id": s.skill_id,
            "name": s.name,
            "description": s.description,
            "source": "openspace-local",
            "path": str(s.path),
            "is_local": True,
            "safety_flags": flags if flags else None,
            # _embedding_text lets the caller compute an embedding lazily.
            "_embedding_text": embedding_text,
        })

    # Enrich with quality data
    if store and candidates:
        try:
            all_records = store.load_all(active_only=True)
            for c in candidates:
                rec = all_records.get(c["skill_id"])
                if rec:
                    c["quality"] = {
                        "total_selections": rec.total_selections,
                        "completion_rate": round(rec.completion_rate, 3),
                        "effective_rate": round(rec.effective_rate, 3),
                    }
                    c["tags"] = rec.tags
        except Exception as e:
            # Quality data is optional — a broken store must not block search.
            logger.warning(f"Quality lookup failed: {e}")

    return candidates
275
+
276
+
277
def build_cloud_candidates(
    items: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Turn cloud metadata items into search candidate dicts.

    Items failing the safety check on name/description/tags are dropped.
    A platform-provided embedding, when present, is carried through under
    the private ``_embedding`` key.

    Args:
        items: Items from ``OpenSpaceClient.fetch_metadata()``.

    Returns:
        Candidate dicts for ``SkillSearchEngine.search()``.
    """
    out: List[Dict[str, Any]] = []
    for item in items:
        name = item.get("name", "")
        desc = item.get("description", "")
        tags = item.get("tags", [])

        flags = _check_safety(f"{name}\n{desc}\n{' '.join(tags)}")
        if not _is_safe(flags):
            continue

        candidate: Dict[str, Any] = {
            "skill_id": item.get("record_id", ""),
            "name": name,
            "description": desc,
            "source": "cloud",
            "visibility": item.get("visibility", "public"),
            "is_local": False,
            "created_by": item.get("created_by", ""),
            "origin": item.get("origin", ""),
            "tags": tags,
            "safety_flags": flags if flags else None,
        }
        embedding = item.get("embedding")
        if embedding and isinstance(embedding, list):
            candidate["_embedding"] = embedding
        out.append(candidate)

    return out
317
+
318
+
319
async def hybrid_search_skills(
    query: str,
    local_skills: Optional[list] = None,
    store: Any = None,
    source: str = "all",
    limit: int = 20,
) -> List[Dict[str, Any]]:
    """Shared cloud+local skill search with graceful fallback.

    Builds candidates, generates embeddings, runs ``SkillSearchEngine``.
    Cloud is attempted when *source* includes it; failures are silently
    skipped so the caller always gets local results at minimum.

    Args:
        query: Free-text search query.
        local_skills: ``SkillMeta`` list (from ``registry.list_skills()``).
        store: Optional ``SkillStore`` for quality enrichment.
        source: ``"all"`` | ``"local"`` | ``"cloud"``.
        limit: Maximum results.

    Returns:
        Ranked result dicts (same format as ``SkillSearchEngine.search()``).
    """
    from openspace.cloud.embedding import generate_embedding

    q = query.strip()
    if not q:
        return []

    candidates: List[Dict[str, Any]] = []

    if source in ("all", "local") and local_skills:
        candidates.extend(build_local_candidates(local_skills, store))

    if source in ("all", "cloud"):
        try:
            from openspace.cloud.auth import get_openspace_auth
            from openspace.cloud.client import OpenSpaceClient

            auth_headers, api_base = get_openspace_auth()
            if auth_headers:
                client = OpenSpaceClient(auth_headers, api_base)
                # Only request server-side embeddings when we can also
                # embed the query locally — otherwise they go unused.
                try:
                    from openspace.cloud.embedding import resolve_embedding_api
                    has_emb = bool(resolve_embedding_api()[0])
                except Exception:
                    has_emb = False
                items = await asyncio.to_thread(
                    client.fetch_metadata, include_embedding=has_emb, limit=200,
                )
                candidates.extend(build_cloud_candidates(items))
        except Exception as e:
            # Cloud failures degrade the search to local-only results.
            logger.warning(f"hybrid_search_skills: cloud unavailable: {e}")

    if not candidates:
        return []

    # query embedding (optional — key/URL resolved inside generate_embedding)
    query_embedding: Optional[List[float]] = None
    try:
        query_embedding = await asyncio.to_thread(generate_embedding, q)
        if query_embedding:
            for c in candidates:
                if not c.get("_embedding") and c.get("_embedding_text"):
                    emb = await asyncio.to_thread(
                        generate_embedding, c["_embedding_text"],
                    )
                    if emb:
                        c["_embedding"] = emb
    except Exception:
        # Embedding problems fall back to BM25/lexical-only ranking.
        pass

    engine = SkillSearchEngine()
    return engine.search(q, candidates, query_embedding=query_embedding, limit=limit)
393
+
openspace/config/README.md ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🔧 Configuration Guide
2
+
3
+ All configuration applies to both Path A (host agent) and Path B (standalone). Configure once before the first run.
4
+
5
+ ## 1. API Keys (`.env`)
6
+
7
+ > [!NOTE]
8
+ > Create a `.env` file and add your API keys (refer to [`.env.example`](../../.env.example)). When used via host agent (Path A), LLM keys are auto-detected from your agent's config — `.env` is mainly needed for standalone mode.
9
+
10
+ ## 2. Environment Variables
11
+
12
+ Set via `.env`, MCP config `env` block, or system environment. OpenSpace reads these at startup.
13
+
14
+ | Variable | Required | Description |
15
+ |----------|----------|-------------|
16
+ | `OPENSPACE_HOST_SKILL_DIRS` | Path A only | Your agent's skill directories (comma-separated). Auto-registered on startup. |
17
+ | `OPENSPACE_WORKSPACE` | Recommended | OpenSpace project root. Used for recording logs and workspace resolution. |
18
+ | `OPENSPACE_API_KEY` | No | Cloud API key (`sk-xxx`). Register at https://open-space.cloud. |
19
+ | `OPENSPACE_MODEL` | No | LLM model override (default: auto-detected or `openrouter/anthropic/claude-sonnet-4.5`). |
20
+ | `OPENSPACE_MAX_ITERATIONS` | No | Max agent iterations per task (default: `20`). |
21
+ | `OPENSPACE_BACKEND_SCOPE` | No | Enabled backends, comma-separated (default: all — `shell,gui,mcp,web,system`). |
22
+
23
+ ### Advanced env overrides (rarely needed)
24
+
25
+ | Variable | Description |
26
+ |----------|-------------|
27
+ | `OPENSPACE_LLM_API_KEY` | LLM API key (auto-detected from host agent in Path A) |
28
+ | `OPENSPACE_LLM_API_BASE` | LLM API base URL |
29
+ | `OPENSPACE_LLM_EXTRA_HEADERS` | Extra HTTP headers for LLM requests (JSON string) |
30
+ | `OPENSPACE_LLM_CONFIG` | Arbitrary litellm kwargs (JSON string) |
31
+ | `OPENSPACE_API_BASE` | Cloud API base URL (default `https://open-space.cloud/api/v1`) |
32
+ | `OPENSPACE_CONFIG_PATH` | Custom grounding config JSON (deep-merged with defaults) |
33
+ | `OPENSPACE_SHELL_CONDA_ENV` | Conda environment for shell backend |
34
+ | `OPENSPACE_SHELL_WORKING_DIR` | Working directory for shell backend |
35
+ | `OPENSPACE_MCP_SERVERS_JSON` | MCP server definitions (JSON string, merged into `mcpServers`) |
36
+ | `OPENSPACE_ENABLE_RECORDING` | Record execution traces (default: `true`) |
37
+ | `OPENSPACE_LOG_LEVEL` | `DEBUG` / `INFO` / `WARNING` / `ERROR` |
38
+
39
+ ## 3. MCP Servers (`config_mcp.json`)
40
+
41
+ Register external MCP servers that OpenSpace connects to as a **client** (e.g. GitHub, Slack, databases):
42
+
43
+ ```bash
44
+ cp openspace/config/config_mcp.json.example openspace/config/config_mcp.json
45
+ ```
46
+
47
+ ```json
48
+ {
49
+ "mcpServers": {
50
+ "github": {
51
+ "command": "npx",
52
+ "args": ["-y", "@modelcontextprotocol/server-github"],
53
+ "env": { "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}" }
54
+ }
55
+ }
56
+ }
57
+ ```
58
+
59
+ ## 4. Execution Mode: Local vs Server
60
+
61
+ Shell and GUI backends support two execution modes, set via `"mode"` in `config_grounding.json`:
62
+
63
+ | | Local Mode (`"local"`, default) | Server Mode (`"server"`) |
64
+ |---|---|---|
65
+ | **Setup** | Zero config | Start `local_server` first |
66
+ | **Use case** | Same-machine development | Remote VMs, sandboxing, multi-machine |
67
+ | **How** | `asyncio.subprocess` in-process | HTTP → Flask → subprocess |
68
+
69
+ > [!TIP]
70
+ > **Use local mode** for most use cases. For server mode setup (how to enable, platform-specific deps, remote VM control), see [`../local_server/README.md`](../local_server/README.md).
71
+
72
+ ## 5. Config Files (`openspace/config/`)
73
+
74
+ Layered system — later files override earlier ones:
75
+
76
+ | File | Purpose |
77
+ |------|---------|
78
+ | `config_grounding.json` | Backend settings, smart tool retrieval, tool quality, skill discovery |
79
+ | `config_agents.json` | Agent definitions, backend scope, max iterations |
80
+ | `config_mcp.json` | MCP servers OpenSpace connects to as a client |
81
+ | `config_security.json` | Security policies, blocked commands, sandboxing |
82
+ | `config_dev.json` | Dev overrides — copy from `config_dev.json.example` (highest priority) |
83
+
84
+ ### Agent config (`config_agents.json`)
85
+
86
+ ```json
87
+ { "agents": [{ "name": "GroundingAgent", "backend_scope": ["shell", "mcp", "web"], "max_iterations": 30 }] }
88
+ ```
89
+
90
+ | Field | Description | Default |
91
+ |-------|-------------|---------|
92
+ | `backend_scope` | Enabled backends | `["gui", "shell", "mcp", "system", "web"]` |
93
+ | `max_iterations` | Max execution cycles | `20` |
94
+ | `visual_analysis_timeout` | Timeout for visual analysis (seconds) | `30.0` |
95
+
96
+ ### Backend & tool config (`config_grounding.json`)
97
+
98
+ | Section | Key Fields | Description |
99
+ |---------|-----------|-------------|
100
+ | `shell` | `mode`, `timeout`, `conda_env`, `working_dir` | `"local"` (default) or `"server"`, command timeout (default: `60`s) |
101
+ | `gui` | `mode`, `timeout`, `driver_type`, `screenshot_on_error` | Local/server mode, automation driver (default: `pyautogui`) |
102
+ | `mcp` | `timeout`, `sandbox`, `eager_sessions` | Request timeout (`30`s), E2B sandbox, lazy/eager server init |
103
+ | `tool_search` | `search_mode`, `max_tools`, `enable_llm_filter` | `"hybrid"` (semantic + LLM), max tools to return (`40`), embedding cache |
104
+ | `tool_quality` | `enabled`, `enable_persistence`, `evolve_interval` | Quality tracking, self-evolution every N calls (default: `5`) |
105
+ | `skills` | `enabled`, `skill_dirs`, `max_select` | Directories to scan, max skills injected per task (default: `2`) |
106
+
107
+ ### Security config (`config_security.json`)
108
+
109
+ | Field | Description | Default |
110
+ |-------|-------------|---------|
111
+ | `allow_shell_commands` | Enable shell execution | `true` |
112
+ | `blocked_commands` | Platform-specific blacklists (common/linux/darwin/windows) | `rm -rf`, `shutdown`, `dd`, etc. |
113
+ | `sandbox_enabled` | Enable sandboxing for all operations | `false` |
114
+ | Per-backend overrides | Shell, MCP, GUI, Web each have independent security policies | Inherit global |
115
+
openspace/config/__init__.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from .grounding import *
from .loader import *
from .constants import *
from .utils import *
from . import constants
# BUGFIX: SessionConfig / SecurityPolicy are imported inside grounding.py but
# are NOT listed in grounding.__all__, so the star-import above does not bind
# them in this namespace. Without this explicit import, every name listed in
# __all__ below must exist or `from openspace.config import *` raises
# AttributeError.
from openspace.grounding.core.types import SessionConfig, SecurityPolicy

# Public API of the config package: mirrors the exports of the submodules so
# that `from openspace.config import *` re-exports the full configuration API.
__all__ = [
    # Grounding Config
    "BackendConfig",
    "ShellConfig",
    "WebConfig",
    "MCPConfig",
    "GUIConfig",
    "ToolSearchConfig",
    "ToolQualityConfig",  # was missing: exported by grounding.__all__
    "SkillConfig",        # was missing: exported by grounding.__all__
    "SessionConfig",
    "SecurityPolicy",
    "GroundingConfig",

    # Loader
    "CONFIG_DIR",
    "load_config",
    "get_config",
    "reset_config",
    "save_config",
    "load_agents_config",
    "get_agent_config",

    # Utils
    "get_config_value",
    "load_json_file",
    "save_json_file",
] + constants.__all__
openspace/config/config_agents.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "agents": [
3
+ {
4
+ "name": "GroundingAgent",
5
+ "class_name": "GroundingAgent",
6
+ "backend_scope": ["shell", "mcp", "system"],
7
+ "max_iterations": 30,
8
+ "visual_analysis_timeout": 60.0
9
+ }
10
+ ]
11
+ }
openspace/config/config_dev.json.example ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+     "comment": "[Optional] Load order: config_grounding.json → config_security.json → config_dev.json (config_dev.json overrides the former ones)",
3
+
4
+ "debug": true,
5
+ "log_level": "DEBUG",
6
+
7
+ "security_policies": {
8
+ "global": {
9
+ "blocked_commands": []
10
+ }
11
+ }
12
+ }
openspace/config/config_grounding.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shell": {
3
+ "mode": "local",
4
+ "timeout": 60,
5
+ "max_retries": 3,
6
+ "retry_interval": 3.0,
7
+ "default_shell": "/bin/bash",
8
+ "working_dir": null,
9
+ "env": {},
10
+ "conda_env": null,
11
+ "default_port": 5000
12
+ },
13
+ "mcp": {
14
+ "timeout": 30,
15
+ "max_retries": 3,
16
+ "retry_interval": 2.0,
17
+ "sandbox": false,
18
+ "auto_initialize": true,
19
+ "eager_sessions": false,
20
+ "sse_read_timeout": 300.0,
21
+ "check_dependencies": true,
22
+ "auto_install": true
23
+ },
24
+ "gui": {
25
+ "mode": "local",
26
+ "timeout": 90,
27
+ "max_retries": 3,
28
+ "retry_interval": 5.0,
29
+ "driver_type": "pyautogui",
30
+ "failsafe": false,
31
+ "screenshot_on_error": true,
32
+ "pkgs_prefix": "import pyautogui; import time; pyautogui.FAILSAFE = {failsafe}; {command}"
33
+ },
34
+ "tool_search": {
35
+ "embedding_model": "BAAI/bge-small-en-v1.5",
36
+ "max_tools": 40,
37
+ "search_mode": "hybrid",
38
+ "enable_llm_filter": true,
39
+ "llm_filter_threshold": 50,
40
+ "enable_cache_persistence": true,
41
+ "cache_dir": null
42
+ },
43
+ "tool_quality": {
44
+ "enabled": true,
45
+ "enable_persistence": true,
46
+ "cache_dir": null,
47
+ "auto_evaluate_descriptions": true,
48
+ "enable_quality_ranking": true,
49
+ "evolve_interval": 5
50
+ },
51
+ "skills": {
52
+ "enabled": true,
53
+ "skill_dirs": [],
54
+ "max_select": 2
55
+ },
56
+
57
+ "tool_cache_ttl": 600,
58
+ "tool_cache_maxsize": 500,
59
+
60
+ "debug": false,
61
+ "log_level": "INFO",
62
+ "enabled_backends": [
63
+ {
64
+ "name": "shell",
65
+ "provider_cls": "openspace.grounding.backends.shell.ShellProvider"
66
+ },
67
+ {
68
+ "name": "web",
69
+ "provider_cls": "openspace.grounding.backends.web.WebProvider"
70
+ },
71
+ {
72
+ "name": "mcp",
73
+ "provider_cls": "openspace.grounding.backends.mcp.MCPProvider"
74
+ },
75
+ {
76
+ "name": "gui",
77
+ "provider_cls": "openspace.grounding.backends.gui.GUIProvider"
78
+ }
79
+ ],
80
+
81
+ "_comment_system_backend": "Note: 'system' backend is automatically registered and always available. It provides meta-level tools for querying system state. Do not add it to enabled_backends as it requires special initialization."
82
+ }
openspace/config/config_mcp.json.example ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mcpServers": {
3
+ "github": {
4
+ "command": "npx",
5
+ "args": ["-y", "@modelcontextprotocol/server-github"],
6
+ "env": {
7
+ "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
8
+ }
9
+ }
10
+ }
11
+ }
openspace/config/config_security.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "security_policies": {
3
+ "global": {
4
+ "allow_shell_commands": true,
5
+ "allow_network_access": true,
6
+ "allow_file_access": true,
7
+ "blocked_commands": {
8
+ "common": ["rm", "-rf", "shutdown", "reboot", "poweroff", "halt"],
9
+ "linux": ["mkfs", "dd", "iptables", "systemctl", "init", "kill", "-9", "pkill"],
10
+ "darwin": ["diskutil", "dd", "pfctl", "launchctl", "killall"],
11
+ "windows": ["del", "format", "rd", "rmdir", "/s", "/q", "taskkill", "/f"]
12
+ },
13
+ "sandbox_enabled": false
14
+ },
15
+ "backend": {
16
+ "shell": {
17
+ "allow_shell_commands": true,
18
+ "allow_file_access": true,
19
+ "blocked_commands": {
20
+ "common": ["rm", "-rf", "shutdown", "reboot", "poweroff", "halt"],
21
+ "linux": [
22
+ "mkfs", "mkfs.ext4", "mkfs.xfs",
23
+ "dd",
24
+ "iptables", "ip6tables", "nftables",
25
+ "systemctl", "service",
26
+ "fdisk", "parted", "gdisk",
27
+ "mount", "umount",
28
+ "chmod", "777",
29
+ "chown", "root",
30
+ "passwd",
31
+ "useradd", "userdel", "usermod",
32
+ "kill", "-9", "pkill", "killall"
33
+ ],
34
+ "darwin": [
35
+ "diskutil",
36
+ "dd",
37
+ "pfctl",
38
+ "launchctl",
39
+ "dscl",
40
+ "chmod", "777",
41
+ "chown", "root",
42
+ "passwd",
43
+ "killall",
44
+ "pmset"
45
+ ],
46
+ "windows": [
47
+ "del", "erase",
48
+ "format",
49
+ "rd", "rmdir", "/s", "/q",
50
+ "diskpart",
51
+ "reg", "delete",
52
+ "net", "user",
53
+ "taskkill", "/f",
54
+ "wmic"
55
+ ]
56
+ },
57
+ "sandbox_enabled": false
58
+ },
59
+ "mcp": {
60
+ "sandbox_enabled": false
61
+ },
62
+ "web": {
63
+ "allow_network_access": true,
64
+ "allowed_domains": []
65
+ }
66
+ }
67
+ }
68
+ }
openspace/config/constants.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from pathlib import Path

# File names of the layered configuration files shipped in openspace/config/.
CONFIG_GROUNDING = "config_grounding.json"
CONFIG_SECURITY = "config_security.json"
CONFIG_MCP = "config_mcp.json"
CONFIG_DEV = "config_dev.json"
CONFIG_AGENTS = "config_agents.json"

# Accepted log level names (used by GroundingConfig.log_level validation).
LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

# Project root directory (OpenSpace/)
# Three levels up from this file: constants.py -> config/ -> openspace/ -> root.
PROJECT_ROOT = Path(__file__).parent.parent.parent


__all__ = [
    "CONFIG_GROUNDING",
    "CONFIG_SECURITY",
    "CONFIG_MCP",
    "CONFIG_DEV",
    "CONFIG_AGENTS",
    "LOG_LEVELS",
    "PROJECT_ROOT",
]
openspace/config/grounding.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Optional, Any, List, Literal
2
+ try:
3
+ from pydantic import BaseModel, Field, field_validator
4
+ PYDANTIC_V2 = True
5
+ except ImportError:
6
+ from pydantic import BaseModel, Field, validator as field_validator
7
+ PYDANTIC_V2 = False
8
+
9
+ from openspace.grounding.core.types import (
10
+ SessionConfig,
11
+ SecurityPolicy,
12
+ BackendType
13
+ )
14
+ from .constants import LOG_LEVELS
15
+
16
+
17
class ConfigMixin:
    """Mixin providing uniform read access to configuration values."""

    def get_value(self, key: str, default=None):
        """Return the value stored under *key*, or *default* when absent.

        Works whether the concrete object is a plain dict (``dict.get``
        lookup) or an attribute-based model such as a Pydantic BaseModel
        (``getattr`` lookup).

        Args:
            key: Configuration key to look up.
            default: Value returned when the key/attribute is missing.
        """
        if isinstance(self, dict):
            return self.get(key, default)
        return getattr(self, key, default)
32
+
33
+
34
class BackendConfig(BaseModel, ConfigMixin):
    """Common settings shared by every grounding backend."""

    # Whether this backend participates in grounding at all.
    enabled: bool = Field(default=True, description="Whether the backend is enabled")
    # Per-operation timeout, bounded to a 1s..5min window.
    timeout: int = Field(default=30, ge=1, le=300, description="Timeout in seconds")
    # How many times a failed operation is retried before giving up.
    max_retries: int = Field(default=3, ge=0, le=10, description="Maximum retry attempts")
39
+
40
+
41
class ShellConfig(BackendConfig):
    """
    Shell backend configuration

    Attributes:
        enabled: Whether shell backend is enabled
        mode: Execution mode - "local" runs scripts in-process via subprocess,
            "server" connects to a running local_server via HTTP
        timeout: Default timeout for shell operations (seconds)
        max_retries: Maximum number of retry attempts for failed operations
        retry_interval: Wait time between retries (seconds)
        default_shell: Path to default shell executable
        working_dir: Default working directory for bash scripts
        env: Default environment variables for shell operations
        conda_env: Conda environment name to activate before execution (optional)
        default_port: Default port for shell server connection (only used in server mode)
        use_clawwork_productivity: Opt-in ClawWork productivity tools (requires livebench)
        productivity_date: Date segment for productivity sandbox paths
    """
    mode: Literal["local", "server"] = Field("local", description="Execution mode: 'local' (in-process subprocess) or 'server' (HTTP local_server)")
    retry_interval: float = Field(3.0, ge=0.1, le=60.0, description="Wait time between retries in seconds")
    default_shell: str = Field("/bin/bash", description="Default shell path")
    working_dir: Optional[str] = Field(None, description="Default working directory for bash scripts")
    env: Dict[str, str] = Field(default_factory=dict, description="Default environment variables")
    conda_env: Optional[str] = Field(None, description="Conda environment name to activate (e.g., 'myenv')")
    default_port: int = Field(5000, ge=1, le=65535, description="Default port for shell server")
    use_clawwork_productivity: bool = Field(
        False,
        description="If True and livebench is installed, add ClawWork productivity tools (search_web, read_webpage, create_file, read_file, execute_code_sandbox, create_video) for fair comparison with ClawWork."
    )
    productivity_date: str = Field(
        "default",
        description="Date segment for productivity sandbox paths (e.g. 'default' or 'YYYY-MM-DD'). Used when use_clawwork_productivity is True."
    )

    # NOTE: field_validator is the v1 `validator` shim when pydantic v2 is
    # unavailable (see module-top try/except import).
    @field_validator('default_shell')
    @classmethod
    def validate_shell(cls, v):
        """Reject empty or non-string shell paths."""
        if not v or not isinstance(v, str):
            raise ValueError("Shell path must be a non-empty string")
        return v

    @field_validator('working_dir')
    @classmethod
    def validate_working_dir(cls, v):
        """Allow None (use process cwd) but reject non-string values."""
        if v is not None and not isinstance(v, str):
            raise ValueError("Working directory must be a string")
        return v
87
+
88
class WebConfig(BackendConfig):
    """
    Web backend configuration - AI Deep Research

    Attributes:
        enabled: Whether web backend is enabled
        timeout: Default timeout for web operations (seconds)
        max_retries: Maximum number of retry attempts

    Note:
        All web-specific parameters (API key, base URL) are loaded from
        environment variables or use default values in WebSession:
        - OPENROUTER_API_KEY: API key for deep research (required)
        - Deep research base URL defaults to "https://openrouter.ai/api/v1"
    """
    # Intentionally adds no fields beyond BackendConfig; see Note above.
    pass
104
+
105
+
106
class MCPConfig(BackendConfig):
    """Configuration for the MCP (Model Context Protocol) client backend."""

    # Run MCP tool execution inside a sandbox when enabled.
    sandbox: bool = Field(default=False, description="Whether to enable sandbox")
    # Automatically initialize the backend on startup.
    auto_initialize: bool = Field(default=True, description="Whether to auto initialize")
    # Create sessions for every configured server up-front instead of lazily.
    eager_sessions: bool = Field(default=False, description="Whether to eagerly create sessions for all servers on initialization")
    # Pause between retries of a failed MCP request.
    retry_interval: float = Field(default=2.0, ge=0.1, le=60.0, description="Wait time between retries in seconds")
    # Server definitions keyed by server name (populated from config_mcp.json).
    servers: Dict[str, Dict[str, Any]] = Field(default_factory=dict, description="MCP servers configuration, loaded from config_mcp.json")
    # Read timeout for SSE streams used by HTTP/Sandbox connectors.
    sse_read_timeout: float = Field(default=300.0, ge=1.0, le=3600.0, description="SSE read timeout in seconds for HTTP/Sandbox connectors")
114
+
115
+
116
class GUIConfig(BackendConfig):
    """Configuration for the GUI automation backend.

    ``mode`` mirrors ShellConfig: "local" executes GUI operations in this
    process, while "server" forwards them to a running local_server over HTTP.
    """

    # Where GUI operations run: in-process or via an HTTP local_server.
    mode: Literal["local", "server"] = Field(
        default="local",
        description="Execution mode: 'local' (in-process) or 'server' (HTTP local_server)",
    )
    # Pause between retry attempts of a failed GUI operation.
    retry_interval: float = Field(
        default=5.0, ge=0.1, le=60.0,
        description="Wait time between retries in seconds",
    )
    # Automation driver backing the GUI actions.
    driver_type: str = Field(default="pyautogui", description="GUI driver type")
    # Value substituted for the {failsafe} placeholder in pkgs_prefix.
    failsafe: bool = Field(default=False, description="Whether to enable pyautogui failsafe mode")
    # Capture a screenshot whenever a GUI action fails, to aid debugging.
    screenshot_on_error: bool = Field(default=True, description="Whether to capture screenshot on error")
    # Template prepended to generated pyautogui commands; contains {failsafe}
    # and {command} format placeholders.
    pkgs_prefix: str = Field(
        default="import pyautogui; import time; pyautogui.FAILSAFE = {failsafe}; {command}",
        description="Python command prefix for pyautogui setup",
    )
133
+
134
+
135
class ToolSearchConfig(BaseModel):
    """Tool search and ranking configuration"""
    embedding_model: str = Field(
        "BAAI/bge-small-en-v1.5",
        description="Embedding model name for semantic search"
    )
    # NOTE: the bundled config_grounding.json overrides this default with 40.
    max_tools: int = Field(
        20,
        ge=1,
        le=1000,
        description="Maximum number of tools to return from search"
    )
    # Normalized to lower case by validate_search_mode below.
    search_mode: str = Field(
        "hybrid",
        description="Default search mode: semantic, keyword, or hybrid"
    )
    enable_llm_filter: bool = Field(
        True,
        description="Whether to use LLM for backend/server filtering"
    )
    # The LLM filter is skipped for small tool sets to avoid needless LLM calls.
    llm_filter_threshold: int = Field(
        50,
        ge=1,
        le=1000,
        description="Only apply LLM filter when tool count exceeds this threshold"
    )
    enable_cache_persistence: bool = Field(
        False,
        description="Whether to persist embeddings to disk"
    )
    cache_dir: Optional[str] = Field(
        None,
        description="Directory for embedding cache. None means use default <project_root>/.openspace/embedding_cache"
    )

    @field_validator('search_mode')
    @classmethod
    def validate_search_mode(cls, v):
        """Accept any casing of the three supported modes; store lower case."""
        valid_modes = ['semantic', 'keyword', 'hybrid']
        if v.lower() not in valid_modes:
            raise ValueError(f"Search mode must be one of {valid_modes}, got: {v}")
        return v.lower()
177
+
178
+
179
class ToolQualityConfig(BaseModel):
    """Settings for tracking and evolving tool quality over time."""

    # Master switch for the quality-tracking subsystem.
    enabled: bool = Field(default=True, description="Whether to enable tool quality tracking")
    # Persist collected quality data across runs.
    enable_persistence: bool = Field(default=True, description="Whether to persist quality data to disk")
    # Custom cache location; None selects the built-in default path.
    cache_dir: Optional[str] = Field(default=None, description="Directory for quality cache. None means use default <project_root>/.openspace/tool_quality")
    # Let the LLM grade tool descriptions automatically.
    auto_evaluate_descriptions: bool = Field(default=True, description="Whether to automatically evaluate tool descriptions using LLM")
    # Blend quality scores into tool-ranking results.
    enable_quality_ranking: bool = Field(default=True, description="Whether to incorporate quality scores in tool ranking")
    # Run the evolution pass after every N tool executions.
    evolve_interval: int = Field(default=5, ge=1, le=100, description="Trigger quality evolution every N tool executions")
207
+
208
+
209
class SkillConfig(BaseModel):
    """Skill engine configuration.

    Governs how skills are discovered, selected and injected.
    Built-in skills (``openspace/skills/``) are always auto-discovered.
    """

    # Master switch for skill matching/injection.
    enabled: bool = Field(default=True, description="Enable skill matching and injection")
    # Additional user-supplied skill directories to scan.
    skill_dirs: List[str] = Field(default_factory=list, description="Extra skill directories. Built-in openspace/skills/ is always included.")
    # Cap on how many matched skills are injected into a single task.
    max_select: int = Field(default=2, ge=1, le=20, description="Maximum number of skills to inject per task")
224
+
225
+
226
class GroundingConfig(BaseModel):
    """
    Main configuration for Grounding module.

    Contains configuration for all grounding backends and grounding-level settings.
    Note: Local server connection uses defaults or environment variables (LOCAL_SERVER_URL).
    """
    # Backend configurations
    shell: ShellConfig = Field(default_factory=ShellConfig)
    web: WebConfig = Field(default_factory=WebConfig)
    mcp: MCPConfig = Field(default_factory=MCPConfig)
    gui: GUIConfig = Field(default_factory=GUIConfig)
    # The meta "system" backend only needs the generic base settings.
    system: BackendConfig = Field(default_factory=BackendConfig)

    # Grounding-level settings
    tool_search: ToolSearchConfig = Field(default_factory=ToolSearchConfig)
    tool_quality: ToolQualityConfig = Field(default_factory=ToolQualityConfig)
    skills: SkillConfig = Field(default_factory=SkillConfig)

    enabled_backends: List[Dict[str, str]] = Field(
        default_factory=list,
        description="List of enabled backends, each item: {'name': str, 'provider_cls': str}"
    )

    # Template used when creating new sessions; per-session fields override it.
    session_defaults: SessionConfig = Field(
        default_factory=lambda: SessionConfig(
            session_name="",
            backend_type=BackendType.SHELL,
            timeout=30,
            auto_reconnect=True,
            health_check_interval=30
        )
    )

    tool_cache_ttl: int = Field(
        300,
        ge=1,
        le=3600,
        description="Tool cache time-to-live in seconds"
    )
    tool_cache_maxsize: int = Field(
        300,
        ge=1,
        le=10000,
        description="Maximum number of tool cache entries"
    )

    debug: bool = Field(False, description="Debug mode")
    log_level: str = Field("INFO", description="Log level")
    # Raw security policy mapping as loaded from config_security.json
    # (keys "global" and "backend"); interpreted by get_security_policy().
    security_policies: Dict[str, Any] = Field(default_factory=dict)

    @field_validator('log_level')
    @classmethod
    def validate_log_level(cls, v):
        """Normalize to upper case and require one of LOG_LEVELS."""
        if v.upper() not in LOG_LEVELS:
            raise ValueError(f"Log level must be one of {LOG_LEVELS}, got: {v}")
        return v.upper()

    def get_backend_config(self, backend_type: str) -> BackendConfig:
        """Get configuration for specified backend"""
        name = backend_type.lower()
        if not hasattr(self, name):
            # Unknown backend names degrade to generic defaults instead of raising.
            from openspace.utils.logging import Logger
            logger = Logger.get_logger(__name__)
            logger.warning(f"Unknown backend type: {backend_type}")
            return BackendConfig()
        return getattr(self, name)

    def get_security_policy(self, backend_type: str) -> SecurityPolicy:
        """Build the effective security policy for a backend.

        Backend-specific entries override global entries.
        NOTE(review): this is a shallow merge — a backend-level value such as
        a blocked_commands dict replaces the global one wholesale rather than
        being deep-merged; confirm that is intended.
        """
        global_policy = self.security_policies.get("global", {})
        backend_policy = self.security_policies.get("backend", {}).get(backend_type.lower(), {})
        merged_policy = {**global_policy, **backend_policy}
        return SecurityPolicy.from_dict(merged_policy)
299
+
300
+
301
# Names exported via `from openspace.config.grounding import *`.
# NOTE(review): SessionConfig and SecurityPolicy are imported above but are
# not listed here, so star-imports do not re-export them — confirm whether
# downstream star-importers rely on those names.
__all__ = [
    "BackendConfig",
    "ShellConfig",
    "WebConfig",
    "MCPConfig",
    "GUIConfig",
    "ToolSearchConfig",
    "ToolQualityConfig",
    "SkillConfig",
    "GroundingConfig",
]
openspace/config/loader.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import threading
2
+ from pathlib import Path
3
+ from typing import Union, Iterable, Dict, Any, Optional
4
+
5
+ from .grounding import GroundingConfig
6
+ from .constants import (
7
+ CONFIG_GROUNDING,
8
+ CONFIG_SECURITY,
9
+ CONFIG_DEV,
10
+ CONFIG_MCP,
11
+ CONFIG_AGENTS
12
+ )
13
+ from openspace.utils.logging import Logger
14
+ from .utils import load_json_file, save_json_file as save_json
15
+
16
# Module-wide logger for configuration loading diagnostics.
logger = Logger.get_logger(__name__)


# Directory containing the bundled config_*.json files (openspace/config/).
CONFIG_DIR = Path(__file__).parent

# Global configuration singleton
# Lazily populated by load_config() / get_config(); cleared by reset_config().
_config: GroundingConfig | None = None
_config_lock = threading.RLock()  # Use RLock to support recursive locking
24
+
25
+
26
+ def _deep_merge_dict(base: dict, update: dict) -> dict:
27
+ """Deep merge two dictionaries, update's values will override base's values"""
28
+ result = base.copy()
29
+ for key, value in update.items():
30
+ if key in result and isinstance(result[key], dict) and isinstance(value, dict):
31
+ result[key] = _deep_merge_dict(result[key], value)
32
+ else:
33
+ result[key] = value
34
+ return result
35
+
36
def _load_json_file(path: Path) -> Dict[str, Any]:
    """Load one JSON configuration file, tolerating absence and errors.

    Thin wrapper around the generic load_json_file that adds the global
    configuration behavior: a missing file and a parse failure both
    degrade to an empty dict (with logging) instead of raising.
    """
    if not path.exists():
        logger.debug(f"Configuration file does not exist, skipping: {path}")
        return {}

    try:
        data = load_json_file(path)
    except Exception as e:
        logger.warning(f"Failed to load configuration file {path}: {e}")
        return {}
    logger.info(f"Loaded configuration file: {path}")
    return data
52
+
53
def _load_multiple_files(paths: Iterable[Path]) -> Dict[str, Any]:
    """Merge configuration from several files; later files override earlier ones."""
    combined: Dict[str, Any] = {}
    for candidate in paths:
        loaded = _load_json_file(candidate)
        if loaded:
            combined = _deep_merge_dict(combined, loaded)
    return combined
61
+
62
def load_config(*config_paths: Union[str, Path]) -> GroundingConfig:
    """
    Load, merge and validate configuration files into the global singleton.

    Args:
        *config_paths: Optional explicit config file paths. When omitted, the
            default layered files are used (grounding -> security -> dev),
            later files overriding earlier ones.

    Returns:
        The validated GroundingConfig. Falls back to a default-constructed
        config when validation fails.
    """
    global _config

    with _config_lock:
        if config_paths:
            paths = [Path(p) for p in config_paths]
        else:
            paths = [
                CONFIG_DIR / CONFIG_GROUNDING,
                CONFIG_DIR / CONFIG_SECURITY,
                CONFIG_DIR / CONFIG_DEV,  # Optional: development environment configuration
            ]

        # Load and merge configuration
        raw_data = _load_multiple_files(paths)

        # Load MCP configuration (separate processing): custom configs may
        # carry their own "mcpServers"; only fall back to the bundled
        # config_mcp.json when they do not.
        if "mcpServers" in raw_data:
            raw_data.setdefault("mcp", {})["servers"] = raw_data.pop("mcpServers")
            logger.debug(f"Using custom MCP servers from provided config ({len(raw_data['mcp']['servers'])} servers)")
        else:
            mcp_data = _load_json_file(CONFIG_DIR / CONFIG_MCP)
            if mcp_data and "mcpServers" in mcp_data:
                raw_data.setdefault("mcp", {})["servers"] = mcp_data["mcpServers"]
                logger.debug(f"Loaded MCP servers from default config_mcp.json ({len(raw_data['mcp']['servers'])} servers)")

        # Validate and create configuration object.
        # BUGFIX: model_validate() exists only in pydantic v2, but grounding.py
        # explicitly supports a pydantic v1 fallback — under v1 the old code
        # always raised AttributeError and silently degraded to a default
        # config. Use the v1 parse_obj() API when model_validate is missing.
        try:
            if hasattr(GroundingConfig, "model_validate"):
                _config = GroundingConfig.model_validate(raw_data)
            else:  # pydantic v1
                _config = GroundingConfig.parse_obj(raw_data)
        except Exception as e:
            logger.error(f"Validation failed, using default configuration: {e}")
            _config = GroundingConfig()

        # Adjust log level according to configuration
        if _config.debug:
            Logger.set_debug(2)
        elif _config.log_level:
            try:
                Logger.configure(level=_config.log_level)
            except Exception as e:
                logger.warning(f"Failed to set log level {_config.log_level}: {e}")

    return _config
117
+
118
def get_config() -> GroundingConfig:
    """
    Get global configuration instance.

    Lazily loads the default configuration on first access using
    double-checked locking: the unlocked fast path avoids lock contention
    once the singleton exists, and the re-check under the lock prevents two
    threads from both invoking load_config().

    Usage:
        - Get configuration in Provider: get_config().get_backend_config('shell')
        - Get security policy in Tool: get_config().get_security_policy('shell')
    """
    global _config

    if _config is None:
        with _config_lock:
            # Re-check: another thread may have loaded while we waited.
            if _config is None:
                load_config()

    return _config
134
+
135
def reset_config() -> None:
    """Reset configuration (for testing)

    Clears the module-level singleton so the next get_config() call
    reloads the configuration from disk.
    """
    global _config
    with _config_lock:
        _config = None
140
+
141
def save_config(config: GroundingConfig, path: Union[str, Path]) -> None:
    """Serialize *config* to a JSON file at *path*.

    Args:
        config: Configuration object to persist.
        path: Destination file path (parent directories are created by
            the save_json helper).
    """
    # BUGFIX: model_dump() is pydantic-v2-only; fall back to the v1 dict()
    # API so saving also works under the v1 shim used in grounding.py.
    if hasattr(config, "model_dump"):
        payload = config.model_dump()
    else:  # pydantic v1
        payload = config.dict()
    save_json(payload, path)
    logger.info(f"Configuration saved to: {path}")
144
+
145
+
146
def load_agents_config() -> Dict[str, Any]:
    """Load the agents configuration file (config_agents.json) as a dict.

    Returns an empty dict when the file is missing or unreadable (the
    underlying _load_json_file swallows those failures).
    """
    return _load_json_file(CONFIG_DIR / CONFIG_AGENTS)
149
+
150
+
151
def get_agent_config(agent_name: str) -> Optional[Dict[str, Any]]:
    """Return the configuration entry for *agent_name*, or None if absent.

    Looks the agent up by its "name" field in the "agents" list of
    config_agents.json; logs a warning whenever the lookup fails.
    """
    agents_config = load_agents_config()

    if "agents" not in agents_config:
        logger.warning(f"No 'agents' key found in {CONFIG_AGENTS}")
        return None

    entry = next(
        (cfg for cfg in agents_config.get("agents", []) if cfg.get("name") == agent_name),
        None,
    )
    if entry is None:
        logger.warning(f"Agent '{agent_name}' not found in {CONFIG_AGENTS}")
    return entry
167
+
168
+
169
# Names exported via `from openspace.config.loader import *`.
__all__ = [
    "CONFIG_DIR",
    "load_config",
    "get_config",
    "reset_config",
    "save_config",
    "load_agents_config",
    "get_agent_config"
]
openspace/config/utils.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Any
4
+
5
+
6
def get_config_value(config: Any, key: str, default=None):
    """Fetch *key* from *config*, supporting both mappings and objects.

    Dicts are read with ``.get()``; anything else falls back to
    ``getattr()``. Returns *default* when the key/attribute is missing.
    """
    if isinstance(config, dict):
        return config.get(key, default)
    return getattr(config, key, default)
11
+
12
+
13
+ def load_json_file(filepath: str | Path) -> dict[str, Any]:
14
+ filepath = Path(filepath) if isinstance(filepath, str) else filepath
15
+
16
+ with open(filepath, 'r', encoding='utf-8') as f:
17
+ return json.load(f)
18
+
19
+
20
+ def save_json_file(data: dict[str, Any], filepath: str | Path, indent: int = 2) -> None:
21
+ filepath = Path(filepath) if isinstance(filepath, str) else filepath
22
+
23
+ # Ensure directory exists
24
+ filepath.parent.mkdir(parents=True, exist_ok=True)
25
+
26
+ with open(filepath, 'w', encoding='utf-8') as f:
27
+ json.dump(data, f, indent=indent, ensure_ascii=False)
28
+
29
+
30
# Public API of this helper module.
__all__ = ["get_config_value", "load_json_file", "save_json_file"]
openspace/dashboard_server.py ADDED
@@ -0,0 +1,639 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import os
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Iterable, List, Optional
8
+
9
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
10
+
11
+ from flask import Flask, abort, jsonify, send_from_directory, url_for, request
12
+
13
+ from openspace.recording.action_recorder import analyze_agent_actions, load_agent_actions
14
+ from openspace.recording.utils import load_recording_session
15
+ from openspace.skill_engine import SkillStore
16
+ from openspace.skill_engine.types import SkillRecord
17
+
18
# URL prefix shared by every JSON API route.
API_PREFIX = "/api/v1"
# Compiled frontend bundle; served as a single-page app when the directory exists.
FRONTEND_DIST_DIR = PROJECT_ROOT / "frontend" / "dist"
# Roots scanned (recursively) for workflow recording directories.
WORKFLOW_ROOTS = [
    PROJECT_ROOT / "logs" / "recordings",
    PROJECT_ROOT / "logs" / "trajectories",
    PROJECT_ROOT / "gdpval_bench" / "results",
]

# Static description of the execution pipeline, returned by the /overview endpoint.
PIPELINE_STAGES = [
    {
        "id": "initialize",
        "title": "Initialize",
        "description": "Load LLM, grounding backends, recording, registry, analyzer, and evolver.",
    },
    {
        "id": "select-skills",
        "title": "Skill Selection",
        "description": "Select candidate skills and write selection metadata before execution.",
    },
    {
        "id": "phase-1-skill",
        "title": "Skill Phase",
        "description": "Run the task with injected skill context whenever matching skills exist.",
    },
    {
        "id": "phase-2-fallback",
        "title": "Tool Fallback",
        "description": "Fallback to tool-only execution when the skill-guided phase fails or no skills match.",
    },
    {
        "id": "analysis",
        "title": "Execution Analysis",
        "description": "Persist metadata, trajectory, and post-run execution judgments.",
    },
    {
        "id": "evolution",
        "title": "Skill Evolution",
        "description": "Trigger fix / derived / captured evolution and periodic quality checks.",
    },
]

# Process-wide SkillStore singleton; created lazily by _get_store().
_STORE: SkillStore | None = None
60
+
61
+
62
def create_app() -> Flask:
    """Create the Flask application serving the dashboard API and frontend.

    JSON routes live under ``API_PREFIX``; any non-API path falls back to the
    compiled single-page frontend in ``FRONTEND_DIST_DIR`` when it exists.
    When the ``OPENSPACE_API_KEY`` environment variable is set, every request
    must carry a matching ``Authorization: Bearer <key>`` header.
    """
    app = Flask(__name__, static_folder=None)

    @app.before_request
    def check_api_key():
        """Enforce bearer-token auth when OPENSPACE_API_KEY is configured."""
        # Allow preflight requests (CORS)
        if request.method == "OPTIONS":
            return

        expected_key = os.environ.get("OPENSPACE_API_KEY")
        if expected_key:
            import hmac  # local import: only needed when auth is enabled

            auth_header = request.headers.get("Authorization")
            # compare_digest performs a constant-time comparison so the secret
            # cannot be probed via response-timing differences (a plain `!=`
            # short-circuits on the first mismatching byte).
            if not auth_header or not hmac.compare_digest(
                auth_header, f"Bearer {expected_key}"
            ):
                abort(401, description="Unauthorized: Invalid or missing API Key")

    @app.route(f"{API_PREFIX}/health", methods=["GET"])
    def health() -> Any:
        """Liveness probe plus basic environment information."""
        workflows = _discover_workflow_dirs()
        store = _get_store()
        return jsonify(
            {
                "status": "ok",
                "project_root": str(PROJECT_ROOT),
                "db_path": str(store.db_path),
                "db_exists": store.db_path.exists(),
                "frontend_dist_exists": FRONTEND_DIST_DIR.exists(),
                "workflow_roots": [str(path) for path in WORKFLOW_ROOTS],
                "workflow_count": len(workflows),
            }
        )

    @app.route(f"{API_PREFIX}/overview", methods=["GET"])
    def overview() -> Any:
        """Aggregate dashboard overview: health, pipeline, skills, workflows."""
        store = _get_store()
        skills = list(store.load_all(active_only=False).values())
        workflows = [_build_workflow_summary(path) for path in _discover_workflow_dirs()]
        top_skills = _sort_skills(skills, sort_key="score")[:5]
        recent_skills = _sort_skills(skills, sort_key="updated")[:5]
        average_score = round(
            sum(_skill_score(record) for record in skills) / len(skills), 1
        ) if skills else 0.0
        average_workflow_success = round(
            (sum((item.get("success_rate") or 0.0) for item in workflows) / len(workflows)) * 100,
            1,
        ) if workflows else 0.0

        return jsonify(
            {
                "health": {
                    "status": "ok",
                    "db_path": str(store.db_path),
                    "workflow_count": len(workflows),
                    "frontend_dist_exists": FRONTEND_DIST_DIR.exists(),
                },
                "pipeline": PIPELINE_STAGES,
                "skills": {
                    "summary": _build_skill_stats(store, skills),
                    "average_score": average_score,
                    "top": [_serialize_skill(item) for item in top_skills],
                    "recent": [_serialize_skill(item) for item in recent_skills],
                },
                "workflows": {
                    "total": len(workflows),
                    "average_success_rate": average_workflow_success,
                    "recent": workflows[:5],
                },
            }
        )

    @app.route(f"{API_PREFIX}/skills", methods=["GET"])
    def list_skills() -> Any:
        """List skills with optional text query, sorting, and limit."""
        store = _get_store()
        active_only = _bool_arg("active_only", True)
        limit = _int_arg("limit", 100)
        sort_key = (_str_arg("sort", "score") or "score").lower()
        skills = list(store.load_all(active_only=active_only).values())
        query = (_str_arg("query", "") or "").strip().lower()
        if query:
            # Case-insensitive substring match on name, id, description, or tags.
            skills = [
                record
                for record in skills
                if query in record.name.lower()
                or query in record.skill_id.lower()
                or query in record.description.lower()
                or any(query in tag.lower() for tag in record.tags)
            ]
        items = [_serialize_skill(item) for item in _sort_skills(skills, sort_key=sort_key)[:limit]]
        return jsonify({"items": items, "count": len(items), "active_only": active_only})

    @app.route(f"{API_PREFIX}/skills/stats", methods=["GET"])
    def skill_stats() -> Any:
        """Aggregate statistics over all skills (active and inactive)."""
        store = _get_store()
        skills = list(store.load_all(active_only=False).values())
        return jsonify(_build_skill_stats(store, skills))

    @app.route(f"{API_PREFIX}/skills/<skill_id>", methods=["GET"])
    def skill_detail(skill_id: str) -> Any:
        """Full detail for one skill: record, lineage graph, analyses, source."""
        store = _get_store()
        record = store.load_record(skill_id)
        if not record:
            abort(404, description=f"Unknown skill_id: {skill_id}")

        detail = _serialize_skill(record, include_recent_analyses=True)
        detail["lineage_graph"] = _build_lineage_payload(skill_id, store)
        detail["recent_analyses"] = [analysis.to_dict() for analysis in store.load_analyses(skill_id=skill_id, limit=10)]
        detail["source"] = _load_skill_source(record)
        return jsonify(detail)

    @app.route(f"{API_PREFIX}/skills/<skill_id>/lineage", methods=["GET"])
    def skill_lineage(skill_id: str) -> Any:
        """Lineage graph (ancestors and descendants) for one skill."""
        store = _get_store()
        if not store.load_record(skill_id):
            abort(404, description=f"Unknown skill_id: {skill_id}")
        return jsonify(_build_lineage_payload(skill_id, store))

    @app.route(f"{API_PREFIX}/skills/<skill_id>/source", methods=["GET"])
    def skill_source(skill_id: str) -> Any:
        """Raw source-file contents for one skill."""
        store = _get_store()
        record = store.load_record(skill_id)
        if not record:
            abort(404, description=f"Unknown skill_id: {skill_id}")
        return jsonify(_load_skill_source(record))

    @app.route(f"{API_PREFIX}/workflows", methods=["GET"])
    def list_workflows() -> Any:
        """Summaries for every discovered workflow recording directory."""
        items = [_build_workflow_summary(path) for path in _discover_workflow_dirs()]
        return jsonify({"items": items, "count": len(items)})

    @app.route(f"{API_PREFIX}/workflows/<workflow_id>", methods=["GET"])
    def workflow_detail(workflow_id: str) -> Any:
        """Full workflow detail: trajectory, plans, actions, timeline, artifacts."""
        workflow_dir = _get_workflow_dir(workflow_id)
        if not workflow_dir:
            abort(404, description=f"Unknown workflow: {workflow_id}")

        session = load_recording_session(str(workflow_dir))
        actions = load_agent_actions(str(workflow_dir))
        metadata = session.get("metadata") or {}
        trajectory = session.get("trajectory") or []
        plans = session.get("plans") or []
        decisions = session.get("decisions") or []
        action_stats = analyze_agent_actions(actions)

        # Attach a servable URL to each step that recorded a screenshot.
        enriched_trajectory = []
        for step in trajectory:
            step_copy = dict(step)
            screenshot_rel = step_copy.get("screenshot")
            if screenshot_rel:
                step_copy["screenshot_url"] = url_for(
                    "workflow_artifact",
                    workflow_id=workflow_id,
                    artifact_path=screenshot_rel,
                )
            enriched_trajectory.append(step_copy)

        timeline = _build_timeline(actions, enriched_trajectory)
        artifacts = _build_workflow_artifacts(workflow_dir, workflow_id, metadata)

        return jsonify(
            {
                **_build_workflow_summary(workflow_dir),
                "metadata": metadata,
                "statistics": session.get("statistics") or {},
                "trajectory": enriched_trajectory,
                "plans": plans,
                "decisions": decisions,
                "agent_actions": actions,
                "agent_statistics": action_stats,
                "timeline": timeline,
                "artifacts": artifacts,
            }
        )

    @app.route(f"{API_PREFIX}/workflows/<workflow_id>/artifacts/<path:artifact_path>", methods=["GET"])
    def workflow_artifact(workflow_id: str, artifact_path: str) -> Any:
        """Serve a file from inside a workflow directory (path-traversal safe)."""
        workflow_dir = _get_workflow_dir(workflow_id)
        if not workflow_dir:
            abort(404, description=f"Unknown workflow: {workflow_id}")

        target = (workflow_dir / artifact_path).resolve()
        root = workflow_dir.resolve()
        # Reject any resolved path that escapes the workflow directory.
        if root not in target.parents and target != root:
            abort(404)
        if not target.exists() or not target.is_file():
            abort(404)
        return send_from_directory(str(target.parent), target.name)

    @app.route("/", defaults={"path": ""})
    @app.route("/<path:path>")
    def serve_frontend(path: str) -> Any:
        """Serve the compiled frontend, falling back to index.html (SPA routing)."""
        if path.startswith("api/"):
            abort(404)

        if FRONTEND_DIST_DIR.exists():
            requested = FRONTEND_DIST_DIR / path if path else FRONTEND_DIST_DIR / "index.html"
            if path and requested.exists() and requested.is_file():
                return send_from_directory(str(FRONTEND_DIST_DIR), path)
            return send_from_directory(str(FRONTEND_DIST_DIR), "index.html")

        return jsonify(
            {
                "message": "OpenSpace dashboard API is running.",
                "frontend": "Build frontend/ first or run the Vite dev server.",
            }
        )

    return app
268
+
269
+
270
def _get_store() -> SkillStore:
    """Return the process-wide SkillStore, creating it lazily on first use."""
    global _STORE
    store = _STORE
    if store is None:
        store = SkillStore()
        _STORE = store
    return store
275
+
276
+
277
def _bool_arg(name: str, default: bool) -> bool:
    """Read query parameter *name* as a boolean, using *default* when absent.

    Any value other than the falsy spellings "0"/"false"/"no"/"off"
    (case-insensitive) counts as True.
    """
    from flask import request

    value = request.args.get(name)
    if value is None:
        return default
    falsy = {"0", "false", "no", "off"}
    return value.lower() not in falsy
284
+
285
+
286
def _int_arg(name: str, default: int) -> int:
    """Read query parameter *name* as an int, falling back to *default*."""
    from flask import request

    raw = request.args.get(name)
    try:
        return default if raw is None else int(raw)
    except ValueError:
        # Non-numeric value supplied by the client.
        return default
296
+
297
+
298
def _str_arg(name: str, default: str) -> str:
    """Read query parameter *name* as a string, falling back to *default*."""
    from flask import request

    value = request.args.get(name)
    return default if value is None else value
302
+
303
+
304
+ def _skill_score(record: SkillRecord) -> float:
305
+ return round(record.effective_rate * 100, 1)
306
+
307
+
308
def _serialize_skill(record: SkillRecord, *, include_recent_analyses: bool = False) -> Dict[str, Any]:
    """Convert a SkillRecord into the JSON payload used by dashboard endpoints.

    Adds derived fields (rates rounded to four places, the 0-100 score, and
    lineage shortcuts) on top of ``record.to_dict()``. Recent analyses are
    stripped unless explicitly requested.
    """
    payload = record.to_dict()
    if not include_recent_analyses:
        payload.pop("recent_analyses", None)

    skill_path = payload.get("path", "")
    lineage_info = payload.get("lineage") or {}

    payload["skill_dir"] = str(Path(skill_path).parent) if skill_path else ""
    payload["origin"] = lineage_info.get("origin", "")
    payload["generation"] = lineage_info.get("generation", 0)
    payload["parent_skill_ids"] = lineage_info.get("parent_skill_ids", [])
    for rate_field in ("applied_rate", "completion_rate", "effective_rate", "fallback_rate"):
        payload[rate_field] = round(getattr(record, rate_field), 4)
    payload["score"] = _skill_score(record)
    return payload
329
+
330
+
331
+ def _naive_dt(dt: datetime) -> datetime:
332
+ """Strip tzinfo so naive/aware datetimes can be compared safely."""
333
+ return dt.replace(tzinfo=None) if dt.tzinfo else dt
334
+
335
+
336
def _sort_skills(records: Iterable[SkillRecord], *, sort_key: str) -> List[SkillRecord]:
    """Order skill records by *sort_key*.

    "updated" sorts newest first, "name" sorts case-insensitively A-Z, and any
    other key falls back to the score ranking (score, selections, recency —
    all descending).
    """
    items = list(records)
    if sort_key == "updated":
        items.sort(key=lambda rec: _naive_dt(rec.last_updated), reverse=True)
        return items
    if sort_key == "name":
        items.sort(key=lambda rec: rec.name.lower())
        return items
    items.sort(
        key=lambda rec: (
            _skill_score(rec),
            rec.total_selections,
            _naive_dt(rec.last_updated).timestamp(),
        ),
        reverse=True,
    )
    return items
346
+
347
+
348
def _build_skill_stats(store: SkillStore, skills: List[SkillRecord]) -> Dict[str, Any]:
    """Aggregate store-level stats plus derived activity and score summaries."""
    stats = store.get_stats(active_only=False)
    if skills:
        avg_score = round(sum(_skill_score(item) for item in skills) / len(skills), 1)
    else:
        avg_score = 0.0
    active_count = sum(1 for item in skills if item.total_selections > 0)
    analyzed_count = sum(1 for item in skills if item.recent_analyses)
    top_skills = _sort_skills(skills, sort_key="score")[:5]
    return {
        **stats,
        "average_score": avg_score,
        "skills_with_activity": active_count,
        "skills_with_recent_analysis": analyzed_count,
        "top_by_effective_rate": [_serialize_skill(item) for item in top_skills],
    }
359
+
360
+
361
+ def _load_skill_source(record: SkillRecord) -> Dict[str, Any]:
362
+ skill_path = Path(record.path)
363
+ if not skill_path.exists() or not skill_path.is_file():
364
+ return {"exists": False, "path": record.path, "content": None}
365
+ try:
366
+ return {
367
+ "exists": True,
368
+ "path": str(skill_path),
369
+ "content": skill_path.read_text(encoding="utf-8"),
370
+ }
371
+ except OSError:
372
+ return {"exists": False, "path": str(skill_path), "content": None}
373
+
374
+
375
def _build_lineage_payload(skill_id: str, store: SkillStore) -> Dict[str, Any]:
    """Build the lineage graph around *skill_id*.

    Walks both directions — ancestors via ``parent_skill_ids`` and descendants
    via a reverse index — to collect every transitively related skill, then
    emits node dicts and parent->child edges for the frontend graph view.
    Returns an empty graph when the skill id is unknown.
    """
    records = store.load_all(active_only=False)
    if skill_id not in records:
        return {"skill_id": skill_id, "nodes": [], "edges": [], "total_nodes": 0}

    # Reverse index: parent skill id -> set of direct child skill ids.
    children_by_parent: Dict[str, set[str]] = {}
    for item in records.values():
        for parent_id in item.lineage.parent_skill_ids:
            children_by_parent.setdefault(parent_id, set()).add(item.skill_id)

    # Flood-fill across both parent and child links starting at skill_id.
    related_ids = {skill_id}
    frontier = [skill_id]
    while frontier:
        current = frontier.pop()
        record = records.get(current)
        if not record:
            # Dangling reference (e.g. a parent id whose record was deleted).
            continue
        for parent_id in record.lineage.parent_skill_ids:
            if parent_id not in related_ids:
                related_ids.add(parent_id)
                frontier.append(parent_id)
        for child_id in children_by_parent.get(current, set()):
            if child_id not in related_ids:
                related_ids.add(child_id)
                frontier.append(child_id)

    nodes = []
    edges = []
    for related_id in sorted(related_ids):
        record = records.get(related_id)
        if not record:
            # Related id with no stored record — skip it (no node, no edges).
            continue
        nodes.append(
            {
                "skill_id": record.skill_id,
                "name": record.name,
                "description": record.description,
                "origin": record.lineage.origin.value,
                "generation": record.lineage.generation,
                "created_at": record.lineage.created_at.isoformat(),
                "visibility": record.visibility.value,
                "is_active": record.is_active,
                "tags": list(record.tags),
                "score": _skill_score(record),
                "effective_rate": round(record.effective_rate, 4),
                "total_selections": record.total_selections,
            }
        )
        # Only emit edges whose both endpoints are part of the collected set.
        for parent_id in record.lineage.parent_skill_ids:
            if parent_id in related_ids:
                edges.append({"source": parent_id, "target": record.skill_id})

    return {
        "skill_id": skill_id,
        "nodes": nodes,
        "edges": edges,
        "total_nodes": len(nodes),
    }
433
+
434
+
435
def _discover_workflow_dirs() -> List[Path]:
    """Find workflow recording directories under the configured roots.

    Results are ordered newest first by directory mtime; when two roots
    contain a directory with the same name, the first one found wins.
    """
    found: Dict[str, Path] = {}
    for root in WORKFLOW_ROOTS:
        if root.exists():
            _scan_workflow_tree(root, found)
    return sorted(found.values(), key=lambda p: p.stat().st_mtime, reverse=True)
442
+
443
+
444
+ def _scan_workflow_tree(directory: Path, discovered: Dict[str, Path], *, _depth: int = 0, _max_depth: int = 6) -> None:
445
+ if _depth > _max_depth:
446
+ return
447
+ try:
448
+ children = list(directory.iterdir())
449
+ except OSError:
450
+ return
451
+ for child in children:
452
+ if not child.is_dir():
453
+ continue
454
+ if (child / "metadata.json").exists() or (child / "traj.jsonl").exists():
455
+ discovered.setdefault(child.name, child)
456
+ else:
457
+ _scan_workflow_tree(child, discovered, _depth=_depth + 1, _max_depth=_max_depth)
458
+
459
+
460
def _get_workflow_dir(workflow_id: str) -> Optional[Path]:
    """Resolve a workflow id (its directory name) to a path, or None."""
    matches = (path for path in _discover_workflow_dirs() if path.name == workflow_id)
    return next(matches, None)
465
+
466
+
467
def _build_workflow_summary(workflow_dir: Path) -> Dict[str, Any]:
    """Summarize one workflow recording directory for list/detail endpoints.

    Pulls metadata and statistics from the recorded session, counts PNG
    screenshots, locates an optional screen-recording video, and fills in
    instruction / timing / status fields via fallback chains when the
    recorder left them empty.
    """
    session = load_recording_session(str(workflow_dir))
    metadata = session.get("metadata") or {}
    statistics = session.get("statistics") or {}
    actions = load_agent_actions(str(workflow_dir))
    screenshots_dir = workflow_dir / "screenshots"
    screenshot_count = len(list(screenshots_dir.glob("*.png"))) if screenshots_dir.exists() else 0

    # Either recording filename may exist; the first match wins.
    video_candidates = [workflow_dir / "screen_recording.mp4", workflow_dir / "recording.mp4"]
    video_url = None
    for candidate in video_candidates:
        if candidate.exists():
            rel = candidate.relative_to(workflow_dir).as_posix()
            video_url = url_for("workflow_artifact", workflow_id=workflow_dir.name, artifact_path=rel)
            break

    outcome = metadata.get("execution_outcome") or {}
    # Instruction fallback chain: top-level → retrieved_tools.instruction → skill_selection.task
    instruction = (
        metadata.get("instruction")
        or (metadata.get("retrieved_tools") or {}).get("instruction")
        or (metadata.get("skill_selection") or {}).get("task")
        or ""
    )

    # Resolve start/end times with trajectory fallback
    start_time = metadata.get("start_time")
    end_time = metadata.get("end_time")
    trajectory = session.get("trajectory") or []

    # If end_time is missing, infer from last trajectory step
    if not end_time and trajectory:
        last_ts = trajectory[-1].get("timestamp")
        if last_ts:
            end_time = last_ts

    # Compute execution_time: prefer outcome, fallback to timestamp diff
    execution_time = outcome.get("execution_time", 0)
    if not execution_time and start_time and end_time:
        try:
            t0 = datetime.fromisoformat(start_time)
            t1 = datetime.fromisoformat(end_time)
            execution_time = round((t1 - t0).total_seconds(), 2)
        except (ValueError, TypeError):
            # Unparseable timestamps: leave execution_time at its default.
            pass

    # Resolve status: prefer outcome, fallback heuristic
    status = outcome.get("status", "")
    if not status:
        total_steps = statistics.get("total_steps", 0)
        if total_steps > 0:
            status = "success"
        elif trajectory:
            status = "completed"
        else:
            status = "unknown"

    # Resolve iterations: prefer outcome, fallback to conversation count
    iterations = outcome.get("iterations", 0)
    if not iterations and trajectory:
        iterations = len(trajectory)

    return {
        "id": workflow_dir.name,
        "path": str(workflow_dir),
        "task_id": metadata.get("task_id") or metadata.get("task_name") or workflow_dir.name,
        "task_name": metadata.get("task_name") or metadata.get("task_id") or workflow_dir.name,
        "instruction": instruction,
        "status": status,
        "iterations": iterations,
        "execution_time": execution_time,
        "start_time": start_time,
        "end_time": end_time,
        "total_steps": statistics.get("total_steps", 0),
        "success_count": statistics.get("success_count", 0),
        "success_rate": statistics.get("success_rate", 0.0),
        "backend_counts": statistics.get("backends", {}),
        "tool_counts": statistics.get("tools", {}),
        "agent_action_count": len(actions),
        "has_video": bool(video_url),
        "video_url": video_url,
        "screenshot_count": screenshot_count,
        "selected_skills": (metadata.get("skill_selection") or {}).get("selected", []),
    }
551
+
552
+
553
+ def _build_timeline(actions: List[Dict[str, Any]], trajectory: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
554
+ events: List[Dict[str, Any]] = []
555
+ for action in actions:
556
+ events.append(
557
+ {
558
+ "timestamp": action.get("timestamp", ""),
559
+ "type": "agent_action",
560
+ "step": action.get("step"),
561
+ "label": action.get("action_type", "agent_action"),
562
+ "agent_name": action.get("agent_name", ""),
563
+ "agent_type": action.get("agent_type", ""),
564
+ "details": action,
565
+ }
566
+ )
567
+ for step in trajectory:
568
+ events.append(
569
+ {
570
+ "timestamp": step.get("timestamp", ""),
571
+ "type": "tool_execution",
572
+ "step": step.get("step"),
573
+ "label": step.get("tool", "tool_execution"),
574
+ "backend": step.get("backend", ""),
575
+ "status": (step.get("result") or {}).get("status", "unknown"),
576
+ "details": step,
577
+ }
578
+ )
579
+ events.sort(key=lambda item: (item.get("timestamp", ""), item.get("step") or 0))
580
+ return events
581
+
582
+
583
def _build_workflow_artifacts(workflow_dir: Path, workflow_id: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """Collect URLs for a workflow's screenshots, init screenshot, and video."""
    screenshots: List[Dict[str, Any]] = []
    shots_dir = workflow_dir / "screenshots"
    if shots_dir.exists():
        for image in sorted(shots_dir.glob("*.png")):
            rel = image.relative_to(workflow_dir).as_posix()
            screenshots.append(
                {
                    "name": image.name,
                    "path": rel,
                    "url": url_for("workflow_artifact", workflow_id=workflow_id, artifact_path=rel),
                }
            )

    # The init screenshot is only linked when metadata stores a string path.
    init_screenshot = metadata.get("init_screenshot")
    init_screenshot_url = None
    if isinstance(init_screenshot, str):
        init_screenshot_url = url_for(
            "workflow_artifact", workflow_id=workflow_id, artifact_path=init_screenshot
        )

    video_url = None
    for rel in ("screen_recording.mp4", "recording.mp4"):
        if (workflow_dir / rel).exists():
            video_url = url_for("workflow_artifact", workflow_id=workflow_id, artifact_path=rel)
            break

    return {
        "init_screenshot_url": init_screenshot_url,
        "screenshots": screenshots,
        "video_url": video_url,
    }
616
+
617
+
618
def main() -> None:
    """CLI entry point: parse host/port/debug flags and serve the dashboard."""
    parser = argparse.ArgumentParser(description="OpenSpace dashboard API server")
    parser.add_argument("--host", default="127.0.0.1", help="Dashboard API host")
    parser.add_argument("--port", type=int, default=7788, help="Dashboard API port")
    parser.add_argument("--debug", action="store_true", help="Enable Flask debug mode")
    options = parser.parse_args()

    app = create_app()

    # Werkzeug's dev server; debug flag enables both the debugger and reloader.
    from werkzeug.serving import run_simple

    run_simple(
        options.host,
        options.port,
        app,
        threaded=True,
        use_debugger=options.debug,
        use_reloader=options.debug,
    )
636
+
637
+
638
+ if __name__ == "__main__":
639
+ main()
openspace/grounding/backends/__init__.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use lazy imports to avoid loading all backends unconditionally
2
+
3
+ def _lazy_import_provider(provider_name: str):
4
+ """Lazy import provider class"""
5
+ if provider_name == 'mcp':
6
+ from .mcp.provider import MCPProvider
7
+ return MCPProvider
8
+ elif provider_name == 'shell':
9
+ from .shell.provider import ShellProvider
10
+ return ShellProvider
11
+ elif provider_name == 'web':
12
+ from .web.provider import WebProvider
13
+ return WebProvider
14
+ elif provider_name == 'gui':
15
+ from .gui.provider import GUIProvider
16
+ return GUIProvider
17
+ else:
18
+ raise ImportError(f"Unknown provider: {provider_name}")
19
+
20
+
21
+ class _ProviderRegistry:
22
+ """Lazy provider registry"""
23
+ def __getitem__(self, key):
24
+ return _lazy_import_provider(key)
25
+
26
+ def __contains__(self, key):
27
+ return key in ['mcp', 'shell', 'web', 'gui']
28
+
29
+ BACKEND_PROVIDERS = _ProviderRegistry()
30
+
31
+ __all__ = [
32
+ 'BACKEND_PROVIDERS',
33
+ '_lazy_import_provider'
34
+ ]
openspace/grounding/backends/gui/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .provider import GUIProvider
2
+ from .session import GUISession
3
+ from .transport.connector import GUIConnector
4
+ from .transport.local_connector import LocalGUIConnector
5
+
6
+ try:
7
+ from .anthropic_client import AnthropicGUIClient
8
+ from . import anthropic_utils
9
+ _anthropic_available = True
10
+ except ImportError:
11
+ _anthropic_available = False
12
+
13
+ __all__ = [
14
+ # Core Provider and Session
15
+ "GUIProvider",
16
+ "GUISession",
17
+
18
+ # Transport layer
19
+ "GUIConnector",
20
+ "LocalGUIConnector",
21
+ ]
22
+
23
+ # Add Anthropic modules to exports if available
24
+ if _anthropic_available:
25
+ __all__.extend(["AnthropicGUIClient", "anthropic_utils"])
openspace/grounding/backends/gui/anthropic_client.py ADDED
@@ -0,0 +1,575 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ import time
4
+ from typing import Any, Dict, Optional, Tuple, List
5
+ from openspace.utils.logging import Logger
6
+ from PIL import Image
7
+ import io
8
+
9
+ logger = Logger.get_logger(__name__)
10
+
11
+ try:
12
+ from anthropic import (
13
+ Anthropic,
14
+ AnthropicBedrock,
15
+ AnthropicVertex,
16
+ APIError,
17
+ APIResponseValidationError,
18
+ APIStatusError,
19
+ )
20
+ from anthropic.types.beta import (
21
+ BetaMessageParam,
22
+ BetaTextBlockParam,
23
+ )
24
+ ANTHROPIC_AVAILABLE = True
25
+ except ImportError:
26
+ logger.warning("Anthropic SDK not available. Install with: pip install anthropic")
27
+ ANTHROPIC_AVAILABLE = False
28
+
29
+ # Import utility functions
30
+ from .anthropic_utils import (
31
+ APIProvider,
32
+ PROVIDER_TO_DEFAULT_MODEL_NAME,
33
+ COMPUTER_USE_BETA_FLAG,
34
+ PROMPT_CACHING_BETA_FLAG,
35
+ get_system_prompt,
36
+ inject_prompt_caching,
37
+ maybe_filter_to_n_most_recent_images,
38
+ response_to_params,
39
+ )
40
+
41
+ # API retry configuration
42
+ API_RETRY_TIMES = 10
43
+ API_RETRY_INTERVAL = 5 # seconds
44
+
45
+
46
+ class AnthropicGUIClient:
47
+ """
48
+ Anthropic LLM Client for GUI operations.
49
+ Uses Claude Sonnet 4.5 with computer-use-2025-01-24 API.
50
+
51
+ Features:
52
+ - Vision-based screen understanding
53
+ - Automatic screenshot resizing (configurable display size)
54
+ - Coordinate scaling between display and actual screen
55
+ """
56
+
57
    def __init__(
        self,
        model: str = "claude-sonnet-4-5",
        platform: str = "Ubuntu",
        api_key: Optional[str] = None,
        provider: str = "anthropic",
        max_tokens: int = 4096,
        screen_size: Tuple[int, int] = (1920, 1080),
        display_size: Tuple[int, int] = (1024, 768),  # Computer use display size
        pyautogui_size: Optional[Tuple[int, int]] = None,  # PyAutoGUI working size
        only_n_most_recent_images: int = 3,
        enable_prompt_caching: bool = True,
        backup_api_key: Optional[str] = None,
    ):
        """
        Initialize Anthropic GUI Client for Claude Sonnet 4.5.

        Args:
            model: Model name (only "claude-sonnet-4-5" supported)
            platform: Platform type (Ubuntu, Windows, or macOS)
            api_key: Anthropic API key (defaults to ANTHROPIC_API_KEY env var)
            provider: API provider (only "anthropic" supported)
            max_tokens: Maximum tokens for response
            screen_size: Actual screenshot resolution (width, height) - physical pixels
            display_size: Display size for computer use tool (width, height)
                Screenshots will be resized to this size before sending to API
            pyautogui_size: PyAutoGUI working size (logical pixels). If None, assumed same as screen_size.
                On Retina/HiDPI displays, this may be screen_size / 2
            only_n_most_recent_images: Number of recent screenshots to keep in history
            enable_prompt_caching: Whether to enable prompt caching for cost optimization
            backup_api_key: Backup API key (defaults to ANTHROPIC_API_KEY_BACKUP env var)

        Raises:
            RuntimeError: If the anthropic SDK is not installed.
            ValueError: If no API key is available from arguments or environment.
        """
        if not ANTHROPIC_AVAILABLE:
            raise RuntimeError("Anthropic SDK not installed. Install with: pip install anthropic")

        # Only support claude-sonnet-4-5: any other name is coerced with a warning.
        if model != "claude-sonnet-4-5":
            logger.warning(f"Model '{model}' not supported. Using 'claude-sonnet-4-5'")
            model = "claude-sonnet-4-5"

        self.model = model
        self.platform = platform
        self.api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        if not self.api_key:
            raise ValueError("Anthropic API key not provided. Set ANTHROPIC_API_KEY env var or pass api_key parameter")

        # Backup API key for failover
        self.backup_api_key = backup_api_key or os.environ.get("ANTHROPIC_API_KEY_BACKUP")

        # Only support anthropic provider; others are coerced with a warning.
        if provider != "anthropic":
            logger.warning(f"Provider '{provider}' not supported. Using 'anthropic'")
            provider = "anthropic"

        self.provider = APIProvider(provider)
        self.max_tokens = max_tokens
        self.screen_size = screen_size
        self.display_size = display_size
        self.pyautogui_size = pyautogui_size or screen_size  # Default to screen_size if not specified
        self.only_n_most_recent_images = only_n_most_recent_images
        self.enable_prompt_caching = enable_prompt_caching

        # Message history (conversation turns sent to the API)
        self.messages: List[BetaMessageParam] = []

        # Calculate resize factor for coordinate scaling
        # Step 1: LLM coordinates (display_size) -> Physical pixels (screen_size)
        # Step 2: Physical pixels -> PyAutoGUI logical pixels (pyautogui_size)
        self.resize_factor = (
            self.pyautogui_size[0] / display_size[0],  # x scale factor
            self.pyautogui_size[1] / display_size[1]  # y scale factor
        )

        logger.info(
            f"Initialized AnthropicGUIClient:\n"
            f" Model: {model}\n"
            f" Platform: {platform}\n"
            f" Screen Size (physical): {screen_size}\n"
            f" PyAutoGUI Size (logical): {self.pyautogui_size}\n"
            f" Display Size (LLM): {display_size}\n"
            f" Resize Factor (LLM->PyAutoGUI): {self.resize_factor}\n"
            f" Prompt Caching: {enable_prompt_caching}"
        )
140
+
141
+ def _create_client(self, api_key: Optional[str] = None):
142
+ """Create Anthropic client (only supports anthropic provider)."""
143
+ key = api_key or self.api_key
144
+ return Anthropic(api_key=key, max_retries=4)
145
+
146
+ def _resize_screenshot(self, screenshot_bytes: bytes) -> bytes:
147
+ """
148
+ Resize screenshot to display size for Computer Use API.
149
+
150
+ For computer-use-2025-01-24, the screenshot must be resized to the
151
+ display_width_px x display_height_px specified in the tool definition.
152
+ """
153
+ screenshot_image = Image.open(io.BytesIO(screenshot_bytes))
154
+ resized_image = screenshot_image.resize(self.display_size, Image.Resampling.LANCZOS)
155
+
156
+ output_buffer = io.BytesIO()
157
+ resized_image.save(output_buffer, format='PNG')
158
+ return output_buffer.getvalue()
159
+
160
+ def _scale_coordinates(self, x: int, y: int) -> Tuple[int, int]:
161
+ """
162
+ Scale coordinates from display size to actual screen size.
163
+
164
+ The API returns coordinates in display_size (e.g., 1024x768).
165
+ We need to scale them to actual screen_size (e.g., 1920x1080) for execution.
166
+
167
+ Args:
168
+ x, y: Coordinates in display size space
169
+
170
+ Returns:
171
+ Scaled coordinates in actual screen size space
172
+ """
173
+ scaled_x = int(x * self.resize_factor[0])
174
+ scaled_y = int(y * self.resize_factor[1])
175
+ return scaled_x, scaled_y
176
+
177
+ async def plan_action(
178
+ self,
179
+ task_description: str,
180
+ screenshot: bytes,
181
+ action_history: List[Dict[str, Any]] = None,
182
+ ) -> Tuple[Optional[str], List[str]]:
183
+ """
184
+ Plan next action based on task and current screenshot.
185
+ Includes prompt caching, error handling, and backup API key support.
186
+
187
+ Args:
188
+ task_description: Task to accomplish
189
+ screenshot: Current screenshot (PNG bytes)
190
+ action_history: Previous actions (for context)
191
+
192
+ Returns:
193
+ Tuple of (reasoning, list of pyautogui commands)
194
+ """
195
+ # Resize screenshot
196
+ resized_screenshot = self._resize_screenshot(screenshot)
197
+ screenshot_b64 = base64.b64encode(resized_screenshot).decode('utf-8')
198
+
199
+ # Initialize messages with first task + screenshot
200
+ if not self.messages:
201
+ # IMPORTANT: Image should come BEFORE text for better model understanding
202
+ # This matches OSWorld's implementation which has proven effectiveness
203
+ self.messages.append({
204
+ "role": "user",
205
+ "content": [
206
+ {
207
+ "type": "image",
208
+ "source": {
209
+ "type": "base64",
210
+ "media_type": "image/png",
211
+ "data": screenshot_b64,
212
+ },
213
+ },
214
+ {"type": "text", "text": task_description},
215
+ ]
216
+ })
217
+
218
+ # Filter images BEFORE adding new screenshot to control message size
219
+ # This is critical to avoid exceeding the 25MB API limit
220
+ image_truncation_threshold = 10
221
+ if self.only_n_most_recent_images and len(self.messages) > 1:
222
+ # Reserve 1 slot for the screenshot we're about to add
223
+ maybe_filter_to_n_most_recent_images(
224
+ self.messages,
225
+ max(1, self.only_n_most_recent_images - 1),
226
+ min_removal_threshold=1, # More aggressive filtering
227
+ )
228
+
229
+ # Add tool result from previous action if exists
230
+ if self.messages and self.messages[-1]["role"] == "assistant":
231
+ last_content = self.messages[-1]["content"]
232
+ if isinstance(last_content, list) and any(
233
+ block.get("type") == "tool_use" for block in last_content
234
+ ):
235
+ tool_use_id = next(
236
+ block["id"] for block in last_content
237
+ if block.get("type") == "tool_use"
238
+ )
239
+ self._add_tool_result(tool_use_id, "Success", resized_screenshot)
240
+
241
+ # Define tools and betas for claude-sonnet-4-5 with computer-use-2025-01-24
242
+ tools = [{
243
+ 'name': 'computer',
244
+ 'type': 'computer_20250124',
245
+ 'display_width_px': self.display_size[0],
246
+ 'display_height_px': self.display_size[1],
247
+ 'display_number': 1
248
+ }]
249
+ betas = [COMPUTER_USE_BETA_FLAG]
250
+
251
+ # Prepare system prompt with optional caching
252
+ system = BetaTextBlockParam(
253
+ type="text",
254
+ text=get_system_prompt(self.platform)
255
+ )
256
+
257
+ # Enable prompt caching if supported and enabled
258
+ if self.enable_prompt_caching:
259
+ betas.append(PROMPT_CACHING_BETA_FLAG)
260
+ inject_prompt_caching(self.messages)
261
+ system["cache_control"] = {"type": "ephemeral"} # type: ignore
262
+
263
+ # Model name - use claude-sonnet-4-5 directly
264
+ model_name = "claude-sonnet-4-5"
265
+
266
+ # Enable thinking for complex computer use tasks
267
+ extra_body = {"thinking": {"type": "enabled", "budget_tokens": 2048}}
268
+
269
+ # Log request details for debugging
270
+ # Count current images in messages
271
+ total_images = sum(
272
+ 1
273
+ for message in self.messages
274
+ for item in (message.get("content", []) if isinstance(message.get("content"), list) else [])
275
+ if isinstance(item, dict) and item.get("type") == "image"
276
+ )
277
+ tool_result_images = sum(
278
+ 1
279
+ for message in self.messages
280
+ for item in (message.get("content", []) if isinstance(message.get("content"), list) else [])
281
+ if isinstance(item, dict) and item.get("type") == "tool_result"
282
+ for content in item.get("content", [])
283
+ if isinstance(content, dict) and content.get("type") == "image"
284
+ )
285
+ logger.info(
286
+ f"Anthropic API request:\n"
287
+ f" Model: {model_name}\n"
288
+ f" Display Size: {self.display_size}\n"
289
+ f" Betas: {betas}\n"
290
+ f" Images: {total_images} ({tool_result_images} in tool_results)\n"
291
+ f" Messages: {len(self.messages)}"
292
+ )
293
+
294
+ # Try API call with retry and backup
295
+ client = self._create_client()
296
+ response = None
297
+
298
+ try:
299
+ # Retry loop with automatic image count reduction on 25MB error
300
+ for attempt in range(API_RETRY_TIMES):
301
+ try:
302
+ response = client.beta.messages.create(
303
+ max_tokens=self.max_tokens,
304
+ messages=self.messages,
305
+ model=model_name,
306
+ system=[system],
307
+ tools=tools,
308
+ betas=betas,
309
+ extra_body=extra_body
310
+ )
311
+ logger.info(f"API call succeeded on attempt {attempt + 1}")
312
+ break
313
+
314
+ except (APIError, APIStatusError, APIResponseValidationError) as e:
315
+ error_msg = str(e)
316
+ logger.warning(f"Anthropic API error (attempt {attempt+1}/{API_RETRY_TIMES}): {error_msg}")
317
+
318
+ # Handle 25MB payload limit error (including HTTP 413)
319
+ if ("25000000" in error_msg or
320
+ "Member must have length less than or equal to" in error_msg or
321
+ "request_too_large" in error_msg or
322
+ "413" in str(e)):
323
+ logger.warning("Detected 25MB limit error, reducing image count")
324
+ current_count = self.only_n_most_recent_images
325
+ new_count = max(1, current_count // 2)
326
+ self.only_n_most_recent_images = new_count
327
+
328
+ maybe_filter_to_n_most_recent_images(
329
+ self.messages,
330
+ new_count,
331
+ min_removal_threshold=1, # Aggressive filtering when hitting limit
332
+ )
333
+ logger.info(f"Image count reduced from {current_count} to {new_count}")
334
+
335
+ if attempt < API_RETRY_TIMES - 1:
336
+ time.sleep(API_RETRY_INTERVAL)
337
+ else:
338
+ raise
339
+
340
+ except (APIError, APIStatusError, APIResponseValidationError) as e:
341
+ logger.error(f"Primary API key failed: {e}")
342
+
343
+ # Try backup API key if available
344
+ if self.backup_api_key:
345
+ logger.warning("Retrying with backup API key...")
346
+ try:
347
+ backup_client = self._create_client(self.backup_api_key)
348
+ response = backup_client.beta.messages.create(
349
+ max_tokens=self.max_tokens,
350
+ messages=self.messages,
351
+ model=model_name,
352
+ system=[system],
353
+ tools=tools,
354
+ betas=betas,
355
+ extra_body=extra_body
356
+ )
357
+ logger.info("Successfully used backup API key")
358
+ except Exception as backup_e:
359
+ logger.error(f"Backup API key also failed: {backup_e}")
360
+ return None, ["FAIL"]
361
+ else:
362
+ return None, ["FAIL"]
363
+
364
+ except Exception as e:
365
+ logger.error(f"Unexpected error: {e}")
366
+ return None, ["FAIL"]
367
+
368
+ if not response:
369
+ return None, ["FAIL"]
370
+
371
+ # Parse response using utility function
372
+ response_params = response_to_params(response)
373
+
374
+ # Extract reasoning and commands
375
+ reasoning = ""
376
+ commands = []
377
+
378
+ for block in response_params:
379
+ block_type = block.get("type")
380
+
381
+ if block_type == "text":
382
+ reasoning = block.get("text", "")
383
+ elif block_type == "thinking":
384
+ reasoning = block.get("thinking", "")
385
+ elif block_type == "tool_use":
386
+ tool_input = block.get("input", {})
387
+ command = self._parse_computer_tool_use(tool_input)
388
+ if command:
389
+ commands.append(command)
390
+ else:
391
+ logger.warning(f"Failed to parse tool_use: {tool_input}")
392
+
393
+ # Store assistant response
394
+ self.messages.append({
395
+ "role": "assistant",
396
+ "content": response_params
397
+ })
398
+
399
+ logger.info(f"Parsed {len(commands)} commands from response")
400
+
401
+ return reasoning, commands
402
+
403
+ def _add_tool_result(
404
+ self,
405
+ tool_use_id: str,
406
+ result: str,
407
+ screenshot_bytes: Optional[bytes] = None
408
+ ):
409
+ """
410
+ Add tool result to message history.
411
+ IMPORTANT: Put screenshot BEFORE text for consistency with initial message.
412
+ """
413
+ # Build content list with image first (if provided), then text
414
+ content_list = []
415
+
416
+ # Add screenshot first if provided (consistent with initial message ordering)
417
+ if screenshot_bytes is not None:
418
+ screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')
419
+ content_list.append({
420
+ "type": "image",
421
+ "source": {
422
+ "type": "base64",
423
+ "media_type": "image/png",
424
+ "data": screenshot_b64
425
+ }
426
+ })
427
+
428
+ # Then add text result
429
+ content_list.append({"type": "text", "text": result})
430
+
431
+ tool_result_content = [{
432
+ "type": "tool_result",
433
+ "tool_use_id": tool_use_id,
434
+ "content": content_list
435
+ }]
436
+
437
+ self.messages.append({
438
+ "role": "user",
439
+ "content": tool_result_content
440
+ })
441
+
442
+ def _parse_computer_tool_use(self, tool_input: Dict[str, Any]) -> Optional[str]:
443
+ """
444
+ Parse Anthropic computer tool use to pyautogui command.
445
+
446
+ Args:
447
+ tool_input: Tool input from Anthropic (action, coordinate, text, etc.)
448
+
449
+ Returns:
450
+ PyAutoGUI command string or control command (DONE, FAIL)
451
+ """
452
+ action = tool_input.get("action")
453
+ if not action:
454
+ return None
455
+
456
+ # Action conversion
457
+ action_conversion = {
458
+ "left click": "click",
459
+ "right click": "right_click"
460
+ }
461
+ action = action_conversion.get(action, action)
462
+
463
+ text = tool_input.get("text")
464
+ coordinate = tool_input.get("coordinate")
465
+ scroll_direction = tool_input.get("scroll_direction")
466
+ scroll_amount = tool_input.get("scroll_amount", 5)
467
+
468
+ # Scale coordinates to actual screen size
469
+ if coordinate:
470
+ coordinate = self._scale_coordinates(coordinate[0], coordinate[1])
471
+
472
+ # Build commands
473
+ command = ""
474
+
475
+ if action == "mouse_move":
476
+ if coordinate:
477
+ x, y = coordinate
478
+ command = f"pyautogui.moveTo({x}, {y}, duration=0.5)"
479
+
480
+ elif action in ("left_click", "click"):
481
+ if coordinate:
482
+ x, y = coordinate
483
+ command = f"pyautogui.click({x}, {y})"
484
+ else:
485
+ command = "pyautogui.click()"
486
+
487
+ elif action == "right_click":
488
+ if coordinate:
489
+ x, y = coordinate
490
+ command = f"pyautogui.rightClick({x}, {y})"
491
+ else:
492
+ command = "pyautogui.rightClick()"
493
+
494
+ elif action == "double_click":
495
+ if coordinate:
496
+ x, y = coordinate
497
+ command = f"pyautogui.doubleClick({x}, {y})"
498
+ else:
499
+ command = "pyautogui.doubleClick()"
500
+
501
+ elif action == "middle_click":
502
+ if coordinate:
503
+ x, y = coordinate
504
+ command = f"pyautogui.middleClick({x}, {y})"
505
+ else:
506
+ command = "pyautogui.middleClick()"
507
+
508
+ elif action == "left_click_drag":
509
+ if coordinate:
510
+ x, y = coordinate
511
+ command = f"pyautogui.dragTo({x}, {y}, duration=0.5)"
512
+
513
+ elif action == "key":
514
+ if text:
515
+ keys = text.split('+')
516
+ # Key conversion
517
+ key_conversion = {
518
+ "page_down": "pagedown",
519
+ "page_up": "pageup",
520
+ "super_l": "win",
521
+ "super": "command",
522
+ "escape": "esc"
523
+ }
524
+ converted_keys = [key_conversion.get(k.strip().lower(), k.strip().lower()) for k in keys]
525
+
526
+ # Press and release keys
527
+ for key in converted_keys:
528
+ command += f"pyautogui.keyDown('{key}'); "
529
+ for key in reversed(converted_keys):
530
+ command += f"pyautogui.keyUp('{key}'); "
531
+ # Remove trailing semicolon and space
532
+ command = command.rstrip('; ')
533
+
534
+ elif action == "type":
535
+ if text:
536
+ command = f"pyautogui.typewrite({repr(text)}, interval=0.01)"
537
+
538
+ elif action == "scroll":
539
+ if scroll_direction in ("up", "down"):
540
+ scroll_value = scroll_amount if scroll_direction == "up" else -scroll_amount
541
+ if coordinate:
542
+ x, y = coordinate
543
+ command = f"pyautogui.scroll({scroll_value}, {x}, {y})"
544
+ else:
545
+ command = f"pyautogui.scroll({scroll_value})"
546
+ elif scroll_direction in ("left", "right"):
547
+ scroll_value = scroll_amount if scroll_direction == "right" else -scroll_amount
548
+ if coordinate:
549
+ x, y = coordinate
550
+ command = f"pyautogui.hscroll({scroll_value}, {x}, {y})"
551
+ else:
552
+ command = f"pyautogui.hscroll({scroll_value})"
553
+
554
+ elif action == "screenshot":
555
+ # Screenshot is automatically handled by the system
556
+ # Return special marker to indicate no action needed
557
+ return "SCREENSHOT"
558
+
559
+ elif action == "wait":
560
+ # Wait for specified duration
561
+ duration = tool_input.get("duration", 1)
562
+ command = f"pyautogui.sleep({duration})"
563
+
564
+ elif action == "done":
565
+ return "DONE"
566
+
567
+ elif action == "fail":
568
+ return "FAIL"
569
+
570
+ return command if command else None
571
+
572
+ def reset(self):
573
+ """Reset message history."""
574
+ self.messages = []
575
+ logger.info("Reset AnthropicGUIClient message history")
openspace/grounding/backends/gui/anthropic_utils.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, cast
2
+ from enum import Enum
3
+ from datetime import datetime
4
+ from openspace.utils.logging import Logger
5
+
6
+ logger = Logger.get_logger(__name__)
7
+
8
+ try:
9
+ from anthropic.types.beta import (
10
+ BetaCacheControlEphemeralParam,
11
+ BetaContentBlockParam,
12
+ BetaImageBlockParam,
13
+ BetaMessage,
14
+ BetaMessageParam,
15
+ BetaTextBlock,
16
+ BetaTextBlockParam,
17
+ BetaToolResultBlockParam,
18
+ BetaToolUseBlockParam,
19
+ )
20
+ ANTHROPIC_AVAILABLE = True
21
+ except ImportError:
22
+ ANTHROPIC_AVAILABLE = False
23
+
24
+
25
+ # Beta flags
26
+ # For claude-sonnet-4-5 with computer-use-2025-01-24
27
+ COMPUTER_USE_BETA_FLAG = "computer-use-2025-01-24"
28
+ PROMPT_CACHING_BETA_FLAG = "prompt-caching-2024-07-31"
29
+
30
+
31
+ class APIProvider(Enum):
32
+ """API Provider enumeration"""
33
+ ANTHROPIC = "anthropic"
34
+ # BEDROCK = "bedrock"
35
+ # VERTEX = "vertex"
36
+
37
+
38
+ # Provider to model name mapping (simplified for claude-sonnet-4-5 only)
39
+ PROVIDER_TO_DEFAULT_MODEL_NAME: dict = {
40
+ (APIProvider.ANTHROPIC, "claude-sonnet-4-5"): "claude-sonnet-4-5",
41
+ # (APIProvider.BEDROCK, "claude-sonnet-4-5"): "us.anthropic.claude-sonnet-4-5-v1:0",
42
+ # (APIProvider.VERTEX, "claude-sonnet-4-5"): "claude-sonnet-4-5-v1",
43
+ }
44
+
45
+
46
+ def get_system_prompt(platform: str = "Ubuntu") -> str:
47
+ """
48
+ Get system prompt based on platform.
49
+
50
+ Args:
51
+ platform: Platform type (Ubuntu, Windows, macOS, or Darwin)
52
+
53
+ Returns:
54
+ System prompt string
55
+ """
56
+ # Normalize platform name
57
+ platform_lower = platform.lower()
58
+
59
+ if platform_lower in ["windows", "win32"]:
60
+ return f"""<SYSTEM_CAPABILITY>
61
+ * You are utilising a Windows virtual machine using x86_64 architecture with internet access.
62
+ * You can use the computer tool to interact with the desktop: take screenshots, click, type, and control applications.
63
+ * To accomplish tasks, you MUST use the computer tool to see the screen and take actions.
64
+ * To open browser, please just click on the Chrome icon. Note, Chrome is what is installed on your system.
65
+ * When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
66
+ * DO NOT ask users for clarification during task execution. DO NOT stop to request more information from users. Always take action using available tools.
67
+ * When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
68
+ * The current date is {datetime.today().strftime('%A, %B %d, %Y')}.
69
+ * Home directory of this Windows system is 'C:\\Users\\user'.
70
+ * When you want to open some applications on Windows, please use Double Click on it instead of clicking once.
71
+ * After each action, the system will provide you with a new screenshot showing the result.
72
+ * Continue taking actions until the task is complete.
73
+ </SYSTEM_CAPABILITY>"""
74
+ elif platform_lower in ["macos", "darwin", "mac"]:
75
+ return f"""<SYSTEM_CAPABILITY>
76
+ * You are utilising a macOS system with internet access.
77
+ * You can use the computer tool to interact with the desktop: take screenshots, click, type, and control applications.
78
+ * To accomplish tasks, you MUST use the computer tool to see the screen and take actions.
79
+ * To open browser, please just click on the Chrome icon. Note, Chrome is what is installed on your system.
80
+ * When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
81
+ * DO NOT ask users for clarification during task execution. DO NOT stop to request more information from users. Always take action using available tools.
82
+ * When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
83
+ * The current date is {datetime.today().strftime('%A, %B %d, %Y')}.
84
+ * Home directory of this macOS system is typically '/Users/[username]' or can be accessed via '~'.
85
+ * On macOS, use Command (⌘) key combinations instead of Ctrl (e.g., Command+C for copy).
86
+ * After each action, the system will provide you with a new screenshot showing the result.
87
+ * Continue taking actions until the task is complete.
88
+ * When the task is completed, simply describe what you've done in your response WITHOUT using the tool again.
89
+ </SYSTEM_CAPABILITY>"""
90
+ else: # Ubuntu/Linux
91
+ return f"""<SYSTEM_CAPABILITY>
92
+ * You are utilising an Ubuntu virtual machine using x86_64 architecture with internet access.
93
+ * You can use the computer tool to interact with the desktop: take screenshots, click, type, and control applications.
94
+ * To accomplish tasks, you MUST use the computer tool to see the screen and take actions.
95
+ * To open browser, please just click on the Chrome icon. Note, Chrome is what is installed on your system.
96
+ * When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
97
+ * DO NOT ask users for clarification during task execution. DO NOT stop to request more information from users. Always take action using available tools.
98
+ * When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
99
+ * The current date is {datetime.today().strftime('%A, %B %d, %Y')}.
100
+ * Home directory of this Ubuntu system is '/home/user'.
101
+ * After each action, the system will provide you with a new screenshot showing the result.
102
+ * Continue taking actions until the task is complete.
103
+ </SYSTEM_CAPABILITY>"""
104
+
105
+
106
+ def inject_prompt_caching(messages: List[BetaMessageParam]) -> None:
107
+ """
108
+ Set cache breakpoints for the 3 most recent turns.
109
+ One cache breakpoint is left for tools/system prompt, to be shared across sessions.
110
+
111
+ Args:
112
+ messages: Message history (modified in place)
113
+ """
114
+ if not ANTHROPIC_AVAILABLE:
115
+ return
116
+
117
+ breakpoints_remaining = 3
118
+ for message in reversed(messages):
119
+ if message["role"] == "user" and isinstance(
120
+ content := message["content"], list
121
+ ):
122
+ if breakpoints_remaining:
123
+ breakpoints_remaining -= 1
124
+ # Use type ignore to bypass TypedDict check until SDK types are updated
125
+ content[-1]["cache_control"] = BetaCacheControlEphemeralParam( # type: ignore
126
+ {"type": "ephemeral"}
127
+ )
128
+ else:
129
+ content[-1].pop("cache_control", None)
130
+ # we'll only ever have one extra turn per loop
131
+ break
132
+
133
+
134
+ def maybe_filter_to_n_most_recent_images(
135
+ messages: List[BetaMessageParam],
136
+ images_to_keep: int,
137
+ min_removal_threshold: int,
138
+ ) -> None:
139
+ """
140
+ With the assumption that images are screenshots that are of diminishing value as
141
+ the conversation progresses, remove all but the final `images_to_keep` tool_result
142
+ images in place, with a chunk of min_removal_threshold to reduce the amount we
143
+ break the implicit prompt cache.
144
+
145
+ Args:
146
+ messages: Message history (modified in place)
147
+ images_to_keep: Number of recent images to keep
148
+ min_removal_threshold: Minimum number of images to remove at once (for cache efficiency)
149
+ """
150
+ if not ANTHROPIC_AVAILABLE or images_to_keep is None:
151
+ return
152
+
153
+ tool_result_blocks = cast(
154
+ list[BetaToolResultBlockParam],
155
+ [
156
+ item
157
+ for message in messages
158
+ for item in (
159
+ message["content"] if isinstance(message["content"], list) else []
160
+ )
161
+ if isinstance(item, dict) and item.get("type") == "tool_result"
162
+ ],
163
+ )
164
+
165
+ total_images = sum(
166
+ 1
167
+ for tool_result in tool_result_blocks
168
+ for content in tool_result.get("content", [])
169
+ if isinstance(content, dict) and content.get("type") == "image"
170
+ )
171
+
172
+ images_to_remove = total_images - images_to_keep
173
+ # for better cache behavior, we want to remove in chunks
174
+ images_to_remove -= images_to_remove % min_removal_threshold
175
+
176
+ for tool_result in tool_result_blocks:
177
+ if isinstance(tool_result.get("content"), list):
178
+ new_content = []
179
+ for content in tool_result.get("content", []):
180
+ if isinstance(content, dict) and content.get("type") == "image":
181
+ if images_to_remove > 0:
182
+ images_to_remove -= 1
183
+ continue
184
+ new_content.append(content)
185
+ tool_result["content"] = new_content
186
+
187
+
188
+ def response_to_params(response: BetaMessage) -> List[BetaContentBlockParam]:
189
+ """
190
+ Convert Anthropic response to parameter list.
191
+ Handles both text blocks, tool use blocks, and thinking blocks.
192
+
193
+ Args:
194
+ response: Anthropic API response
195
+
196
+ Returns:
197
+ List of content blocks
198
+ """
199
+ if not ANTHROPIC_AVAILABLE:
200
+ return []
201
+
202
+ res: List[BetaContentBlockParam] = []
203
+ if response.content:
204
+ for block in response.content:
205
+ # Check block type using type attribute
206
+ # Note: type may be a string or enum, so convert to string for comparison
207
+ block_type = str(getattr(block, "type", ""))
208
+
209
+ if block_type == "text":
210
+ # Regular text block
211
+ if isinstance(block, BetaTextBlock) and block.text:
212
+ res.append(BetaTextBlockParam(type="text", text=block.text))
213
+ elif block_type == "thinking":
214
+ # Thinking block (for Claude 4 and Sonnet 3.7)
215
+ thinking_block = {
216
+ "type": "thinking",
217
+ "thinking": getattr(block, "thinking", ""),
218
+ }
219
+ if hasattr(block, "signature"):
220
+ thinking_block["signature"] = getattr(block, "signature", None)
221
+ res.append(cast(BetaContentBlockParam, thinking_block))
222
+ elif block_type == "tool_use":
223
+ # Tool use block - only include required fields to avoid API errors
224
+ # (e.g., 'caller' field is not permitted by Anthropic API)
225
+ tool_use_dict = {
226
+ "type": "tool_use",
227
+ "id": block.id,
228
+ "name": block.name,
229
+ "input": block.input,
230
+ }
231
+ res.append(cast(BetaToolUseBlockParam, tool_use_dict))
232
+ else:
233
+ # Unknown block type - try to handle generically
234
+ try:
235
+ res.append(cast(BetaContentBlockParam, block.model_dump()))
236
+ except Exception as e:
237
+ logger.warning(f"Failed to parse block type {block_type}: {e}")
238
+ return res
239
+ else:
240
+ return []
241
+
openspace/grounding/backends/gui/config.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Optional
2
+ import os
3
+ import platform as platform_module
4
+ from openspace.utils.logging import Logger
5
+
6
+ logger = Logger.get_logger(__name__)
7
+
8
+
9
+ def build_llm_config(user_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
10
+ """
11
+ Build complete LLM configuration with auto-detection and environment variables.
12
+
13
+ Auto-detects:
14
+ - API key from environment variables (ANTHROPIC_API_KEY)
15
+ - Platform from system (macOS/Windows/Ubuntu)
16
+ - Provider defaults to 'anthropic'
17
+
18
+ User-provided config values will override auto-detected values.
19
+
20
+ Args:
21
+ user_config: User-provided configuration (optional)
22
+
23
+ Returns:
24
+ Complete LLM configuration dict
25
+
26
+ Example:
27
+ >>> # Auto-detect everything
28
+ >>> config = build_llm_config()
29
+
30
+ >>> # Override specific values
31
+ >>> config = build_llm_config({
32
+ ... "model": "claude-3-5-sonnet-20241022",
33
+ ... "max_tokens": 8192
34
+ ... })
35
+ """
36
+ if user_config is None:
37
+ user_config = {}
38
+
39
+ # Auto-detect platform
40
+ system = platform_module.system()
41
+ if system == "Darwin":
42
+ detected_platform = "macOS"
43
+ elif system == "Windows":
44
+ detected_platform = "Windows"
45
+ else: # Linux
46
+ detected_platform = "Ubuntu"
47
+
48
+ # Auto-detect API key from environment
49
+ api_key = os.environ.get("ANTHROPIC_API_KEY")
50
+ if not api_key:
51
+ logger.warning(
52
+ "ANTHROPIC_API_KEY not found in environment. "
53
+ "Please set it: export ANTHROPIC_API_KEY='your-key'"
54
+ )
55
+
56
+ # Build configuration with precedence: user_config > auto-detected > defaults
57
+ config = {
58
+ "type": user_config.get("type", "anthropic"),
59
+ "model": user_config.get("model", "claude-sonnet-4-5"),
60
+ "platform": user_config.get("platform", detected_platform),
61
+ "api_key": user_config.get("api_key", api_key),
62
+ "provider": user_config.get("provider", "anthropic"),
63
+ "max_tokens": user_config.get("max_tokens", 4096),
64
+ "only_n_most_recent_images": user_config.get("only_n_most_recent_images", 3),
65
+ "enable_prompt_caching": user_config.get("enable_prompt_caching", True),
66
+ }
67
+
68
+ # Optional: screen_size (will be auto-detected from screenshot later)
69
+ if "screen_size" in user_config:
70
+ config["screen_size"] = user_config["screen_size"]
71
+
72
+ logger.info(f"Built LLM config - Platform: {config['platform']}, Model: {config['model']}")
73
+ if config["api_key"]:
74
+ logger.info(f"API key loaded: {config['api_key'][:10]}...")
75
+
76
+ return config
openspace/grounding/backends/gui/provider.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Union
2
+ from openspace.grounding.core.types import BackendType, SessionConfig
3
+ from openspace.grounding.core.provider import Provider
4
+ from openspace.grounding.core.session import BaseSession
5
+ from openspace.config import get_config
6
+ from openspace.config.utils import get_config_value
7
+ from openspace.platforms import get_local_server_config
8
+ from openspace.utils.logging import Logger
9
+ from .transport.connector import GUIConnector
10
+ from .transport.local_connector import LocalGUIConnector
11
+ from .session import GUISession
12
+
13
+ logger = Logger.get_logger(__name__)
14
+
15
+
16
+ class GUIProvider(Provider):
17
+ """
18
+ Provider for GUI desktop environment.
19
+ Manages communication with desktop_env through HTTP API or local in-process execution.
20
+
21
+ Supports two modes:
22
+ - "local": Execute GUI operations directly in-process (no server needed)
23
+ - "server": Connect to a running local_server via HTTP API
24
+
25
+ Supports automatic default session creation:
26
+ - If no session exists, a default session will be created on first use
27
+ - Default session uses configuration from config file or environment
28
+ """
29
+
30
+ DEFAULT_SID = BackendType.GUI.value
31
+
32
+ def __init__(self, config: Dict[str, Any] = None):
33
+ """
34
+ Initialize GUI provider.
35
+
36
+ Args:
37
+ config: Provider configuration
38
+ """
39
+ super().__init__(BackendType.GUI, config)
40
+ self.connectors: Dict[str, Union[GUIConnector, LocalGUIConnector]] = {}
41
+
42
+ async def initialize(self) -> None:
43
+ """
44
+ Initialize the provider and create default session.
45
+ """
46
+ if not self.is_initialized:
47
+ logger.info("Initializing GUI provider")
48
+ # Auto-create default session
49
+ await self.create_session(SessionConfig(
50
+ session_name=self.DEFAULT_SID,
51
+ backend_type=BackendType.GUI,
52
+ connection_params={}
53
+ ))
54
+ self.is_initialized = True
55
+
56
+ async def create_session(self, session_config: SessionConfig) -> BaseSession:
57
+ """
58
+ Create GUI session.
59
+
60
+ Args:
61
+ session_config: Session configuration
62
+
63
+ Returns:
64
+ GUISession instance
65
+ """
66
+ # Load GUI backend configuration
67
+ gui_config = get_config().get_backend_config("gui")
68
+
69
+ # Determine execution mode: "local" or "server"
70
+ mode = getattr(gui_config, "mode", "local")
71
+
72
+ # Extract connection parameters
73
+ conn_params = session_config.connection_params
74
+ timeout = get_config_value(conn_params, 'timeout', gui_config.timeout)
75
+ retry_times = get_config_value(conn_params, 'retry_times', gui_config.max_retries)
76
+ retry_interval = get_config_value(conn_params, 'retry_interval', gui_config.retry_interval)
77
+
78
+ # Build pkgs_prefix with failsafe setting
79
+ failsafe_str = "True" if gui_config.failsafe else "False"
80
+ pkgs_prefix = get_config_value(
81
+ conn_params,
82
+ 'pkgs_prefix',
83
+ gui_config.pkgs_prefix.format(failsafe=failsafe_str, command="{command}")
84
+ )
85
+
86
+ if mode == "local":
87
+ # ---------- LOCAL MODE ----------
88
+ logger.info("GUI backend using LOCAL mode (no server required)")
89
+ connector = LocalGUIConnector(
90
+ timeout=timeout,
91
+ retry_times=retry_times,
92
+ retry_interval=retry_interval,
93
+ pkgs_prefix=pkgs_prefix,
94
+ )
95
+ else:
96
+ # ---------- SERVER MODE ----------
97
+ logger.info("GUI backend using SERVER mode (connecting to local_server)")
98
+ local_server_config = get_local_server_config()
99
+ vm_ip = get_config_value(conn_params, 'vm_ip', local_server_config['host'])
100
+ server_port = get_config_value(conn_params, 'server_port', local_server_config['port'])
101
+
102
+ connector = GUIConnector(
103
+ vm_ip=vm_ip,
104
+ server_port=server_port,
105
+ timeout=timeout,
106
+ retry_times=retry_times,
107
+ retry_interval=retry_interval,
108
+ pkgs_prefix=pkgs_prefix,
109
+ )
110
+
111
+ # Create session
112
+ session = GUISession(
113
+ connector=connector,
114
+ session_id=session_config.session_name,
115
+ backend_type=BackendType.GUI,
116
+ config=session_config,
117
+ )
118
+
119
+ # Store connector and session
120
+ self.connectors[session_config.session_name] = connector
121
+ self._sessions[session_config.session_name] = session
122
+
123
+ logger.info(f"Created GUI session: {session_config.session_name} (mode={mode})")
124
+ return session
125
+
126
+ async def close_session(self, session_name: str) -> None:
127
+ """
128
+ Close GUI session.
129
+
130
+ Args:
131
+ session_name: Name of the session to close
132
+ """
133
+ if session_name in self._sessions:
134
+ session = self._sessions[session_name]
135
+ await session.disconnect()
136
+ del self._sessions[session_name]
137
+
138
+ if session_name in self.connectors:
139
+ connector = self.connectors[session_name]
140
+ await connector.disconnect()
141
+ del self.connectors[session_name]
142
+
143
+ logger.info(f"Closed GUI session: {session_name}")
openspace/grounding/backends/gui/session.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Union
2
+ import os
3
+ from openspace.grounding.core.session import BaseSession
4
+ from openspace.grounding.core.types import BackendType, SessionStatus, SessionConfig
5
+ from openspace.utils.logging import Logger
6
+ from .transport.connector import GUIConnector
7
+ from .transport.local_connector import LocalGUIConnector
8
+ from .tool import GUIAgentTool
9
+ from .config import build_llm_config
10
+
11
+ logger = Logger.get_logger(__name__)
12
+
13
+
14
class GUISession(BaseSession):
    """
    Session for GUI desktop environment.

    Manages connection lifecycle and tools for GUI automation, and
    optionally wires up an Anthropic vision LLM client for planning.
    """

    def __init__(
        self,
        connector: Union[GUIConnector, LocalGUIConnector],
        session_id: str,
        # Fixed: the annotation was `BackendType.GUI` (an enum *member*,
        # not a type). Callers are expected to pass BackendType.GUI.
        backend_type: BackendType,
        config: SessionConfig,
        auto_connect: bool = True,
        auto_initialize: bool = True,
    ):
        """
        Initialize GUI session.

        Args:
            connector: GUI connector (server-mode HTTP or local mode)
            session_id: Unique session identifier
            backend_type: Backend type (expected: BackendType.GUI)
            config: Session configuration
            auto_connect: Auto-connect on context enter
            auto_initialize: Auto-initialize on context enter
        """
        super().__init__(
            connector=connector,
            session_id=session_id,
            backend_type=backend_type,
            auto_connect=auto_connect,
            auto_initialize=auto_initialize,
        )
        self.config = config
        # GUI-typed alias of the base-class connector for clarity.
        self.gui_connector = connector

    async def initialize(self) -> Dict[str, Any]:
        """
        Initialize session: connect, build the LLM client (if configured),
        and create the GUI agent tool.

        Returns:
            Session information dict (ids, endpoint, tools, llm_client kind)
        """
        logger.info(f"Initializing GUI session: {self.session_id}")

        # Ensure connected before probing the desktop.
        if not self.connector.is_connected:
            await self.connect()

        # Create LLM client if configured.
        llm_client = None
        user_llm_config = self.config.connection_params.get("llm_config")

        # Build complete LLM config with auto-detection:
        # - user-provided llm_config is merged with auto-detected values;
        # - with no llm_config, one is auto-built when ANTHROPIC_API_KEY exists.
        if user_llm_config or os.environ.get("ANTHROPIC_API_KEY"):
            llm_config = build_llm_config(user_llm_config)

            if llm_config.get("type") == "anthropic":
                if not llm_config.get("api_key"):
                    logger.warning(
                        "Anthropic API key not found. Skipping LLM client initialization. "
                        "Set ANTHROPIC_API_KEY environment variable or provide api_key in llm_config."
                    )
                else:
                    try:
                        from .anthropic_client import AnthropicGUIClient

                        # Detect actual screen size from a screenshot (most
                        # accurate): PyAutoGUI may report the logical
                        # resolution, while the model needs the physical
                        # screenshot resolution.
                        try:
                            screenshot_bytes = await self.gui_connector.get_screenshot()
                            if screenshot_bytes:
                                from PIL import Image
                                import io
                                img = Image.open(io.BytesIO(screenshot_bytes))
                                actual_screen_size = img.size
                                logger.info(f"Auto-detected screen size from screenshot: {actual_screen_size}")
                                screen_size = actual_screen_size
                            else:
                                raise RuntimeError("Could not get screenshot")
                        except Exception as e:
                            # Fixed: the caught exception was previously
                            # discarded — keep the cause visible for debugging.
                            logger.debug(f"Screenshot-based screen size detection failed: {e}")
                            # Fallback to pyautogui detection.
                            actual_screen_size = await self.gui_connector.get_screen_size()
                            if actual_screen_size:
                                logger.info(f"Auto-detected screen size from pyautogui: {actual_screen_size}")
                                screen_size = actual_screen_size
                            else:
                                # Final fallback to configured value.
                                screen_size = llm_config.get("screen_size", (1920, 1080))
                                logger.warning(f"Could not auto-detect screen size, using configured: {screen_size}")

                        # Detect PyAutoGUI working size (logical pixels).
                        pyautogui_size = await self.gui_connector.get_screen_size()
                        if pyautogui_size:
                            logger.info(f"PyAutoGUI working size (logical): {pyautogui_size}")
                        else:
                            # If undetectable, assume it matches the screen size.
                            pyautogui_size = screen_size
                            logger.warning(f"Could not detect PyAutoGUI size, assuming same as screen: {pyautogui_size}")

                        llm_client = AnthropicGUIClient(
                            model=llm_config["model"],
                            platform=llm_config["platform"],
                            api_key=llm_config["api_key"],
                            provider=llm_config["provider"],
                            screen_size=screen_size,
                            pyautogui_size=pyautogui_size,
                            max_tokens=llm_config["max_tokens"],
                            only_n_most_recent_images=llm_config["only_n_most_recent_images"],
                        )
                        logger.info(
                            f"Initialized Anthropic LLM client - "
                            f"Model: {llm_config['model']}, Platform: {llm_config['platform']}"
                        )
                    except Exception as e:
                        # LLM client is optional; the session still works
                        # without it (the tool reports FAIL at planning time).
                        logger.warning(f"Failed to initialize Anthropic client: {e}")

        # Optional recording manager for intermediate-step traces.
        recording_manager = self.config.connection_params.get("recording_manager")

        # Create GUI Agent Tool.
        self.tools = [
            GUIAgentTool(
                connector=self.gui_connector,
                llm_client=llm_client,
                recording_manager=recording_manager
            )
        ]

        logger.info(f"Initialized GUI session with {len(self.tools)} tool(s)")

        # Local-mode connectors have no remote endpoint, so read the address
        # fields defensively instead of assuming they exist on every connector.
        session_info = {
            "session_id": self.session_id,
            "backend_type": self.backend_type.value,
            "vm_ip": getattr(self.gui_connector, "vm_ip", None),
            "server_port": getattr(self.gui_connector, "server_port", None),
            "num_tools": len(self.tools),
            "tools": [tool.name for tool in self.tools],
            "llm_client": "anthropic" if llm_client else "none",
        }

        return session_info

    async def connect(self) -> None:
        """Connect to the GUI desktop environment (idempotent)."""
        if self.connector.is_connected:
            return

        self.status = SessionStatus.CONNECTING
        logger.info(f"Connecting to desktop_env at {self.gui_connector.base_url}")

        await self.connector.connect()

        self.status = SessionStatus.CONNECTED
        logger.info("Connected to desktop environment")

    async def disconnect(self) -> None:
        """Disconnect from the GUI desktop environment (idempotent)."""
        if not self.connector.is_connected:
            return

        logger.info("Disconnecting from desktop environment")
        await self.connector.disconnect()

        self.status = SessionStatus.DISCONNECTED
        logger.info("Disconnected from desktop environment")

    @property
    def is_connected(self) -> bool:
        """Check if session is connected."""
        return self.connector.is_connected
openspace/grounding/backends/gui/tool.py ADDED
@@ -0,0 +1,712 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ from typing import Any, Dict
3
+ from openspace.grounding.core.tool.base import BaseTool
4
+ from openspace.grounding.core.types import BackendType, ToolResult, ToolStatus
5
+ from .transport.connector import GUIConnector
6
+ from .transport.actions import ACTION_SPACE, KEYBOARD_KEYS
7
+ from openspace.utils.logging import Logger
8
+
9
+ logger = Logger.get_logger(__name__)
10
+
11
+
12
class GUIAgentTool(BaseTool):
    """
    LLM-powered GUI Agent Tool.

    This tool acts as an intelligent agent that:
    - Takes a task description as input
    - Observes the desktop via screenshot
    - Uses LLM/VLM to understand and plan actions
    - Outputs action space commands
    - Executes actions through the connector
    """

    _name = "gui_agent"
    # NOTE: this description is part of the tool's advertised contract.
    # Fixed: it previously claimed the default max_steps was 25, but the
    # actual `_arun` signature default is 50.
    _description = """Vision-based GUI automation agent for tasks requiring graphical interface interaction.

Use this tool when the task involves:
- Operating desktop applications with graphical interfaces (browsers, editors, design tools, etc.)
- Tasks that require visual understanding of UI elements, layouts, or content
- Multi-step workflows that need click, drag, type, or other GUI interactions
- Scenarios where programmatic APIs or command-line tools are unavailable or insufficient

The agent observes screen state through screenshots, uses vision-language models to understand
the interface, plans appropriate actions, and executes GUI operations autonomously.

IMPORTANT - max_steps Parameter Guidelines:
- Simple tasks (1-2 actions): 15-20 steps
- Medium tasks (3-5 actions): 25-35 steps
- Complex tasks (6+ actions, like web navigation): 35-50 steps
- When uncertain, prefer larger values (35+) to avoid premature termination
- Default is 50, which is suitable for most multi-step workflows

Input:
- task_description: Natural language task description
- max_steps: Maximum actions (default 50, reduce for simple tasks)

Output: Task execution results with action history and completion status
"""

    backend_type = BackendType.GUI

    def __init__(self, connector: GUIConnector, llm_client=None, recording_manager=None, **kwargs):
        """
        Initialize GUI Agent Tool.

        Args:
            connector: GUI connector for communication with desktop_env
            llm_client: LLM/VLM client for vision-based planning (optional)
            recording_manager: RecordingManager for recording intermediate steps (optional)
            **kwargs: Additional arguments for BaseTool
        """
        super().__init__(**kwargs)
        self.connector = connector
        self.llm_client = llm_client  # May also be injected after construction
        self.recording_manager = recording_manager  # For recording intermediate steps
        self.action_history = []  # Track executed actions for the current run
67
+
68
    async def _arun(
        self,
        task_description: str,
        max_steps: int = 50,
    ) -> ToolResult:
        """
        Execute a GUI automation task using LLM planning.

        This is the main entry point that:
        1. Gets current screenshot
        2. Uses LLM to plan next action based on task and screenshot
        3. Executes the planned action
        4. Repeats until task is complete or max_steps reached

        Args:
            task_description: Natural language description of the task
            max_steps: Maximum number of actions to execute (default 50).
                Recommended values based on task complexity:
                - Simple (1-2 actions): 15-20
                - Medium (3-5 actions): 25-35
                - Complex (6+ actions, web navigation, multi-app): 35-50
                When in doubt, use higher values to avoid premature termination

        Returns:
            ToolResult with task execution status; on failure the metadata
            carries the action history accumulated so far.
        """
        # Guard: an empty/None task cannot be planned.
        if not task_description:
            return ToolResult(
                status=ToolStatus.ERROR,
                error="task_description is required"
            )

        logger.info(f"Starting GUI task: {task_description}")
        # Reset per-run state; the history is also surfaced in result metadata.
        self.action_history = []

        # Execute task with LLM planning loop
        try:
            result = await self._execute_task_with_planning(
                task_description=task_description,
                max_steps=max_steps,
            )
            return result

        except Exception as e:
            # Unexpected failures (connector/LLM errors) are reported as an
            # ERROR result rather than propagated to the caller.
            logger.error(f"Task execution failed: {e}")
            return ToolResult(
                status=ToolStatus.ERROR,
                error=str(e),
                metadata={
                    "task_description": task_description,
                    "actions_executed": len(self.action_history),
                    "action_history": self.action_history,
                }
            )
123
    async def _execute_task_with_planning(
        self,
        task_description: str,
        max_steps: int,
    ) -> ToolResult:
        """
        Execute task with LLM-based planning loop.

        Planning loop:
        1. Observe: Get screenshot
        2. Plan: LLM decides next action
        3. Execute: Perform the action
        4. Verify: Check if task is complete
        5. Repeat until done or max_steps

        Exhausting max_steps without a DONE/FAIL verdict is reported as an
        ERROR result.

        NOTE(review): `_record_intermediate_step` exists on this class but is
        not called from this loop — steps are only accumulated in the
        in-memory `intermediate_steps` list. Confirm whether per-step
        recording is intended here.

        Args:
            task_description: Task to complete
            max_steps: Maximum planning iterations

        Returns:
            ToolResult with execution details (history, screenshots, steps)
        """
        # Collect all screenshots for visual analysis (returned in metadata).
        all_screenshots = []
        # Collect intermediate steps (one summary dict per loop iteration).
        intermediate_steps = []

        for step in range(max_steps):
            logger.info(f"Planning step {step + 1}/{max_steps}")

            # Step 1: Observe current state
            screenshot = await self.connector.get_screenshot()
            if not screenshot:
                # Without a screenshot the planner has no observation — abort.
                return ToolResult(
                    status=ToolStatus.ERROR,
                    error="Failed to get screenshot for planning",
                    metadata={"step": step, "action_history": self.action_history}
                )

            # Collect screenshot for visual analysis
            all_screenshots.append(screenshot)

            # Step 2: Plan next action using LLM
            planned_action = await self._plan_next_action(
                task_description=task_description,
                screenshot=screenshot,
                action_history=self.action_history,
            )

            # Check if task is complete
            if planned_action["action_type"] == "DONE":
                logger.info("Task marked as complete by LLM")
                reasoning = planned_action.get("reasoning", "Task completed successfully")

                intermediate_steps.append({
                    "step_number": step + 1,
                    "action": "DONE",
                    "reasoning": reasoning,
                    "status": "done",
                })

                return ToolResult(
                    status=ToolStatus.SUCCESS,
                    content=f"Task completed: {task_description}\n\nFinal state: {reasoning}",
                    metadata={
                        "steps_taken": step + 1,
                        "action_history": self.action_history,
                        "screenshots": all_screenshots,
                        "intermediate_steps": intermediate_steps,
                        "final_reasoning": reasoning,
                    }
                )

            # Check if task failed
            if planned_action["action_type"] == "FAIL":
                logger.warning("Task marked as failed by LLM")
                reason = planned_action.get("reason", "Task cannot be completed")

                intermediate_steps.append({
                    "step_number": step + 1,
                    "action": "FAIL",
                    "reasoning": planned_action.get("reasoning", ""),
                    "status": "failed",
                })

                return ToolResult(
                    status=ToolStatus.ERROR,
                    error=reason,
                    metadata={
                        "steps_taken": step + 1,
                        "action_history": self.action_history,
                        "screenshots": all_screenshots,
                        "intermediate_steps": intermediate_steps,
                    }
                )

            # Check if action is WAIT (screenshot observation, continue to next step)
            if planned_action["action_type"] == "WAIT":
                logger.info("Screenshot observation step, continuing planning loop")
                intermediate_steps.append({
                    "step_number": step + 1,
                    "action": "WAIT",
                    "reasoning": planned_action.get("reasoning", ""),
                    "status": "observation",
                })
                continue

            # Step 3: Execute the planned action
            execution_result = await self._execute_planned_action(planned_action)

            # Record action in history (also fed back to the planner as context).
            self.action_history.append({
                "step": step + 1,
                "planned_action": planned_action,
                "execution_result": execution_result,
            })

            intermediate_steps.append({
                "step_number": step + 1,
                "action": planned_action.get("action_type", "unknown"),
                "reasoning": planned_action.get("reasoning", ""),
                "status": execution_result.get("status", "unknown"),
            })

            # Check execution result; a failed action does NOT abort the loop —
            # the planner sees the failure in history and may retry differently.
            if execution_result.get("status") != "success":
                logger.warning(f"Action execution failed: {execution_result.get('error')}")
                # Continue to next iteration for retry planning

        # Max steps reached
        return ToolResult(
            status=ToolStatus.ERROR,
            error=f"Task incomplete after {max_steps} steps",
            metadata={
                "task_description": task_description,
                "steps_taken": max_steps,
                "action_history": self.action_history,
                "screenshots": all_screenshots,
                "intermediate_steps": intermediate_steps,
            }
        )
265
    async def _plan_next_action(
        self,
        task_description: str,
        screenshot: bytes,
        action_history: list,
    ) -> Dict[str, Any]:
        """
        Use LLM/VLM to plan the next action.

        This method sends:
        - Task description
        - Current screenshot (vision input)
        - Action history (context)
        - Available ACTION_SPACE

        And gets back a structured action plan. Two client protocols are
        supported: the Anthropic client (returns (reasoning, commands)
        tuples of raw pyautogui code) and a generic client (prompt + base64
        image, returning a JSON action).

        Args:
            task_description: The task to accomplish
            screenshot: Current desktop screenshot (PNG/JPEG bytes)
            action_history: Previously executed actions

        Returns:
            Dict with action_type ("DONE"/"FAIL"/"WAIT"/"PYAUTOGUI_COMMAND"
            or an ACTION_SPACE name) and parameters; never raises — planning
            errors are folded into a FAIL action.
        """
        if self.llm_client is None:
            # Fallback: Simple heuristic or manual mode
            logger.warning("No LLM client configured, using fallback mode")
            return {
                "action_type": "FAIL",
                "reason": "LLM client not configured"
            }

        # Check if using Anthropic client (import may fail if the optional
        # dependency is absent — then treat the client as generic).
        try:
            from .anthropic_client import AnthropicGUIClient
            is_anthropic = isinstance(self.llm_client, AnthropicGUIClient)
        except ImportError:
            is_anthropic = False

        if is_anthropic:
            # Use Anthropic client
            try:
                reasoning, commands = await self.llm_client.plan_action(
                    task_description=task_description,
                    screenshot=screenshot,
                    action_history=action_history,
                )

                # Sentinel command lists map onto control actions.
                if commands == ["FAIL"]:
                    return {
                        "action_type": "FAIL",
                        "reason": "Anthropic planning failed"
                    }

                if commands == ["DONE"]:
                    return {
                        "action_type": "DONE",
                        "reasoning": reasoning
                    }

                if commands == ["SCREENSHOT"]:
                    # Screenshot is automatically handled by system
                    # Continue to next planning step
                    logger.info("LLM requested screenshot (observation step)")
                    return {
                        "action_type": "WAIT",
                        "reasoning": reasoning or "Observing screen state"
                    }

                # If no commands but has reasoning, task is complete
                # (Anthropic returns text-only when task is done)
                if not commands and reasoning:
                    logger.info("LLM returned text-only response, interpreting as task completion")
                    return {
                        "action_type": "DONE",
                        "reasoning": reasoning
                    }

                # No commands and no reasoning = error
                if not commands:
                    return {
                        "action_type": "FAIL",
                        "reason": "No commands generated and no completion message"
                    }

                # Return first command (Anthropic returns pyautogui commands directly)
                # NOTE(review): any additional commands beyond the first are
                # dropped here — confirm this is intended.
                return {
                    "action_type": "PYAUTOGUI_COMMAND",
                    "command": commands[0],
                    "reasoning": reasoning
                }

            except Exception as e:
                logger.error(f"Anthropic planning failed: {e}")
                return {
                    "action_type": "FAIL",
                    "reason": f"Planning error: {str(e)}"
                }

        # Generic LLM client (for future integration with other LLMs)
        # Encode screenshot to base64 for LLM
        screenshot_b64 = base64.b64encode(screenshot).decode('utf-8')

        # Prepare prompt for LLM
        prompt = self._build_planning_prompt(
            task_description=task_description,
            action_history=action_history,
        )

        # Call LLM with vision input
        try:
            response = await self.llm_client.plan_action(
                prompt=prompt,
                image_base64=screenshot_b64,
                action_space=ACTION_SPACE,
                keyboard_keys=KEYBOARD_KEYS,
            )

            # Parse LLM response to action dict
            action = self._parse_llm_response(response)

            logger.info(f"LLM planned action: {action['action_type']}")
            return action

        except Exception as e:
            logger.error(f"LLM planning failed: {e}")
            return {
                "action_type": "FAIL",
                "reason": f"Planning error: {str(e)}"
            }
397
    def _build_planning_prompt(
        self,
        task_description: str,
        action_history: list,
    ) -> str:
        """
        Build prompt for LLM planning.

        The prompt contains the task, the available action vocabulary, the
        last five history entries (type + parameters), and the required
        JSON output schema. It is sent alongside the screenshot.

        Args:
            task_description: The task to accomplish
            action_history: Previously executed actions

        Returns:
            Formatted prompt string
        """
        prompt = f"""You are a GUI automation agent. Your task is to complete the following:

Task: {task_description}

You can observe the current desktop state through the provided screenshot.
You must plan the next action to take from the available ACTION_SPACE.

Available actions:
- Mouse: MOVE_TO, CLICK, RIGHT_CLICK, DOUBLE_CLICK, DRAG_TO, SCROLL
- Keyboard: TYPING, PRESS, KEY_DOWN, KEY_UP, HOTKEY
- Control: WAIT, DONE, FAIL

"""

        if action_history:
            prompt += f"\nPrevious actions taken ({len(action_history)}):\n"
            # Only the most recent five actions are included as context.
            for i, action in enumerate(action_history[-5:], 1):  # Last 5 actions
                prompt += f"{i}. {action['planned_action']['action_type']}"
                if 'parameters' in action['planned_action']:
                    prompt += f" - {action['planned_action']['parameters']}"
                prompt += "\n"

        prompt += """
Based on the screenshot and task, output the next action in JSON format:
{
    "action_type": "ACTION_TYPE",
    "parameters": {...},
    "reasoning": "Why this action is needed"
}

If the task is complete, output: {"action_type": "DONE"}
If the task cannot be completed, output: {"action_type": "FAIL", "reason": "explanation"}
"""

        return prompt
448
+ def _parse_llm_response(self, response: str) -> Dict[str, Any]:
449
+ """
450
+ Parse LLM response to extract action.
451
+
452
+ Args:
453
+ response: LLM response (should be JSON)
454
+
455
+ Returns:
456
+ Action dict with action_type and parameters
457
+ """
458
+ import json
459
+
460
+ try:
461
+ # Try to parse as JSON
462
+ action = json.loads(response)
463
+
464
+ # Validate action
465
+ if "action_type" not in action:
466
+ raise ValueError("Missing action_type in LLM response")
467
+
468
+ return action
469
+
470
+ except json.JSONDecodeError:
471
+ logger.error(f"Failed to parse LLM response as JSON: {response[:200]}")
472
+ return {
473
+ "action_type": "FAIL",
474
+ "reason": "Invalid LLM response format"
475
+ }
476
+
477
+ async def _execute_planned_action(
478
+ self,
479
+ action: Dict[str, Any]
480
+ ) -> Dict[str, Any]:
481
+ """
482
+ Execute a planned action through the connector.
483
+
484
+ Args:
485
+ action: Action dict with action_type and parameters
486
+
487
+ Returns:
488
+ Execution result dict
489
+ """
490
+ action_type = action["action_type"]
491
+
492
+ # Handle Anthropic's direct pyautogui commands
493
+ if action_type == "PYAUTOGUI_COMMAND":
494
+ command = action.get("command", "")
495
+ logger.info(f"Executing pyautogui command: {command}")
496
+
497
+ try:
498
+ result = await self.connector.execute_python_command(command)
499
+ return {
500
+ "status": "success" if result else "error",
501
+ "action_type": action_type,
502
+ "command": command,
503
+ "result": result
504
+ }
505
+ except Exception as e:
506
+ logger.error(f"Command execution error: {e}")
507
+ return {
508
+ "status": "error",
509
+ "action_type": action_type,
510
+ "error": str(e)
511
+ }
512
+
513
+ # Handle standard action space commands
514
+ parameters = action.get("parameters", {})
515
+ logger.info(f"Executing action: {action_type}")
516
+
517
+ try:
518
+ result = await self.connector.execute_action(action_type, parameters)
519
+ return result
520
+
521
+ except Exception as e:
522
+ logger.error(f"Action execution error: {e}")
523
+ return {
524
+ "status": "error",
525
+ "action_type": action_type,
526
+ "error": str(e)
527
+ }
528
+
529
+ # Helper methods for direct action execution
530
+
531
+ async def execute_action(
532
+ self,
533
+ action_type: str,
534
+ parameters: Dict[str, Any]
535
+ ) -> ToolResult:
536
+ """
537
+ Direct action execution (bypass LLM planning).
538
+
539
+ Args:
540
+ action_type: Action type from ACTION_SPACE
541
+ parameters: Action parameters
542
+
543
+ Returns:
544
+ ToolResult with execution status
545
+ """
546
+ result = await self.connector.execute_action(action_type, parameters)
547
+
548
+ if result.get("status") == "success":
549
+ return ToolResult(
550
+ status=ToolStatus.SUCCESS,
551
+ content=f"Executed {action_type}",
552
+ metadata=result
553
+ )
554
+ else:
555
+ return ToolResult(
556
+ status=ToolStatus.ERROR,
557
+ error=result.get("error", "Unknown error"),
558
+ metadata=result
559
+ )
560
+
561
+ async def get_screenshot(self) -> ToolResult:
562
+ """Get current desktop screenshot."""
563
+ screenshot = await self.connector.get_screenshot()
564
+ if screenshot:
565
+ return ToolResult(
566
+ status=ToolStatus.SUCCESS,
567
+ content=screenshot,
568
+ metadata={"type": "screenshot", "size": len(screenshot)}
569
+ )
570
+ else:
571
+ return ToolResult(
572
+ status=ToolStatus.ERROR,
573
+ error="Failed to capture screenshot"
574
+ )
575
+
576
    async def _record_intermediate_step(
        self,
        step_number: int,
        planned_action: Dict[str, Any],
        execution_result: Dict[str, Any],
        screenshot: bytes,
        task_description: str,
    ):
        """
        Record an intermediate step of GUI agent execution.

        This method records each planning-action cycle to the recording system,
        providing detailed traces of GUI agent's decision-making process.
        It is best-effort: every unmet precondition (no manager, recording
        inactive, recorder missing) makes it return silently.

        Args:
            step_number: Step number in the execution sequence
            planned_action: Action planned by LLM
            execution_result: Result of executing the action
            screenshot: Screenshot before executing the action
            task_description: Overall task description
        """
        # Try to get recording_manager dynamically if not set at initialization
        recording_manager = self.recording_manager
        if not recording_manager and hasattr(self, '_runtime_info') and self._runtime_info:
            # Try to get from grounding_client
            grounding_client = self._runtime_info.grounding_client
            if grounding_client and hasattr(grounding_client, 'recording_manager'):
                recording_manager = grounding_client.recording_manager
                logger.debug(f"Step {step_number}: Dynamically retrieved recording_manager from grounding_client")

        if not recording_manager:
            logger.debug(f"Step {step_number}: No recording_manager available, skipping intermediate step recording")
            return

        # Check if recording is active (class-level flag on RecordingManager).
        try:
            from openspace.recording.manager import RecordingManager
            if not RecordingManager.is_recording():
                logger.debug(f"Step {step_number}: RecordingManager not started")
                return
        except Exception as e:
            logger.debug(f"Step {step_number}: Failed to check recording status: {e}")
            return

        # Check if recorder is initialized
        # NOTE(review): reaches into the manager's private `_recorder` —
        # confirm there is no public accessor for this.
        if not hasattr(recording_manager, '_recorder') or not recording_manager._recorder:
            logger.warning(f"Step {step_number}: recording_manager._recorder not initialized")
            return

        # Build command string for display
        action_type = planned_action.get("action_type", "unknown")
        command = self._format_action_command(planned_action)

        # Build result summary
        status = execution_result.get("status", "unknown")
        is_success = status in ("success", "done", "observation")

        # Build result content
        if status == "done":
            result_content = f"Task completed at step {step_number}"
        elif status == "failed":
            result_content = execution_result.get("message", "Task failed")
        elif status == "observation":
            result_content = execution_result.get("message", "Screenshot observation")
        else:
            result_content = execution_result.get("result", execution_result.get("message", str(execution_result)))

        # Build parameters for recording
        parameters = {
            "task_description": task_description,
            "step_number": step_number,
            "action_type": action_type,
            "planned_action": planned_action,
        }

        # Record to trajectory recorder (handles screenshot saving)
        try:
            await recording_manager._recorder.record_step(
                backend="gui",
                tool="gui_agent_step",
                command=command,
                result={
                    "status": "success" if is_success else "error",
                    # Output is truncated to keep trace entries small.
                    "output": str(result_content)[:200],
                },
                parameters=parameters,
                screenshot=screenshot,
                extra={
                    "gui_step_number": step_number,
                    "reasoning": planned_action.get("reasoning", ""),
                }
            )

            logger.info(f"✓ Recorded GUI intermediate step {step_number}: {command}")

        except Exception as e:
            # Recording failures must never break task execution.
            logger.error(f"✗ Failed to record intermediate step {step_number}: {e}", exc_info=True)
674
+ def _format_action_command(self, planned_action: Dict[str, Any]) -> str:
675
+ """
676
+ Format planned action into a human-readable command string.
677
+
678
+ Args:
679
+ planned_action: Action dictionary from LLM planning
680
+
681
+ Returns:
682
+ Formatted command string
683
+ """
684
+ action_type = planned_action.get("action_type", "unknown")
685
+
686
+ # Handle special action types
687
+ if action_type == "DONE":
688
+ return "DONE (task completed)"
689
+ elif action_type == "FAIL":
690
+ reason = planned_action.get("reason", "unknown")
691
+ return f"FAIL ({reason})"
692
+ elif action_type == "WAIT":
693
+ return "WAIT (screenshot observation)"
694
+
695
+ # Handle PyAutoGUI commands
696
+ elif action_type == "PYAUTOGUI_COMMAND":
697
+ command = planned_action.get("command", "")
698
+ # Truncate long commands
699
+ if len(command) > 100:
700
+ return command[:100] + "..."
701
+ return command
702
+
703
+ # Handle standard action space commands
704
+ else:
705
+ parameters = planned_action.get("parameters", {})
706
+ if parameters:
707
+ # Format first 2 parameters
708
+ param_items = list(parameters.items())[:2]
709
+ param_str = ", ".join([f"{k}={v}" for k, v in param_items])
710
+ return f"{action_type}({param_str})"
711
+ else:
712
+ return action_type
openspace/grounding/backends/gui/transport/actions.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GUI Action Space Definitions.
3
+ """
4
+ from typing import Dict, Any
5
+
6
+ # Screen resolution constants
7
+ X_MAX = 1920
8
+ Y_MAX = 1080
9
+
10
+ # Keyboard keys constants
11
+ KEYBOARD_KEYS = [
12
+ '\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/',
13
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@',
14
+ '[', '\\', ']', '^', '_', '`',
15
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
16
+ '{', '|', '}', '~',
17
+ 'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace',
18
+ 'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh', 'browsersearch', 'browserstop',
19
+ 'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete', 'divide',
20
+ 'down', 'end', 'enter', 'esc', 'escape', 'execute',
21
+ 'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19',
22
+ 'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9',
23
+ 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja', 'kana', 'kanji',
24
+ 'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left', 'modechange', 'multiply',
25
+ 'nexttrack', 'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9',
26
+ 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen',
27
+ 'prntscrn', 'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator',
28
+ 'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract', 'tab', 'up',
29
+ 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen',
30
+ 'command', 'option', 'optionleft', 'optionright'
31
+ ]
32
+
33
+ # Action Space Definition
34
+ ACTION_SPACE = [
35
+ {
36
+ "action_type": "MOVE_TO",
37
+ "note": "move the cursor to the specified position",
38
+ "parameters": {
39
+ "x": {"type": float, "range": [0, X_MAX], "optional": False},
40
+ "y": {"type": float, "range": [0, Y_MAX], "optional": False},
41
+ }
42
+ },
43
+ {
44
+ "action_type": "CLICK",
45
+ "note": "click the left button if button not specified, otherwise click the specified button",
46
+ "parameters": {
47
+ "button": {"type": str, "range": ["left", "right", "middle"], "optional": True},
48
+ "x": {"type": float, "range": [0, X_MAX], "optional": True},
49
+ "y": {"type": float, "range": [0, Y_MAX], "optional": True},
50
+ "num_clicks": {"type": int, "range": [1, 2, 3], "optional": True},
51
+ }
52
+ },
53
+ {
54
+ "action_type": "MOUSE_DOWN",
55
+ "note": "press the mouse button",
56
+ "parameters": {
57
+ "button": {"type": str, "range": ["left", "right", "middle"], "optional": True}
58
+ }
59
+ },
60
+ {
61
+ "action_type": "MOUSE_UP",
62
+ "note": "release the mouse button",
63
+ "parameters": {
64
+ "button": {"type": str, "range": ["left", "right", "middle"], "optional": True}
65
+ }
66
+ },
67
+ {
68
+ "action_type": "RIGHT_CLICK",
69
+ "note": "right click at position",
70
+ "parameters": {
71
+ "x": {"type": float, "range": [0, X_MAX], "optional": True},
72
+ "y": {"type": float, "range": [0, Y_MAX], "optional": True}
73
+ }
74
+ },
75
+ {
76
+ "action_type": "DOUBLE_CLICK",
77
+ "note": "double click at position",
78
+ "parameters": {
79
+ "x": {"type": float, "range": [0, X_MAX], "optional": True},
80
+ "y": {"type": float, "range": [0, Y_MAX], "optional": True}
81
+ }
82
+ },
83
+ {
84
+ "action_type": "DRAG_TO",
85
+ "note": "drag the cursor to position",
86
+ "parameters": {
87
+ "x": {"type": float, "range": [0, X_MAX], "optional": False},
88
+ "y": {"type": float, "range": [0, Y_MAX], "optional": False}
89
+ }
90
+ },
91
+ {
92
+ "action_type": "SCROLL",
93
+ "note": "scroll the mouse wheel",
94
+ "parameters": {
95
+ "dx": {"type": int, "range": None, "optional": False},
96
+ "dy": {"type": int, "range": None, "optional": False}
97
+ }
98
+ },
99
+ {
100
+ "action_type": "TYPING",
101
+ "note": "type the specified text",
102
+ "parameters": {
103
+ "text": {"type": str, "range": None, "optional": False}
104
+ }
105
+ },
106
+ {
107
+ "action_type": "PRESS",
108
+ "note": "press the specified key",
109
+ "parameters": {
110
+ "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False}
111
+ }
112
+ },
113
+ {
114
+ "action_type": "KEY_DOWN",
115
+ "note": "press down the specified key",
116
+ "parameters": {
117
+ "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False}
118
+ }
119
+ },
120
+ {
121
+ "action_type": "KEY_UP",
122
+ "note": "release the specified key",
123
+ "parameters": {
124
+ "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False}
125
+ }
126
+ },
127
+ {
128
+ "action_type": "HOTKEY",
129
+ "note": "press key combination",
130
+ "parameters": {
131
+ "keys": {"type": list, "range": [KEYBOARD_KEYS], "optional": False}
132
+ }
133
+ },
134
+ {
135
+ "action_type": "WAIT",
136
+ "note": "wait until next action",
137
+ },
138
+ {
139
+ "action_type": "FAIL",
140
+ "note": "mark task as failed",
141
+ },
142
+ {
143
+ "action_type": "DONE",
144
+ "note": "mark task as done",
145
+ }
146
+ ]
147
+
148
+
149
def build_pyautogui_command(action_type: str, parameters: Dict[str, Any]) -> "str | None":
    """
    Build a pyautogui command string from action type and parameters.

    Args:
        action_type: Type of action (e.g., 'CLICK', 'TYPING'); see ACTION_SPACE.
        parameters: Action parameters.

    Returns:
        Python command string, or None when the action type is unsupported
        or a required positional parameter is missing.
    """
    if action_type == "MOVE_TO":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            # 0.5s eased movement looks more human than an instant jump.
            return f"pyautogui.moveTo({x}, {y}, 0.5, pyautogui.easeInQuad)"
        else:
            # No target given: moveTo() without args is effectively a no-op.
            return "pyautogui.moveTo()"

    elif action_type == "CLICK":
        button = parameters.get("button", "left")
        num_clicks = parameters.get("num_clicks", 1)

        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.click(button='{button}', x={x}, y={y}, clicks={num_clicks})"
        else:
            # No coordinates: click at the current cursor position.
            return f"pyautogui.click(button='{button}', clicks={num_clicks})"

    elif action_type == "MOUSE_DOWN":
        button = parameters.get("button", "left")
        return f"pyautogui.mouseDown(button='{button}')"

    elif action_type == "MOUSE_UP":
        button = parameters.get("button", "left")
        return f"pyautogui.mouseUp(button='{button}')"

    elif action_type == "RIGHT_CLICK":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.rightClick(x={x}, y={y})"
        else:
            return "pyautogui.rightClick()"

    elif action_type == "DOUBLE_CLICK":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.doubleClick(x={x}, y={y})"
        else:
            return "pyautogui.doubleClick()"

    elif action_type == "DRAG_TO":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.dragTo({x}, {y}, 1.0, button='left')"
        # x/y are required for DRAG_TO; fall through to None when missing.

    elif action_type == "SCROLL":
        dx = parameters.get("dx", 0)
        dy = parameters.get("dy", 0)
        # Bug fix: dx used to be silently ignored even though the action
        # space declares it required. Emit a horizontal scroll when dx is
        # non-zero; output is unchanged for the common dx == 0 case.
        if dx:
            return f"pyautogui.hscroll({dx}); pyautogui.scroll({dy})"
        return f"pyautogui.scroll({dy})"

    elif action_type == "TYPING":
        text = parameters.get("text", "")
        # repr() gives correct escaping for quotes/backslashes/newlines.
        return f"pyautogui.typewrite({repr(text)})"

    elif action_type == "PRESS":
        key = parameters.get("key", "")
        # Bug fix: KEYBOARD_KEYS contains quote characters ("'", '"', '\\'),
        # which produced syntactically invalid code when interpolated into
        # single quotes; repr() escapes them correctly and emits the same
        # 'key' form for ordinary key names.
        return f"pyautogui.press({repr(key)})"

    elif action_type == "KEY_DOWN":
        key = parameters.get("key", "")
        return f"pyautogui.keyDown({repr(key)})"

    elif action_type == "KEY_UP":
        key = parameters.get("key", "")
        return f"pyautogui.keyUp({repr(key)})"

    elif action_type == "HOTKEY":
        keys = parameters.get("keys", [])
        if keys:
            keys_str = ", ".join(repr(k) for k in keys)
            return f"pyautogui.hotkey({keys_str})"
        # Empty key list: nothing to press; fall through to None.

    # Unsupported action type or missing required parameters.
    return None
openspace/grounding/backends/gui/transport/connector.py ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import re
3
+ from typing import Any, Dict, Optional
4
+ from openspace.grounding.core.transport.connectors import AioHttpConnector
5
+ from .actions import build_pyautogui_command, KEYBOARD_KEYS
6
+ from openspace.utils.logging import Logger
7
+
8
+ logger = Logger.get_logger(__name__)
9
+
10
+
11
class GUIConnector(AioHttpConnector):
    """
    Connector for the desktop_env HTTP API.

    Provides action execution (pyautogui commands) and observation methods
    (screenshot, accessibility tree, cursor position), all wrapped with
    retry logic for transient failures.
    """

    def __init__(
        self,
        vm_ip: str,
        server_port: int = 5000,
        timeout: int = 90,
        retry_times: int = 3,
        retry_interval: float = 5.0,
        pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}",
    ):
        """
        Initialize GUI connector.

        Args:
            vm_ip: IP address of the VM running desktop_env
            server_port: Port of the desktop_env HTTP server
            timeout: Request timeout in seconds
            retry_times: Number of retries for failed requests
            retry_interval: Interval between retries in seconds
            pkgs_prefix: Python command prefix for pyautogui setup
                (disables the pyautogui fail-safe so corner moves don't abort)
        """
        base_url = f"http://{vm_ip}:{server_port}"
        super().__init__(base_url, timeout=timeout)

        self.vm_ip = vm_ip
        self.server_port = server_port
        self.retry_times = retry_times
        self.retry_interval = retry_interval
        self.pkgs_prefix = pkgs_prefix
        self.timeout = timeout

    async def _retry_invoke(
        self,
        operation_name: str,
        operation_func,
        *args,
        **kwargs
    ):
        """
        Execute operation with retry logic.

        Args:
            operation_name: Name of operation for logging
            operation_func: Async function to execute
            *args: Positional arguments for operation_func
            **kwargs: Keyword arguments for operation_func

        Returns:
            Operation result

        Raises:
            RuntimeError: Immediately on timeout (timeouts are NOT retried —
                retrying would likely just block for the full window again)
            Exception: Last exception after all retries fail
        """
        last_exc: Exception | None = None

        for attempt in range(1, self.retry_times + 1):
            try:
                result = await operation_func(*args, **kwargs)
                logger.debug("%s executed successfully (attempt %d/%d)", operation_name, attempt, self.retry_times)
                return result
            except asyncio.TimeoutError as exc:
                logger.error("%s timed out", operation_name)
                raise RuntimeError(f"{operation_name} timed out after {self.timeout} seconds") from exc
            except Exception as exc:
                last_exc = exc
                if attempt == self.retry_times:
                    break
                logger.warning(
                    "%s failed (attempt %d/%d): %s, retrying in %.1f seconds...",
                    operation_name, attempt, self.retry_times, exc, self.retry_interval
                )
                await asyncio.sleep(self.retry_interval)

        error_msg = f"{operation_name} failed after {self.retry_times} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)

    @staticmethod
    def _is_valid_image_response(content_type: str, data: Optional[bytes]) -> bool:
        """Validate image response using magic bytes; Content-Type is a fallback."""
        if not isinstance(data, (bytes, bytearray)) or not data:
            return False
        # PNG magic (8 bytes)
        if len(data) >= 8 and data[:8] == b"\x89PNG\r\n\x1a\n":
            return True
        # JPEG magic (3 bytes)
        if len(data) >= 3 and data[:3] == b"\xff\xd8\xff":
            return True
        # Fallback to content-type
        if content_type and ("image/png" in content_type or "image/jpeg" in content_type):
            return True
        return False

    @staticmethod
    def _fix_pyautogui_less_than_bug(command: str) -> str:
        """
        Fix PyAutoGUI '<' character bug by converting it to hotkey("shift", ',') calls.

        This fixes the known PyAutoGUI issue where typing '<' produces '>' instead.
        References:
            - https://github.com/asweigart/pyautogui/issues/198
            - https://github.com/xlang-ai/OSWorld/issues/257

        Args:
            command (str): The original pyautogui command

        Returns:
            str: The fixed command with '<' characters handled properly
        """
        # Pattern to match press('<') or press('\u003c') calls
        press_pattern = r'pyautogui\.press\(["\'](?:<|\\u003c)["\']\)'

        # Handle press('<') calls
        def replace_press_less_than(match):
            return 'pyautogui.hotkey("shift", ",")'

        # First handle press('<') calls
        command = re.sub(press_pattern, replace_press_less_than, command)

        # Pattern to match typewrite calls with quoted strings
        typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'

        # Then handle typewrite calls
        def process_typewrite_match(match):
            quote_char = match.group(1)
            content = match.group(2)

            # Preprocess: Try to decode Unicode escapes like \u003c to actual '<'
            # This handles cases where '<' is represented as escaped Unicode
            try:
                # Attempt to decode unicode escapes
                decoded_content = content.encode('utf-8').decode('unicode_escape')
                content = decoded_content
            except UnicodeDecodeError:
                # If decoding fails, proceed with original content to avoid breaking existing logic
                pass  # Graceful degradation - fall back to original content if decoding fails

            # Check if content contains '<'
            if '<' not in content:
                return match.group(0)

            # Split by '<' and rebuild: interleave typewrite segments with the
            # shift+comma chord that produces '<'
            parts = content.split('<')
            result_parts = []

            for i, part in enumerate(parts):
                if i == 0:
                    # First part
                    if part:
                        result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
                else:
                    # Add hotkey for '<' and then typewrite for the rest
                    result_parts.append('pyautogui.hotkey("shift", ",")')
                    if part:
                        result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")

            return '; '.join(result_parts)

        command = re.sub(typewrite_pattern, process_typewrite_match, command)

        return command

    async def get_screen_size(self) -> Optional[tuple[int, int]]:
        """
        Get actual screen size from desktop environment using pyautogui.

        Returns:
            (width, height) tuple, or None on failure
        """
        try:
            command = "print(pyautogui.size())"
            result = await self.execute_python_command(command)
            if result and result.get("status") == "success":
                output = result.get("output", "")
                # Parse output like "Size(width=2880, height=1800)".
                # Fix: use the module-level `re` import — the previous
                # function-local `import re` was redundant.
                match = re.search(r'width=(\d+).*height=(\d+)', output)
                if match:
                    width = int(match.group(1))
                    height = int(match.group(2))
                    logger.info(f"Detected screen size: {width}x{height}")
                    return (width, height)
            logger.warning(f"Failed to detect screen size, output: {result}")
            return None
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return None

    async def get_screenshot(self) -> Optional[bytes]:
        """
        Get screenshot from desktop environment.

        Returns:
            Screenshot image bytes (PNG/JPEG), or None on failure
        """
        try:
            async def _get():
                response = await self._request("GET", "/screenshot", timeout=10)
                if response.status == 200:
                    content_type = response.headers.get("Content-Type", "")
                    content = await response.read()
                    if self._is_valid_image_response(content_type, content):
                        return content
                    else:
                        raise ValueError("Invalid screenshot format")
                else:
                    raise RuntimeError(f"HTTP {response.status}")

            return await self._retry_invoke("get_screenshot", _get)
        except Exception as e:
            logger.error(f"Failed to get screenshot: {e}")
            return None

    async def execute_python_command(self, command: str) -> Optional[Dict[str, Any]]:
        """
        Execute a Python command on desktop environment.
        Used for pyautogui commands.

        Args:
            command: Python command to execute

        Returns:
            Response dict with execution result, or None on failure
        """
        try:
            # Apply '<' character fix for PyAutoGUI bug
            fixed_command = self._fix_pyautogui_less_than_bug(command)

            # shell=False with an argv list avoids shell-injection issues.
            command_list = ["python", "-c", self.pkgs_prefix.format(command=fixed_command)]
            payload = {"command": command_list, "shell": False}

            async def _execute():
                return await self.post_json("/execute", payload)

            return await self._retry_invoke("execute_python_command", _execute)
        except Exception as e:
            logger.error(f"Failed to execute command: {e}")
            return None

    async def execute_action(self, action_type: str, parameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Execute a desktop action.
        This is the main method for action space execution.

        Args:
            action_type: Action type (e.g., 'CLICK', 'TYPING')
            parameters: Action parameters

        Returns:
            Result dict with execution status
        """
        parameters = parameters or {}

        # Handle control actions — acknowledged without touching the desktop.
        if action_type in ['WAIT', 'FAIL', 'DONE']:
            return {
                "status": "success",
                "action_type": action_type,
                "message": f"Control action {action_type} acknowledged"
            }

        # Validate keyboard keys before generating code.
        if action_type in ['PRESS', 'KEY_DOWN', 'KEY_UP']:
            key = parameters.get('key')
            if key and key not in KEYBOARD_KEYS:
                return {
                    "status": "error",
                    "action_type": action_type,
                    "error": f"Invalid key: {key}. Must be in supported keyboard keys."
                }

        if action_type == 'HOTKEY':
            keys = parameters.get('keys', [])
            invalid_keys = [k for k in keys if k not in KEYBOARD_KEYS]
            if invalid_keys:
                return {
                    "status": "error",
                    "action_type": action_type,
                    "error": f"Invalid keys: {invalid_keys}"
                }

        # Build pyautogui command
        command = build_pyautogui_command(action_type, parameters)

        if command is None:
            return {
                "status": "error",
                "action_type": action_type,
                "error": f"Unsupported action type: {action_type}"
            }

        # Execute command
        result = await self.execute_python_command(command)

        if result:
            return {
                "status": "success",
                "action_type": action_type,
                "parameters": parameters,
                "result": result
            }
        else:
            return {
                "status": "error",
                "action_type": action_type,
                "parameters": parameters,
                "error": "Command execution failed"
            }

    async def get_accessibility_tree(self, max_depth: int = 5) -> Optional[Dict[str, Any]]:
        """
        Get accessibility tree from desktop environment.

        Args:
            max_depth: Maximum depth of accessibility tree traversal
                (NOTE(review): currently not forwarded to the server — the
                /accessibility endpoint is called without parameters)

        Returns:
            Accessibility tree as dict, or None on failure
        """
        try:
            async def _get():
                response = await self._request("GET", "/accessibility", timeout=10)
                if response.status == 200:
                    data = await response.json()
                    return data.get("AT")
                else:
                    raise RuntimeError(f"HTTP {response.status}")

            return await self._retry_invoke("get_accessibility_tree", _get)
        except Exception as e:
            logger.error(f"Failed to get accessibility tree: {e}")
            return None

    async def get_cursor_position(self) -> Optional[tuple[int, int]]:
        """
        Get current mouse cursor position.
        Useful for GUI debugging and relative positioning.

        Returns:
            (x, y) tuple, or None on failure
        """
        try:
            async def _get():
                result = await self.get_json("/cursor_position")
                return (result.get("x"), result.get("y"))

            return await self._retry_invoke("get_cursor_position", _get)
        except Exception as e:
            logger.error(f"Failed to get cursor position: {e}")
            return None

    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """
        Unified RPC entry for operations.
        Required by BaseConnector.

        Args:
            name: Operation name (action_type or observation method)
            params: Operation parameters

        Returns:
            Operation result
        """
        # Handle observation methods
        if name == "screenshot":
            return await self.get_screenshot()
        elif name == "accessibility_tree":
            max_depth = params.get("max_depth", 5) if params else 5
            return await self.get_accessibility_tree(max_depth)
        elif name == "cursor_position":
            return await self.get_cursor_position()
        else:
            # Treat as action
            return await self.execute_action(name.upper(), params or {})
openspace/grounding/backends/gui/transport/local_connector.py ADDED
@@ -0,0 +1,364 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Local GUI Connector — execute GUI operations directly in-process.
3
+
4
+ This connector has the **same public API** as GUIConnector (HTTP version)
5
+ but uses local pyautogui / ScreenshotHelper / AccessibilityHelper,
6
+ removing the need for a local_server.
7
+
8
+ Return format is kept identical so that GUISession / GUIAgentTool
9
+ work without any changes.
10
+ """
11
+
12
+ import asyncio
13
+ import os
14
+ import platform
15
+ import re
16
+ import tempfile
17
+ import uuid
18
+ from typing import Any, Dict, Optional
19
+
20
+ from openspace.grounding.core.transport.connectors.base import BaseConnector
21
+ from openspace.grounding.core.transport.task_managers.noop import NoOpConnectionManager
22
+ from openspace.utils.logging import Logger
23
+
24
+ logger = Logger.get_logger(__name__)
25
+
26
# Cached OS name ("Windows", "Linux", "Darwin") — used to pick the Python
# executable when spawning pyautogui subprocesses.
platform_name = platform.system()
27
+
28
+
29
+ class LocalGUIConnector(BaseConnector[Any]):
30
+ """
31
+ GUI connector that runs desktop automation **locally** using pyautogui /
32
+ ScreenshotHelper / AccessibilityHelper, bypassing the Flask local_server.
33
+
34
+ Public API is compatible with ``GUIConnector`` so that ``GUISession``
35
+ works without modification.
36
+ """
37
+
38
    def __init__(
        self,
        timeout: int = 90,
        retry_times: int = 3,
        retry_interval: float = 5.0,
        pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}",
    ):
        """
        Initialize the local GUI connector.

        Args:
            timeout: Per-operation timeout in seconds.
            retry_times: Number of attempts for failed operations.
            retry_interval: Delay between retries in seconds.
            pkgs_prefix: Template wrapped around every pyautogui command
                (imports pyautogui and disables its fail-safe).
        """
        # Local mode needs no real connection management.
        super().__init__(NoOpConnectionManager())
        self.timeout = timeout
        self.retry_times = retry_times
        self.retry_interval = retry_interval
        self.pkgs_prefix = pkgs_prefix

        # Compatibility attributes expected by GUISession
        self.vm_ip = "localhost"
        self.server_port = 0
        self.base_url = "local://localhost"

        # Lazy-initialized helpers (avoid import side effects at class load)
        self._screenshot_helper = None
        self._accessibility_helper = None
59
+
60
    def _get_screenshot_helper(self):
        """Return the ScreenshotHelper, creating it (and importing it) on first use."""
        if self._screenshot_helper is None:
            from openspace.local_server.utils import ScreenshotHelper
            self._screenshot_helper = ScreenshotHelper()
        return self._screenshot_helper
65
+
66
    def _get_accessibility_helper(self):
        """Return the AccessibilityHelper, creating it (and importing it) on first use."""
        if self._accessibility_helper is None:
            from openspace.local_server.utils import AccessibilityHelper
            self._accessibility_helper = AccessibilityHelper()
        return self._accessibility_helper
71
+
72
+ # ------------------------------------------------------------------
73
+ # connect / disconnect
74
+ # ------------------------------------------------------------------
75
+
76
    async def connect(self) -> None:
        """No real connection for local mode; only flips the connected state."""
        # Idempotent: repeated calls are no-ops once connected.
        if self._connected:
            return
        await super().connect()
        logger.info("LocalGUIConnector: ready (local mode, no server required)")
82
+
83
+ # ------------------------------------------------------------------
84
+ # Retry wrapper (same interface as GUIConnector._retry_invoke)
85
+ # ------------------------------------------------------------------
86
+
87
    async def _retry_invoke(
        self,
        operation_name,
        operation_func,
        *args,
        **kwargs,
    ):
        """
        Run *operation_func* with retry on failure.

        Args:
            operation_name: Human-readable name used in log messages.
            operation_func: Async callable to execute.
            *args: Positional arguments forwarded to operation_func.
            **kwargs: Keyword arguments forwarded to operation_func.

        Returns:
            The result of operation_func.

        Raises:
            RuntimeError: Immediately on timeout (timeouts are not retried).
            Exception: The last error once all retries are exhausted.
        """
        last_exc: Exception | None = None
        for attempt in range(1, self.retry_times + 1):
            try:
                result = await operation_func(*args, **kwargs)
                logger.debug(
                    "%s executed successfully (attempt %d/%d)",
                    operation_name, attempt, self.retry_times,
                )
                return result
            except asyncio.TimeoutError as exc:
                # Timeouts are treated as fatal: retrying would likely just
                # block again for the full timeout window.
                logger.error("%s timed out", operation_name)
                raise RuntimeError(
                    f"{operation_name} timed out after {self.timeout} seconds"
                ) from exc
            except Exception as exc:
                last_exc = exc
                if attempt == self.retry_times:
                    break
                logger.warning(
                    "%s failed (attempt %d/%d): %s, retrying in %.1f seconds...",
                    operation_name, attempt, self.retry_times, exc, self.retry_interval,
                )
                await asyncio.sleep(self.retry_interval)

        error_msg = f"{operation_name} failed after {self.retry_times} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)
121
+
122
+ # ------------------------------------------------------------------
123
+ # PyAutoGUI '<' bug fix (same as GUIConnector)
124
+ # ------------------------------------------------------------------
125
+
126
    @staticmethod
    def _fix_pyautogui_less_than_bug(command: str) -> str:
        """
        Fix PyAutoGUI '<' character bug.

        PyAutoGUI is known to type '>' when asked to type '<' on some
        layouts (see github.com/asweigart/pyautogui/issues/198), so '<'
        presses/typewrites are rewritten as an explicit
        hotkey("shift", ",") chord that is expected to produce '<'.

        Args:
            command: The original pyautogui command string.

        Returns:
            The command with '<' handling rewritten.
        """
        # press('<') or press('\u003c')  ->  hotkey("shift", ",")
        press_pattern = r'pyautogui\.press\(["\'](?:<|\\u003c)["\']\)'

        def replace_press_less_than(match):
            return 'pyautogui.hotkey("shift", ",")'

        command = re.sub(press_pattern, replace_press_less_than, command)

        # typewrite('...') calls with a quoted literal body
        typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'

        def process_typewrite_match(match):
            quote_char = match.group(1)
            content = match.group(2)
            # Decode escapes like \u003c into a literal '<' first; on
            # failure keep the original text (graceful degradation).
            try:
                decoded_content = content.encode("utf-8").decode("unicode_escape")
                content = decoded_content
            except UnicodeDecodeError:
                pass
            if "<" not in content:
                return match.group(0)
            # Split on '<' and interleave typewrite segments with the
            # shift+comma chord standing in for each '<'.
            parts = content.split("<")
            result_parts = []
            for i, part in enumerate(parts):
                if i == 0:
                    if part:
                        result_parts.append(
                            f"pyautogui.typewrite({quote_char}{part}{quote_char})"
                        )
                else:
                    result_parts.append('pyautogui.hotkey("shift", ",")')
                    if part:
                        result_parts.append(
                            f"pyautogui.typewrite({quote_char}{part}{quote_char})"
                        )
            return "; ".join(result_parts)

        command = re.sub(typewrite_pattern, process_typewrite_match, command)
        return command
166
+
167
+ # ------------------------------------------------------------------
168
+ # Image response validation (same as GUIConnector)
169
+ # ------------------------------------------------------------------
170
+
171
+ @staticmethod
172
+ def _is_valid_image_response(content_type: str, data: Optional[bytes]) -> bool:
173
+ if not isinstance(data, (bytes, bytearray)) or not data:
174
+ return False
175
+ if len(data) >= 8 and data[:8] == b"\x89PNG\r\n\x1a\n":
176
+ return True
177
+ if len(data) >= 3 and data[:3] == b"\xff\xd8\xff":
178
+ return True
179
+ if content_type and ("image/png" in content_type or "image/jpeg" in content_type):
180
+ return True
181
+ return False
182
+
183
+ # ------------------------------------------------------------------
184
+ # Public API (same signatures as GUIConnector)
185
+ # ------------------------------------------------------------------
186
+
187
    async def get_screen_size(self) -> Optional[tuple[int, int]]:
        """
        Get screen size using pyautogui.

        Runs ``print(pyautogui.size())`` in a subprocess and parses the
        printed ``Size(width=..., height=...)`` representation.

        Returns:
            (width, height) tuple, or None on failure.
        """
        try:
            command = "print(pyautogui.size())"
            result = await self.execute_python_command(command)
            if result and result.get("status") == "success":
                output = result.get("output", "")
                # Parse output like "Size(width=2880, height=1800)"
                match = re.search(r"width=(\d+).*height=(\d+)", output)
                if match:
                    width = int(match.group(1))
                    height = int(match.group(2))
                    logger.info("Detected screen size: %dx%d", width, height)
                    return (width, height)
            logger.warning("Failed to detect screen size, output: %s", result)
            return None
        except Exception as e:
            logger.error("Failed to get screen size: %s", e)
            return None
205
+
206
+ async def get_screenshot(self) -> Optional[bytes]:
207
+ """Capture screenshot locally using ScreenshotHelper."""
208
+ try:
209
+ async def _get():
210
+ helper = self._get_screenshot_helper()
211
+ tmp_path = os.path.join(
212
+ tempfile.gettempdir(), f"screenshot_{uuid.uuid4().hex}.png"
213
+ )
214
+ if helper.capture(tmp_path, with_cursor=True):
215
+ with open(tmp_path, "rb") as f:
216
+ data = f.read()
217
+ os.remove(tmp_path)
218
+ return data
219
+ else:
220
+ raise RuntimeError("Screenshot capture failed")
221
+
222
+ return await self._retry_invoke("get_screenshot", _get)
223
+ except Exception as e:
224
+ logger.error("Failed to get screenshot: %s", e)
225
+ return None
226
+
227
+ async def execute_python_command(self, command: str) -> Optional[Dict[str, Any]]:
228
+ """Execute a pyautogui Python command locally via subprocess."""
229
+ try:
230
+ fixed_command = self._fix_pyautogui_less_than_bug(command)
231
+ full_command = self.pkgs_prefix.format(command=fixed_command)
232
+
233
+ async def _execute():
234
+ python_cmd = "python" if platform_name == "Windows" else "python3"
235
+ proc = await asyncio.create_subprocess_exec(
236
+ python_cmd, "-c", full_command,
237
+ stdout=asyncio.subprocess.PIPE,
238
+ stderr=asyncio.subprocess.PIPE,
239
+ )
240
+ stdout_b, stderr_b = await asyncio.wait_for(
241
+ proc.communicate(), timeout=self.timeout
242
+ )
243
+ stdout = stdout_b.decode("utf-8", errors="replace") if stdout_b else ""
244
+ stderr = stderr_b.decode("utf-8", errors="replace") if stderr_b else ""
245
+ returncode = proc.returncode or 0
246
+ return {
247
+ "status": "success" if returncode == 0 else "error",
248
+ "output": stdout + stderr,
249
+ "error": stderr if returncode != 0 else "",
250
+ "returncode": returncode,
251
+ }
252
+
253
+ return await self._retry_invoke("execute_python_command", _execute)
254
+ except Exception as e:
255
+ logger.error("Failed to execute command: %s", e)
256
+ return None
257
+
258
+ async def execute_action(
259
+ self, action_type: str, parameters: Dict[str, Any] | None = None
260
+ ) -> Dict[str, Any]:
261
+ """Execute a desktop action (same logic as GUIConnector)."""
262
+ parameters = parameters or {}
263
+
264
+ if action_type in ["WAIT", "FAIL", "DONE"]:
265
+ return {
266
+ "status": "success",
267
+ "action_type": action_type,
268
+ "message": f"Control action {action_type} acknowledged",
269
+ }
270
+
271
+ # Import action builder (same module used by GUIConnector)
272
+ from openspace.grounding.backends.gui.transport.actions import (
273
+ build_pyautogui_command,
274
+ KEYBOARD_KEYS,
275
+ )
276
+
277
+ if action_type in ["PRESS", "KEY_DOWN", "KEY_UP"]:
278
+ key = parameters.get("key")
279
+ if key and key not in KEYBOARD_KEYS:
280
+ return {
281
+ "status": "error",
282
+ "action_type": action_type,
283
+ "error": f"Invalid key: {key}. Must be in supported keyboard keys.",
284
+ }
285
+ if action_type == "HOTKEY":
286
+ keys = parameters.get("keys", [])
287
+ invalid_keys = [k for k in keys if k not in KEYBOARD_KEYS]
288
+ if invalid_keys:
289
+ return {
290
+ "status": "error",
291
+ "action_type": action_type,
292
+ "error": f"Invalid keys: {invalid_keys}",
293
+ }
294
+
295
+ command = build_pyautogui_command(action_type, parameters)
296
+ if command is None:
297
+ return {
298
+ "status": "error",
299
+ "action_type": action_type,
300
+ "error": f"Unsupported action type: {action_type}",
301
+ }
302
+
303
+ result = await self.execute_python_command(command)
304
+ if result:
305
+ return {
306
+ "status": "success",
307
+ "action_type": action_type,
308
+ "parameters": parameters,
309
+ "result": result,
310
+ }
311
+ else:
312
+ return {
313
+ "status": "error",
314
+ "action_type": action_type,
315
+ "parameters": parameters,
316
+ "error": "Command execution failed",
317
+ }
318
+
319
+ async def get_accessibility_tree(
320
+ self, max_depth: int = 5
321
+ ) -> Optional[Dict[str, Any]]:
322
+ """Get accessibility tree locally."""
323
+ try:
324
+ async def _get():
325
+ helper = self._get_accessibility_helper()
326
+ return helper.get_tree(max_depth=max_depth)
327
+
328
+ return await self._retry_invoke("get_accessibility_tree", _get)
329
+ except Exception as e:
330
+ logger.error("Failed to get accessibility tree: %s", e)
331
+ return None
332
+
333
+ async def get_cursor_position(self) -> Optional[tuple[int, int]]:
334
+ """Get cursor position locally."""
335
+ try:
336
+ async def _get():
337
+ helper = self._get_screenshot_helper()
338
+ return helper.get_cursor_position()
339
+
340
+ return await self._retry_invoke("get_cursor_position", _get)
341
+ except Exception as e:
342
+ logger.error("Failed to get cursor position: %s", e)
343
+ return None
344
+
345
+ # ------------------------------------------------------------------
346
+ # BaseConnector abstract methods
347
+ # ------------------------------------------------------------------
348
+
349
+ async def invoke(self, name: str, params: dict[str, Any]) -> Any:
350
+ if name == "screenshot":
351
+ return await self.get_screenshot()
352
+ elif name == "accessibility_tree":
353
+ max_depth = params.get("max_depth", 5) if params else 5
354
+ return await self.get_accessibility_tree(max_depth)
355
+ elif name == "cursor_position":
356
+ return await self.get_cursor_position()
357
+ else:
358
+ return await self.execute_action(name.upper(), params or {})
359
+
360
+ async def request(self, *args: Any, **kwargs: Any) -> Any:
361
+ raise NotImplementedError(
362
+ "LocalGUIConnector does not support raw HTTP requests"
363
+ )
364
+
openspace/grounding/backends/mcp/__init__.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MCP Backend for OpenSpace Grounding.
3
+
4
+ This module provides the MCP (Model Context Protocol) backend implementation
5
+ for the grounding framework. It includes:
6
+
7
+ - MCPProvider: Manages multiple MCP server sessions
8
+ - MCPSession: Handles individual MCP server connections
9
+ - MCPClient: High-level client for MCP server configuration
10
+ - MCPInstallerManager: Manages automatic installation of MCP dependencies
11
+ - MCPToolCache: Caches tool metadata to avoid starting servers on list_tools
12
+ """
13
+
14
+ from .provider import MCPProvider
15
+ from .session import MCPSession
16
+ from .client import MCPClient
17
+ from .installer import (
18
+ MCPInstallerManager,
19
+ get_global_installer,
20
+ set_global_installer,
21
+ MCPDependencyError,
22
+ MCPCommandNotFoundError,
23
+ MCPInstallationCancelledError,
24
+ MCPInstallationFailedError,
25
+ )
26
+ from .tool_cache import MCPToolCache, get_tool_cache
27
+
28
+ __all__ = [
29
+ "MCPProvider",
30
+ "MCPSession",
31
+ "MCPClient",
32
+ "MCPInstallerManager",
33
+ "get_global_installer",
34
+ "set_global_installer",
35
+ "MCPDependencyError",
36
+ "MCPCommandNotFoundError",
37
+ "MCPInstallationCancelledError",
38
+ "MCPInstallationFailedError",
39
+ "MCPToolCache",
40
+ "get_tool_cache",
41
+ ]
openspace/grounding/backends/mcp/client.py ADDED
@@ -0,0 +1,409 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Client for managing MCP servers and sessions.
3
+
4
+ This module provides a high-level client that manages MCP servers, connectors,
5
+ and sessions from configuration.
6
+ """
7
+ import asyncio
8
+ import warnings
9
+ from typing import Any, Optional
10
+
11
+ from openspace.grounding.core.types import SandboxOptions
12
+ from openspace.config.utils import get_config_value, save_json_file, load_json_file
13
+ from .config import create_connector_from_config
14
+ from .session import MCPSession
15
+ from .installer import MCPInstallerManager, MCPDependencyError
16
+
17
+ from openspace.utils.logging import Logger
18
+
19
+ logger = Logger.get_logger(__name__)
20
+
21
+
22
class MCPClient:
    """Client for managing MCP servers and sessions.

    Provides a unified interface for working with MCP servers: loading
    configuration, creating connectors, and managing session lifecycle.
    """

    def __init__(
        self,
        config: str | dict[str, Any] | None = None,
        sandbox: bool = False,
        sandbox_options: SandboxOptions | None = None,
        timeout: float = 30.0,
        sse_read_timeout: float = 300.0,
        max_retries: int = 3,
        retry_interval: float = 2.0,
        installer: Optional[MCPInstallerManager] = None,
        check_dependencies: bool = True,
        tool_call_max_retries: int = 3,
        tool_call_retry_delay: float = 1.0,
    ) -> None:
        """Initialize a new MCP client.

        Args:
            config: Either a dict containing configuration or a path to a JSON
                config file. If None, an empty configuration is used.
            sandbox: Whether to use sandboxed execution mode for running MCP servers.
            sandbox_options: Optional sandbox configuration options.
            timeout: Timeout for operations in seconds (default: 30.0)
            sse_read_timeout: SSE read timeout in seconds (default: 300.0)
            max_retries: Maximum number of retry attempts for failed operations (default: 3)
            retry_interval: Wait time between retries in seconds (default: 2.0)
            installer: Optional installer manager for dependency installation
            check_dependencies: Whether to check and install dependencies (default: True)
            tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
            tool_call_retry_delay: Initial delay between tool call retries in seconds (default: 1.0)
        """
        self.config: dict[str, Any] = {}
        self.sandbox = sandbox
        self.sandbox_options = sandbox_options
        self.timeout = timeout
        self.sse_read_timeout = sse_read_timeout
        self.max_retries = max_retries
        self.retry_interval = retry_interval
        self.installer = installer
        self.check_dependencies = check_dependencies
        self.tool_call_max_retries = tool_call_max_retries
        self.tool_call_retry_delay = tool_call_retry_delay
        # server name -> live session
        self.sessions: dict[str, MCPSession] = {}
        # names of sessions currently considered active
        self.active_sessions: list[str] = []

        # Load configuration if provided (path -> parse file, dict -> use as-is)
        if config is not None:
            self.config = load_json_file(config) if isinstance(config, str) else config

    def _get_mcp_servers(self) -> dict[str, Any]:
        """Internal helper to get mcpServers configuration.

        Tries both 'mcpServers' and 'servers' keys for compatibility.

        Returns:
            Dictionary of MCP server configurations, empty dict if none found.
        """
        servers = get_config_value(self.config, "mcpServers", None)
        if servers is None:
            servers = get_config_value(self.config, "servers", {})
        return servers or {}

    @classmethod
    def from_dict(
        cls,
        config: dict[str, Any],
        sandbox: bool = False,
        sandbox_options: SandboxOptions | None = None,
        timeout: float = 30.0,
        sse_read_timeout: float = 300.0,
        max_retries: int = 3,
        retry_interval: float = 2.0,
    ) -> "MCPClient":
        """Create a MCPClient from a configuration dictionary.

        Args:
            config: The configuration dictionary.
            sandbox: Whether to use sandboxed execution mode for running MCP servers.
            sandbox_options: Optional sandbox configuration options.
            timeout: Timeout for operations in seconds (default: 30.0)
            sse_read_timeout: SSE read timeout in seconds (default: 300.0)
            max_retries: Maximum number of retry attempts (default: 3)
            retry_interval: Wait time between retries in seconds (default: 2.0)
        """
        return cls(config=config, sandbox=sandbox, sandbox_options=sandbox_options,
                   timeout=timeout, sse_read_timeout=sse_read_timeout,
                   max_retries=max_retries, retry_interval=retry_interval)

    @classmethod
    def from_config_file(
        cls, filepath: str, sandbox: bool = False, sandbox_options: SandboxOptions | None = None,
        timeout: float = 30.0, sse_read_timeout: float = 300.0,
        max_retries: int = 3, retry_interval: float = 2.0,
    ) -> "MCPClient":
        """Create a MCPClient from a JSON configuration file.

        Args:
            filepath: The path to the configuration file.
            sandbox: Whether to use sandboxed execution mode for running MCP servers.
            sandbox_options: Optional sandbox configuration options.
            timeout: Timeout for operations in seconds (default: 30.0)
            sse_read_timeout: SSE read timeout in seconds (default: 300.0)
            max_retries: Maximum number of retry attempts (default: 3)
            retry_interval: Wait time between retries in seconds (default: 2.0)
        """
        return cls(config=load_json_file(filepath), sandbox=sandbox, sandbox_options=sandbox_options,
                   timeout=timeout, sse_read_timeout=sse_read_timeout,
                   max_retries=max_retries, retry_interval=retry_interval)

    def add_server(
        self,
        name: str,
        server_config: dict[str, Any],
    ) -> None:
        """Add a server configuration.

        New servers are always stored under the 'mcpServers' key, even if the
        loaded config used the legacy 'servers' key.

        Args:
            name: The name to identify this server.
            server_config: The server configuration.
        """
        # (Fix: removed an unused _get_mcp_servers() call that had no effect.)
        if "mcpServers" not in self.config:
            self.config["mcpServers"] = {}

        self.config["mcpServers"][name] = server_config
        logger.debug(f"Added MCP server configuration: {name}")

    def remove_server(self, name: str) -> None:
        """Remove a server configuration.

        Args:
            name: The name of the server to remove.
        """
        mcp_servers = self._get_mcp_servers()
        if name in mcp_servers:
            # Remove from whichever key the config actually uses.
            if "mcpServers" in self.config:
                self.config["mcpServers"].pop(name, None)
            elif "servers" in self.config:
                self.config["servers"].pop(name, None)

            # If we removed an active session, remove it from active_sessions
            if name in self.active_sessions:
                self.active_sessions.remove(name)

            logger.debug(f"Removed MCP server configuration: {name}")
        else:
            logger.warning(f"Server '{name}' not found in configuration")

    def get_server_names(self) -> list[str]:
        """Get the list of configured server names.

        Returns:
            List of server names.
        """
        return list(self._get_mcp_servers().keys())

    def save_config(self, filepath: str) -> None:
        """Save the current configuration to a file.

        Args:
            filepath: The path to save the configuration to.
        """
        save_json_file(self.config, filepath)

    async def create_session(self, server_name: str, auto_initialize: bool = True) -> MCPSession | None:
        """Create a session for the specified server with retry logic.

        Args:
            server_name: The name of the server to create a session for.
            auto_initialize: Whether to automatically initialize the session.

        Returns:
            The created MCPSession, or None if no servers are configured at all
            (a UserWarning is emitted in that case).

        Raises:
            ValueError: If the specified server doesn't exist.
            MCPDependencyError: If required dependencies are missing.
            Exception: If session creation fails after all retries.
        """
        # Reuse an existing session if one is already open for this server.
        if server_name in self.sessions:
            logger.debug(f"Session for server '{server_name}' already exists, returning existing session")
            return self.sessions[server_name]

        servers = self._get_mcp_servers()

        if not servers:
            warnings.warn("No MCP servers defined in config", UserWarning, stacklevel=2)
            return None

        if server_name not in servers:
            raise ValueError(f"Server '{server_name}' not found in config. Available: {list(servers.keys())}")

        server_config = servers[server_name]

        # Retry loop: keep the last failure so it can be re-raised verbatim.
        last_exc: Exception | None = None

        for attempt in range(1, self.max_retries + 1):
            try:
                # Create connector with options (async: may install dependencies).
                connector = await create_connector_from_config(
                    server_config,
                    server_name=server_name,
                    sandbox=self.sandbox,
                    sandbox_options=self.sandbox_options,
                    timeout=self.timeout,
                    sse_read_timeout=self.sse_read_timeout,
                    installer=self.installer,
                    check_dependencies=self.check_dependencies,
                    tool_call_max_retries=self.tool_call_max_retries,
                    tool_call_retry_delay=self.tool_call_retry_delay,
                )

                session = MCPSession(
                    connector=connector,
                    session_id=f"mcp-{server_name}",
                    auto_connect=True,
                    auto_initialize=False,  # initialization is handled explicitly below
                )

                if auto_initialize:
                    await session.initialize()
                    logger.debug(f"Initialized session for server '{server_name}'")

                # Track the new session.
                self.sessions[server_name] = session
                if server_name not in self.active_sessions:
                    self.active_sessions.append(server_name)

                logger.info(f"Created session for MCP server '{server_name}' (attempt {attempt}/{self.max_retries})")
                return session

            except MCPDependencyError as e:
                # Dependency errors won't succeed on retry; the installer has
                # already reported the problem to the user, so just re-raise.
                logger.debug(f"Dependency error for server '{server_name}': {type(e).__name__}")
                raise
            except Exception as e:
                last_exc = e
                if attempt == self.max_retries:
                    break

                # First-attempt failures are common right after a fresh install,
                # so log them at info; escalate to warning on later attempts.
                log_level = logger.info if attempt == 1 else logger.warning
                log_level(
                    f"Failed to create session for server '{server_name}' (attempt {attempt}/{self.max_retries}): {e}, "
                    f"retrying in {self.retry_interval} seconds..."
                )
                await asyncio.sleep(self.retry_interval)

        # All retries failed
        error_msg = f"Failed to create session for server '{server_name}' after {self.max_retries} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)

    async def create_all_sessions(
        self,
        auto_initialize: bool = True,
    ) -> dict[str, MCPSession]:
        """Create sessions for all configured servers.

        Individual failures are logged and skipped so one bad server does not
        prevent the others from starting.

        Args:
            auto_initialize: Whether to automatically initialize the sessions.

        Returns:
            Dictionary mapping server names to their MCPSession instances.

        Warns:
            UserWarning: If no servers are configured.
        """
        servers = self._get_mcp_servers()

        if not servers:
            warnings.warn("No MCP servers defined in config", UserWarning, stacklevel=2)
            return {}

        logger.debug(f"Creating sessions for {len(servers)} servers")
        for name in servers:
            try:
                await self.create_session(name, auto_initialize)
            except Exception as e:
                logger.error(f"Failed to create session for server '{name}': {e}")

        logger.info(f"Created {len(self.sessions)} MCP sessions")
        return self.sessions

    def get_session(self, server_name: str) -> MCPSession:
        """Get an existing session.

        Args:
            server_name: The name of the server to get the session for.

        Returns:
            The MCPSession for the specified server.

        Raises:
            ValueError: If no session exists for the given server.
        """
        # (Fix: docstring previously claimed a "first active session" fallback
        # for None that was never implemented.)
        if server_name not in self.sessions:
            raise ValueError(f"No session exists for server '{server_name}'")

        return self.sessions[server_name]

    def get_all_active_sessions(self) -> dict[str, MCPSession]:
        """Get all active sessions.

        Returns:
            Dictionary mapping server names to their MCPSession instances.
        """
        return {name: self.sessions[name] for name in self.active_sessions if name in self.sessions}

    async def close_session(self, server_name: str) -> None:
        """Close a session.

        The session is removed from tracking even if the disconnect fails.

        Args:
            server_name: The name of the server to close the session for.
        """
        if server_name not in self.sessions:
            logger.warning(f"No session exists for server '{server_name}', nothing to close")
            return

        session = self.sessions[server_name]
        error_occurred = False

        try:
            logger.debug(f"Closing session for server '{server_name}'")
            await session.disconnect()
            logger.info(f"Successfully closed session for server '{server_name}'")
        except Exception as e:
            error_occurred = True
            logger.error(f"Error closing session for server '{server_name}': {e}")
        finally:
            # Remove the session regardless of whether disconnect succeeded
            self.sessions.pop(server_name, None)

            if server_name in self.active_sessions:
                self.active_sessions.remove(server_name)

            if error_occurred:
                logger.warning(f"Session for '{server_name}' removed from tracking despite disconnect error")

    async def close_all_sessions(self) -> None:
        """Close all active sessions.

        This method ensures all sessions are closed even if some fail.
        """
        # Snapshot the names first: close_session mutates self.sessions.
        server_names = list(self.sessions.keys())
        errors = []

        for server_name in server_names:
            try:
                logger.debug(f"Closing session for server '{server_name}'")
                await self.close_session(server_name)
            except Exception as e:
                error_msg = f"Failed to close session for server '{server_name}': {e}"
                logger.error(error_msg)
                errors.append(error_msg)

        if errors:
            logger.error(f"Encountered {len(errors)} errors while closing sessions")
        else:
            logger.debug("All sessions closed successfully")
openspace/grounding/backends/mcp/config.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration loader for MCP session.
3
+
4
+ This module provides functionality to load MCP configuration from JSON files.
5
+ """
6
+
7
+ from typing import Any, Optional
8
+
9
+ from openspace.grounding.core.types import SandboxOptions
10
+ from openspace.config.utils import get_config_value
11
+ from .transport.connectors import (
12
+ MCPBaseConnector,
13
+ HttpConnector,
14
+ SandboxConnector,
15
+ StdioConnector,
16
+ WebSocketConnector,
17
+ )
18
+ from .transport.connectors.utils import is_stdio_server
19
+ from .installer import MCPInstallerManager
20
+
21
+ # Import E2BSandbox
22
+ try:
23
+ from openspace.grounding.core.security import E2BSandbox
24
+ E2B_AVAILABLE = True
25
+ except ImportError:
26
+ E2BSandbox = None
27
+ E2B_AVAILABLE = False
28
+
29
async def create_connector_from_config(
    server_config: dict[str, Any],
    server_name: str = "unknown",
    sandbox: bool = False,
    sandbox_options: SandboxOptions | None = None,
    timeout: float = 30.0,
    sse_read_timeout: float = 300.0,
    installer: Optional[MCPInstallerManager] = None,
    check_dependencies: bool = True,
    tool_call_max_retries: int = 3,
    tool_call_retry_delay: float = 1.0,
) -> MCPBaseConnector:
    """Create a connector based on server configuration.

    Args:
        server_config: The server configuration section
        server_name: Name of the MCP server (for display purposes)
        sandbox: Whether to use sandboxed execution mode for running MCP servers.
        sandbox_options: Optional sandbox configuration options.
        timeout: Timeout for operations in seconds (default: 30.0)
        sse_read_timeout: SSE read timeout in seconds (default: 300.0)
        installer: Optional installer manager for dependency installation
        check_dependencies: Whether to check and install dependencies (default: True)
        tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
        tool_call_retry_delay: Initial delay between retries in seconds (default: 1.0)

    Returns:
        A configured connector instance

    Raises:
        ValueError: If the connector type cannot be determined.
        RuntimeError: If dependencies are not installed and user declines installation
    """
    stdio = is_stdio_server(server_config)

    original_command = get_config_value(server_config, "command")
    original_args = get_config_value(server_config, "args", [])

    # Dependency checks only make sense for locally-spawned (stdio) servers.
    if stdio and check_dependencies:
        if installer is None:
            from .installer import get_global_installer
            installer = get_global_installer()
        # Verify/install dependencies using the original command and args.
        await installer.ensure_dependencies(server_name, original_command, original_args)

    # Plain stdio connector (command-based).
    if stdio and not sandbox:
        return StdioConnector(
            command=get_config_value(server_config, "command"),
            args=get_config_value(server_config, "args"),
            env=get_config_value(server_config, "env", None),
        )

    # Sandboxed stdio connector (runs the server inside an E2B sandbox).
    if stdio and sandbox:
        if not E2B_AVAILABLE:
            raise ImportError(
                "E2B sandbox support not available. Please install e2b-code-interpreter: "
                "'pip install e2b-code-interpreter'"
            )

        opts = sandbox_options or {}
        e2b_sandbox = E2BSandbox(opts)

        return SandboxConnector(
            sandbox=e2b_sandbox,
            command=get_config_value(server_config, "command"),
            args=get_config_value(server_config, "args"),
            env=get_config_value(server_config, "env", None),
            supergateway_command=opts.get("supergateway_command", "npx -y supergateway"),
            port=opts.get("port", 3000),
            # Sandbox options may override the function-level timeouts.
            timeout=opts.get("timeout", timeout),
            sse_read_timeout=opts.get("sse_read_timeout", sse_read_timeout),
        )

    # HTTP connector.
    if "url" in server_config:
        return HttpConnector(
            base_url=get_config_value(server_config, "url"),
            headers=get_config_value(server_config, "headers", None),
            auth_token=get_config_value(server_config, "auth_token", None),
            timeout=timeout,
            sse_read_timeout=sse_read_timeout,
            tool_call_max_retries=tool_call_max_retries,
            tool_call_retry_delay=tool_call_retry_delay,
        )

    # WebSocket connector.
    if "ws_url" in server_config:
        return WebSocketConnector(
            url=get_config_value(server_config, "ws_url"),
            headers=get_config_value(server_config, "headers", None),
            auth_token=get_config_value(server_config, "auth_token", None),
        )

    raise ValueError("Cannot determine connector type from config")
openspace/grounding/backends/mcp/installer.py ADDED
@@ -0,0 +1,697 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import sys
3
+ import shutil
4
+ from typing import Callable, Awaitable, Optional, Dict, List
5
+ from openspace.utils.logging import Logger
6
+
7
+ logger = Logger.get_logger(__name__)
8
+
9
# Async yes/no prompt callback: receives a message, returns whether the
# user accepted the installation.
PromptFunc = Callable[[str], Awaitable[bool]]

# Global lock to prevent concurrent user prompts
_prompt_lock = asyncio.Lock()
13
+
14
+
15
class MCPDependencyError(RuntimeError):
    """Base exception for all MCP dependency-related failures."""
18
+
19
+
20
class MCPCommandNotFoundError(MCPDependencyError):
    """Raised when a required command (e.g. npx, uvx) is not on PATH."""
23
+
24
+
25
class MCPInstallationCancelledError(MCPDependencyError):
    """Raised when the user declines a dependency installation."""
28
+
29
+
30
class MCPInstallationFailedError(MCPDependencyError):
    """Raised when a dependency installation attempt fails."""
33
+
34
+
35
class Colors:
    """ANSI terminal escape sequences used to style CLI installer prompts."""
    RESET = "\033[0m"   # clear all attributes
    BOLD = "\033[1m"    # bold text
    RED = "\033[91m"    # bright red
    YELLOW = "\033[93m" # bright yellow
    GREEN = "\033[92m"  # bright green
    CYAN = "\033[96m"   # bright cyan
    GRAY = "\033[90m"   # bright black (gray)
    WHITE = "\033[97m"  # bright white
    BLUE = "\033[94m"   # bright blue
45
+
46
+
47
+ class MCPInstallerManager:
48
+ """
49
+ MCP dependencies package installer manager.
50
+
51
+ Responsible for detecting if the MCP server dependencies are installed, and if not, asking the user whether to install them.
52
+ """
53
+
54
+ def __init__(self, prompt: PromptFunc | None = None, auto_install: bool = False, verbose: bool = False):
55
+ """Initialize the installer manager.
56
+
57
+ Args:
58
+ prompt: Custom user prompt function, if None, the default CLI prompt is used
59
+ auto_install: If True, automatically install dependencies without asking the user
60
+ verbose: If True, show detailed installation logs; if False, only show progress indicator
61
+ """
62
+ self._prompt: PromptFunc | None = prompt or self._default_cli_prompt
63
+ self._auto_install = auto_install
64
+ self._verbose = verbose
65
+ self._installed_cache: Dict[str, bool] = {} # Cache for checked packages
66
+ self._failed_installations: Dict[str, str] = {} # Track failed installations to avoid retry
67
+
68
+ async def _default_cli_prompt(self, message: str) -> bool:
69
+ """Default CLI prompt function (called within lock by ensure_dependencies)."""
70
+ from openspace.utils.display import print_separator, colorize
71
+
72
+ print()
73
+ print_separator(70, 'c', 2)
74
+ print(f" {colorize('MCP dependencies installation prompt', color=Colors.BLUE, bold=True)}")
75
+ print_separator(70, 'c', 2)
76
+ print(f" {message}")
77
+ print_separator(70, 'gr', 2)
78
+ print(f" {colorize('[y/yes]', color=Colors.GREEN)} Install | {colorize('[n/no]', color=Colors.RED)} Cancel")
79
+ print_separator(70, 'gr', 2)
80
+ print(f" {colorize('Your choice:', bold=True)} ", end="", flush=True)
81
+
82
+ answer = await asyncio.get_running_loop().run_in_executor(None, sys.stdin.readline)
83
+ response = answer.strip().lower() in {"y", "yes"}
84
+
85
+ if response:
86
+ print(f"{Colors.GREEN}✓ Installation confirmed{Colors.RESET}\n")
87
+ else:
88
+ print(f"{Colors.RED}✗ Installation cancelled{Colors.RESET}\n")
89
+
90
+ return response
91
+
92
+ async def _ask_user(self, message: str) -> bool:
93
+ """Ask the user whether to install."""
94
+ if self._auto_install:
95
+ logger.info("Automatic installation mode enabled, will automatically install dependencies")
96
+ return True
97
+
98
+ if self._prompt:
99
+ try:
100
+ return await self._prompt(message)
101
+ except Exception as e:
102
+ logger.error(f"Error asking user: {e}")
103
+ return False
104
+ return False
105
+
106
+ def _check_command_available(self, command: str) -> bool:
107
+ """Check if the command is available.
108
+
109
+ Args:
110
+ command: The command to check (e.g. "npx", "uvx")
111
+
112
+ Returns:
113
+ bool: Whether the command is available
114
+ """
115
+ return shutil.which(command) is not None
116
+
117
+ async def _check_package_installed(self, command: str, args: List[str]) -> bool:
118
+ """Check if the package is installed.
119
+
120
+ Args:
121
+ command: The command to check (e.g. "npx", "uvx")
122
+ args: The arguments list
123
+
124
+ Returns:
125
+ bool: Whether the package is installed
126
+ """
127
+ # Build cache key
128
+ cache_key = f"{command}:{':'.join(args)}"
129
+
130
+ # Check cache
131
+ if cache_key in self._installed_cache:
132
+ return self._installed_cache[cache_key]
133
+
134
+ # For different types of commands, use different check methods
135
+ try:
136
+ if command == "npx":
137
+ # For npx, check if the npm package exists
138
+ package_name = self._extract_npm_package(args)
139
+ if package_name:
140
+ result = await self._check_npm_package(package_name)
141
+ self._installed_cache[cache_key] = result
142
+ return result
143
+ elif command == "uvx":
144
+ # For uvx, check if the Python package exists
145
+ package_name = self._extract_python_package(args)
146
+ if package_name:
147
+ result = await self._check_python_package(package_name)
148
+ self._installed_cache[cache_key] = result
149
+ return result
150
+ elif command == "uv":
151
+ # For "uv run --with package ...", check if the Python package exists
152
+ package_name = self._extract_uv_package(args)
153
+ if package_name:
154
+ result = await self._check_uv_pip_package(package_name)
155
+ self._installed_cache[cache_key] = result
156
+ return result
157
+ except Exception as e:
158
+ logger.debug(f"Error checking package installation status: {e}")
159
+
160
+ # Default to assuming not installed
161
+ return False
162
+
163
+ def _extract_npm_package(self, args: List[str]) -> Optional[str]:
164
+ """Extract package name from npx arguments.
165
+
166
+ Args:
167
+ args: npx arguments list, e.g. ["-y", "mcp-excalidraw-server"] or ["bazi-mcp"]
168
+
169
+ Returns:
170
+ Package name (without version tag) or None
171
+ """
172
+ for i, arg in enumerate(args):
173
+ # Skip option parameters
174
+ if arg.startswith("-"):
175
+ continue
176
+
177
+ # Found package name, now strip version tag
178
+ package_name = arg
179
+
180
+ # Handle scoped packages: @scope/package@version -> @scope/package
181
+ if package_name.startswith("@"):
182
+ # Scoped package like @rtuin/mcp-mermaid-validator@latest
183
+ parts = package_name.split("/", 1)
184
+ if len(parts) == 2:
185
+ scope = parts[0]
186
+ name_with_version = parts[1]
187
+ # Remove version tag from name part (e.g., "pkg@latest" -> "pkg")
188
+ name = name_with_version.split("@")[0] if "@" in name_with_version else name_with_version
189
+ return f"{scope}/{name}"
190
+ return package_name
191
+ else:
192
+ # Regular package like mcp-deepwiki@latest -> mcp-deepwiki
193
+ return package_name.split("@")[0] if "@" in package_name else package_name
194
+
195
+ return None
196
+
197
+ def _extract_python_package(self, args: List[str]) -> Optional[str]:
198
+ """Extract package name from uvx arguments.
199
+
200
+ Args:
201
+ args: uvx arguments list, e.g. ["--from", "office-powerpoint-mcp-server", "ppt_mcp_server"]
202
+ or ["--with", "mcp==1.9.0", "sitemap-mcp-server"]
203
+ or ["arxiv-mcp-server", "--storage-path", "./path"]
204
+
205
+ Returns:
206
+ Package name or None
207
+ """
208
+ # Find --from parameter (this is the package to install)
209
+ for i, arg in enumerate(args):
210
+ if arg == "--from" and i + 1 < len(args):
211
+ return args[i + 1]
212
+
213
+ # Skip option flags and their values, find the main package (FIRST positional arg)
214
+ # Options that take a value: --with, --python, --from, --storage-path, etc.
215
+ options_with_value = {"--with", "--from", "--python", "-p", "--storage-path"}
216
+ skip_next = False
217
+
218
+ for arg in args:
219
+ if skip_next:
220
+ skip_next = False
221
+ continue
222
+ if arg in options_with_value:
223
+ skip_next = True
224
+ continue
225
+ if arg.startswith("-"):
226
+ # Other flags without values (or unknown options with values)
227
+ # Also skip the next arg if it looks like an option value (doesn't start with -)
228
+ continue
229
+ # First non-option argument is the package name
230
+ return arg
231
+
232
+ return None
233
+
234
+ def _extract_uv_package(self, args: List[str]) -> Optional[str]:
235
+ """Extract package name from uv run arguments.
236
+
237
+ Args:
238
+ args: uv arguments list, e.g. ["run", "--with", "biomcp-python", "biomcp", "run"]
239
+
240
+ Returns:
241
+ Package name or None
242
+ """
243
+ # Find --with parameter (this specifies the package to install)
244
+ for i, arg in enumerate(args):
245
+ if arg == "--with" and i + 1 < len(args):
246
+ package_name = args[i + 1]
247
+ # Remove version specifier if present (e.g., "mcp==1.9.0" -> "mcp")
248
+ if "==" in package_name:
249
+ return package_name.split("==")[0]
250
+ if ">=" in package_name:
251
+ return package_name.split(">=")[0]
252
+ return package_name
253
+
254
+ return None
255
+
256
+ async def _check_npm_package(self, package_name: str) -> bool:
257
+ """Check if the npm package is globally installed.
258
+
259
+ Args:
260
+ package_name: npm package name
261
+
262
+ Returns:
263
+ bool: Whether the npm package is installed
264
+ """
265
+ try:
266
+ process = await asyncio.create_subprocess_exec(
267
+ "npm", "list", "-g", package_name,
268
+ stdout=asyncio.subprocess.PIPE,
269
+ stderr=asyncio.subprocess.PIPE
270
+ )
271
+ stdout, stderr = await process.communicate()
272
+
273
+ # npm list returns 0 if the package is installed
274
+ return process.returncode == 0
275
+ except Exception as e:
276
+ logger.debug(f"Error checking npm package {package_name}: {e}")
277
+ return False
278
+
279
+ async def _check_python_package(self, package_name: str) -> bool:
280
+ """Check if the Python package is installed as a uvx tool.
281
+
282
+ uvx tools are installed in ~/.local/share/uv/tools/ directory,
283
+ not in the current pip environment.
284
+
285
+ Args:
286
+ package_name: Python package/tool name
287
+
288
+ Returns:
289
+ bool: Whether the uvx tool is installed
290
+ """
291
+ import os
292
+ from pathlib import Path
293
+
294
+ # Strip version specifier if present (e.g., "mcp==1.9.0" -> "mcp")
295
+ clean_name = package_name.split("==")[0].split(">=")[0].split("<=")[0].split(">")[0].split("<")[0]
296
+
297
+ # Check if uvx tool exists in the standard uv tools directory
298
+ uv_tools_dir = Path.home() / ".local" / "share" / "uv" / "tools"
299
+ tool_dir = uv_tools_dir / clean_name
300
+
301
+ if tool_dir.exists():
302
+ logger.debug(f"uvx tool '{clean_name}' found at {tool_dir}")
303
+ return True
304
+
305
+ # Fallback: try running uvx with --help to check if it's available
306
+ try:
307
+ process = await asyncio.create_subprocess_exec(
308
+ "uvx", clean_name, "--help",
309
+ stdout=asyncio.subprocess.PIPE,
310
+ stderr=asyncio.subprocess.PIPE
311
+ )
312
+ # Just wait briefly, don't need the full output
313
+ try:
314
+ await asyncio.wait_for(process.communicate(), timeout=5.0)
315
+ except asyncio.TimeoutError:
316
+ process.kill()
317
+ await process.wait()
318
+
319
+ # If it didn't error immediately, the tool likely exists
320
+ return process.returncode == 0
321
+ except Exception as e:
322
+ logger.debug(f"Error checking uvx tool {clean_name}: {e}")
323
+
324
+ return False
325
+
326
+ async def _check_uv_pip_package(self, package_name: str) -> bool:
327
+ """Check if a Python package is installed via uv pip.
328
+
329
+ Args:
330
+ package_name: Python package name
331
+
332
+ Returns:
333
+ bool: Whether the package is installed
334
+ """
335
+ # Strip version specifier if present
336
+ clean_name = package_name.split("==")[0].split(">=")[0].split("<=")[0].split(">")[0].split("<")[0]
337
+
338
+ try:
339
+ # Try using uv pip show to check if package is installed
340
+ process = await asyncio.create_subprocess_exec(
341
+ "uv", "pip", "show", clean_name,
342
+ stdout=asyncio.subprocess.PIPE,
343
+ stderr=asyncio.subprocess.PIPE
344
+ )
345
+ stdout, stderr = await process.communicate()
346
+
347
+ if process.returncode == 0:
348
+ logger.debug(f"uv pip package '{clean_name}' found")
349
+ return True
350
+ except Exception as e:
351
+ logger.debug(f"Error checking uv pip package {clean_name}: {e}")
352
+
353
+ # Fallback: check with regular pip
354
+ try:
355
+ process = await asyncio.create_subprocess_exec(
356
+ "pip", "show", clean_name,
357
+ stdout=asyncio.subprocess.PIPE,
358
+ stderr=asyncio.subprocess.PIPE
359
+ )
360
+ stdout, stderr = await process.communicate()
361
+
362
+ return process.returncode == 0
363
+ except Exception as e:
364
+ logger.debug(f"Error checking pip package {clean_name}: {e}")
365
+
366
+ return False
367
+
368
    async def _install_package(self, command: str, args: List[str], use_sudo: bool = False) -> bool:
        """Execute the install command for the package behind *command*/*args*.

        In verbose (or sudo) mode the installer's output is streamed to the
        terminal line by line; in quiet mode a spinner is shown instead. On a
        permission error without sudo, the user is asked whether to retry the
        same installation with sudo (recursive call with use_sudo=True).

        Args:
            command: The launcher command (e.g. "npx", "uvx").
            args: The launcher's argument list.
            use_sudo: Whether to prefix the install command with ``sudo``.

        Returns:
            bool: Whether the installation succeeded.
        """
        install_command = self._get_install_command(command, args)

        if not install_command:
            logger.error("Cannot determine install command")
            return False

        # Add sudo if requested
        if use_sudo:
            install_command = ["sudo"] + install_command

        logger.info(f"Executing install command: {' '.join(install_command)}")

        try:
            # For sudo commands, always show verbose output so password prompt is visible
            if self._verbose or use_sudo:
                # Verbose mode: show all installation logs
                from openspace.utils.display import print_separator, colorize

                print_separator(70, 'c', 2)
                if use_sudo:
                    print(f" {colorize('Installing with administrator privileges...', color=Colors.BLUE)}")
                    print(f" {colorize('>> You will be prompted for your password below <<', color=Colors.YELLOW)}")
                else:
                    print(f" {colorize('Installing dependencies...', color=Colors.BLUE)}")
                print(f" {colorize('Command: ' + ' '.join(install_command), color=Colors.GRAY)}")
                print_separator(70, 'c', 2)
                print()

                # For sudo, don't redirect stdin so password prompt works
                if use_sudo:
                    process = await asyncio.create_subprocess_exec(
                        *install_command,
                        stdout=asyncio.subprocess.PIPE,
                        stderr=asyncio.subprocess.STDOUT,
                        stdin=None  # Let sudo use terminal for password
                    )
                else:
                    process = await asyncio.create_subprocess_exec(
                        *install_command,
                        stdout=asyncio.subprocess.PIPE,
                        stderr=asyncio.subprocess.STDOUT
                    )

                # Real-time output of installation logs; readline drains the
                # pipe until EOF, so the child can never block on a full pipe.
                output_lines = []
                while True:
                    line = await process.stdout.readline()
                    if not line:
                        break
                    line_str = line.decode().rstrip()
                    output_lines.append(line_str)
                    print(f"{Colors.GRAY}{line_str}{Colors.RESET}")

                await process.wait()
                full_output = '\n'.join(output_lines)
            else:
                # Quiet mode: only show progress indicator
                print(f"\n{Colors.BLUE}Installing dependencies...{Colors.RESET} ", end="", flush=True)

                process = await asyncio.create_subprocess_exec(
                    *install_command,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE
                )

                # Show spinner animation while installing
                spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
                spinner_idx = 0

                # NOTE(review): waiting for exit before communicate() leaves
                # the pipes undrained; a very chatty installer could fill the
                # pipe buffer and stall — confirm output volume stays small.
                while True:
                    try:
                        await asyncio.wait_for(process.wait(), timeout=0.1)
                        break
                    except asyncio.TimeoutError:
                        print(f"\r{Colors.BLUE}Installing dependencies...{Colors.RESET} {Colors.CYAN}{spinner[spinner_idx]}{Colors.RESET}", end="", flush=True)
                        spinner_idx = (spinner_idx + 1) % len(spinner)

                # Clear the spinner line
                print(f"\r{' ' * 100}\r", end="", flush=True)

                # Collect output
                stdout, stderr = await process.communicate()
                full_output = (stdout or stderr).decode() if (stdout or stderr) else ""

            if process.returncode == 0:
                print(f"{Colors.GREEN}✓ Dependencies installed successfully{Colors.RESET}")
                if not use_sudo:
                    print(f"{Colors.GRAY}(Note: First connection may take a moment to initialize){Colors.RESET}")
                # Mark this exact (command, args) pair as installed.
                cache_key = f"{command}:{':'.join(args)}"
                self._installed_cache[cache_key] = True
                return True
            else:
                # Check if it's a permission error (npm's EACCES or a generic
                # "permission denied" in the captured output).
                is_permission_error = "EACCES" in full_output or "permission denied" in full_output.lower()

                if is_permission_error and not use_sudo:
                    print(f"\n{Colors.YELLOW}Permission denied{Colors.RESET}")
                    print(f"{Colors.GRAY}The installation requires administrator privileges.{Colors.RESET}\n")

                    # Ask user if they want to use sudo
                    message = (
                        f"\n{Colors.WHITE}Administrator privileges required{Colors.RESET}\n\n"
                        f"Command: {Colors.GRAY}{' '.join(install_command)}{Colors.RESET}\n\n"
                        f"{Colors.YELLOW}Do you want to retry with sudo (requires password)?{Colors.RESET}"
                    )

                    if await self._ask_user(message):
                        # No extra print needed, the verbose mode will show clear instructions
                        return await self._install_package(command, args, use_sudo=True)
                    else:
                        print(f"\n{Colors.RED}✗ Installation cancelled{Colors.RESET}")
                        return False
                else:
                    print(f"{Colors.RED}✗ Dependencies installation failed (return code: {process.returncode}){Colors.RESET}")
                    # Show error output if not already shown
                    if not self._verbose and full_output:
                        # Limit error output to last 20 lines
                        error_lines = full_output.split('\n')
                        if len(error_lines) > 20:
                            error_lines = ['...(truncated)...'] + error_lines[-20:]
                        print(f"{Colors.GRAY}Error output:\n{chr(10).join(error_lines)}{Colors.RESET}")

                    # Add general guidance for manual installation
                    print(f"\n{Colors.YELLOW}Tip:{Colors.RESET} {Colors.GRAY}If automatic installation fails, please refer to the")
                    print(f"official documentation of the MCP server for manual installation instructions.{Colors.RESET}\n")

                    return False

        except Exception as e:
            logger.error(f"Error installing dependencies: {e}")
            print(f"{Colors.RED}✗ Error occurred during installation: {e}{Colors.RESET}")
            return False
512
+
513
+ def _get_install_command(self, command: str, args: List[str]) -> Optional[List[str]]:
514
+ """Generate install command based on command type.
515
+
516
+ Args:
517
+ command: The command to execute (e.g. "npx", "uvx", "uv")
518
+ args: The original arguments list
519
+
520
+ Returns:
521
+ Install command list or None
522
+ """
523
+ if command == "npx":
524
+ package_name = self._extract_npm_package(args)
525
+ if package_name:
526
+ return ["npm", "install", "-g", package_name]
527
+ elif command == "uvx":
528
+ package_name = self._extract_python_package(args)
529
+ if package_name:
530
+ return ["pip", "install", package_name]
531
+ elif command == "uv":
532
+ # Handle "uv run --with package_name ..." format
533
+ package_name = self._extract_uv_package(args)
534
+ if package_name:
535
+ return ["uv", "pip", "install", package_name]
536
+
537
+ return None
538
+
539
+ async def ensure_dependencies(
540
+ self,
541
+ server_name: str,
542
+ command: str,
543
+ args: List[str]
544
+ ) -> bool:
545
+ """Ensure the dependencies of the MCP server are installed.
546
+
547
+ This method checks if the dependencies are installed, and if not, asks the user whether to install them.
548
+
549
+ Args:
550
+ server_name: MCP server name (for display purposes)
551
+ command: The command to execute (e.g. "npx", "uvx")
552
+ args: The arguments list
553
+
554
+ Returns:
555
+ bool: Whether the dependencies are installed (installed or successfully installed)
556
+
557
+ Raises:
558
+ RuntimeError: When the command is not available or the user refuses to install
559
+ """
560
+ # Use lock to ensure entire installation process is atomic
561
+ async with _prompt_lock:
562
+ return await self._ensure_dependencies_impl(server_name, command, args)
563
+
564
    async def _ensure_dependencies_impl(
        self,
        server_name: str,
        command: str,
        args: List[str]
    ) -> bool:
        """Internal implementation of ensure_dependencies (called within lock).

        Decision pipeline:
          1. Direct interpreters (node/python/...) need no package install.
          2. GitHub-based npx packages are fetched/built by npx itself.
          3. Servers whose install already failed fail fast.
          4. Verify the launcher command exists (uvx additionally needs uv).
          5. If the package is missing, prompt the user and run the install.

        Raises:
            MCPDependencyError: Previously failed install or no derivable
                install command.
            MCPCommandNotFoundError: Launcher command (or uv) unavailable.
            MCPInstallationCancelledError: User declined the prompt.
            MCPInstallationFailedError: The install command failed.
        """
        # Skip dependency checking for direct script execution commands.
        # These run scripts directly and don't need package installation.
        SKIP_COMMANDS = {"node", "python", "python3", "bash", "sh", "deno", "bun"}

        if command.lower() in SKIP_COMMANDS:
            logger.debug(f"Skipping dependency check for direct script execution command: {command}")
            return True

        # Skip dependency checking for GitHub-based npx packages.
        # npx downloads, builds, and runs them itself; "npm install -g"
        # doesn't work properly for GitHub packages that require building.
        if command == "npx":
            package_name = self._extract_npm_package(args)
            if package_name and package_name.startswith("github:"):
                logger.debug(f"Skipping dependency check for GitHub-based npx package: {package_name}")
                return True

        # Fail fast if this exact server/command/args already failed once.
        cache_key = f"{server_name}:{command}:{':'.join(args)}"
        if cache_key in self._failed_installations:
            error_msg = self._failed_installations[cache_key]
            logger.debug(f"Skipping installation for '{server_name}' - previously failed")
            raise MCPDependencyError(error_msg)

        # Special handling for uvx - it cannot run without uv itself.
        if command == "uvx":
            if not self._check_command_available("uv"):
                # Only show once to user, no verbose logging
                print(f"\n{Colors.RED}✗ Server '{server_name}' requires 'uv' to be installed{Colors.RESET}")
                print(f"{Colors.YELLOW}Please install uv first:")
                print(f"  • macOS/Linux: curl -LsSf https://astral.sh/uv/install.sh | sh")
                print(f"  • Or with pip: pip install uv")
                print(f"  • Or with brew: brew install uv{Colors.RESET}\n")

                error_msg = f"uvx requires 'uv' to be installed (server: {server_name})"
                self._failed_installations[cache_key] = error_msg
                raise MCPCommandNotFoundError(error_msg)

        # Check if the launcher command itself is available.
        if not self._check_command_available(command):
            error_msg = (
                f"Command '{command}' is not available.\n"
                f"Please install the necessary tools first."
            )
            logger.error(error_msg)
            self._failed_installations[cache_key] = error_msg
            raise MCPCommandNotFoundError(error_msg)

        # Nothing to do when the package is already installed.
        if await self._check_package_installed(command, args):
            logger.debug(f"The dependencies of the MCP server '{server_name}' are installed")
            return True

        # Extract package name/type purely for display in the prompt.
        if command == "npx":
            package_name = self._extract_npm_package(args)
            package_type = "npm"
        elif command == "uvx":
            package_name = self._extract_python_package(args)
            package_type = "Python"
        elif command == "uv":
            package_name = self._extract_uv_package(args)
            package_type = "Python"
        else:
            package_name = f"{command} {' '.join(args)}"
            package_type = "package"

        # Derive the install command shown to (and run for) the user.
        install_cmd = self._get_install_command(command, args)

        # If we can't determine an install command, show helpful message.
        if not install_cmd:
            print(f"\n{Colors.YELLOW}Cannot automatically install dependencies for '{server_name}'{Colors.RESET}")
            print(f"{Colors.GRAY}Command: {command} {' '.join(args)}{Colors.RESET}")
            print(f"\n{Colors.WHITE}This MCP server may require manual installation or configuration.{Colors.RESET}")
            print(f"{Colors.GRAY}Please refer to the MCP server's official documentation for installation instructions.{Colors.RESET}\n")

            error_msg = f"Manual installation required for '{server_name}' (command: {command})"
            self._failed_installations[cache_key] = error_msg
            raise MCPDependencyError(error_msg)

        install_cmd_str = ' '.join(install_cmd)

        # Build the confirmation message.
        message = (
            f"\n{Colors.WHITE}The MCP server needs to install dependencies{Colors.RESET}\n\n"
            f"Server name: {Colors.CYAN}{server_name}{Colors.RESET}\n"
            f"Package type: {Colors.YELLOW}{package_type}{Colors.RESET}\n"
            f"Package name: {Colors.YELLOW}{package_name or 'Unknown'}{Colors.RESET}\n"
            f"Install command: {Colors.GRAY}{install_cmd_str}{Colors.RESET}\n\n"
            f"{Colors.YELLOW}Whether to install this dependency package?{Colors.RESET}"
        )

        # Ask the user (auto-install mode answers yes automatically).
        if not await self._ask_user(message):
            error_msg = f"User cancelled the dependency installation for '{server_name}'"
            logger.warning(error_msg)
            self._failed_installations[cache_key] = error_msg
            raise MCPInstallationCancelledError(error_msg)

        # Execute installation
        success = await self._install_package(command, args)

        if not success:
            error_msg = f"Dependency installation failed for '{server_name}'"
            logger.error(error_msg)
            self._failed_installations[cache_key] = error_msg
            raise MCPInstallationFailedError(error_msg)

        return True
681
+
682
+
683
# Process-wide singleton installer (created lazily by get_global_installer).
_global_installer: Optional[MCPInstallerManager] = None


def get_global_installer() -> MCPInstallerManager:
    """Return the global installer manager, creating it on first use."""
    global _global_installer
    if _global_installer is None:
        _global_installer = MCPInstallerManager()
    return _global_installer
693
+
694
def set_global_installer(installer: MCPInstallerManager) -> None:
    """Replace the global installer manager (e.g. to inject a custom prompt)."""
    global _global_installer
    _global_installer = installer
openspace/grounding/backends/mcp/provider.py ADDED
@@ -0,0 +1,473 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MCP Provider implementation.
3
+
4
+ This module provides a provider for managing MCP server sessions.
5
+ """
6
+ import asyncio
7
+ from typing import Dict, List, Optional
8
+
9
+ from openspace.grounding.backends.mcp.session import MCPSession
10
+ from openspace.grounding.core.provider import Provider
11
+ from openspace.grounding.core.types import SessionConfig, BackendType, ToolSchema
12
+ from openspace.grounding.backends.mcp.client import MCPClient
13
+ from openspace.grounding.backends.mcp.installer import MCPInstallerManager, MCPDependencyError
14
+ from openspace.grounding.backends.mcp.tool_cache import get_tool_cache
15
+ from openspace.grounding.backends.mcp.tool_converter import _sanitize_mcp_schema
16
+ from openspace.grounding.core.tool import BaseTool, RemoteTool
17
+ from openspace.utils.logging import Logger
18
+ from openspace.config.utils import get_config_value
19
+
20
+ logger = Logger.get_logger(__name__)
21
+
22
+
23
+ class MCPProvider(Provider[MCPSession]):
24
+ """
25
+ MCP Provider manages multiple MCP server sessions.
26
+
27
+ Each MCP server defined in config corresponds to one session.
28
+ The provider handles lazy/eager session creation and tool aggregation.
29
+ """
30
+
31
+ def __init__(self, config: Dict | None = None, installer: Optional[MCPInstallerManager] = None):
32
+ """Initialize MCP Provider.
33
+
34
+ Args:
35
+ config: Configuration dict with MCP server definitions.
36
+ Example: {"mcpServers": {"server1": {...}, "server2": {...}}}
37
+ installer: Optional installer manager for dependency installation
38
+ """
39
+ super().__init__(BackendType.MCP, config)
40
+
41
+ # Extract MCP-specific configuration
42
+ sandbox = get_config_value(config, "sandbox", False)
43
+ timeout = get_config_value(config, "timeout", 30)
44
+ sse_read_timeout = get_config_value(config, "sse_read_timeout", 300.0)
45
+ max_retries = get_config_value(config, "max_retries", 3)
46
+ retry_interval = get_config_value(config, "retry_interval", 2.0)
47
+ check_dependencies = get_config_value(config, "check_dependencies", True)
48
+ auto_install = get_config_value(config, "auto_install", False)
49
+ # Tool call retry settings (for transient errors like 400, 500, etc.)
50
+ tool_call_max_retries = get_config_value(config, "tool_call_max_retries", 3)
51
+ tool_call_retry_delay = get_config_value(config, "tool_call_retry_delay", 1.0)
52
+
53
+ # Create sandbox options if sandbox is enabled
54
+ sandbox_options = None
55
+ if sandbox:
56
+ sandbox_options = {
57
+ "timeout": timeout,
58
+ "sse_read_timeout": sse_read_timeout,
59
+ }
60
+
61
+ # Create installer with auto_install setting if not provided
62
+ if installer is None and auto_install:
63
+ installer = MCPInstallerManager(auto_install=True)
64
+
65
+ # Initialize MCPClient with configuration
66
+ self._client = MCPClient(
67
+ config=config or {},
68
+ sandbox=sandbox,
69
+ sandbox_options=sandbox_options,
70
+ timeout=timeout,
71
+ sse_read_timeout=sse_read_timeout,
72
+ max_retries=max_retries,
73
+ retry_interval=retry_interval,
74
+ installer=installer,
75
+ check_dependencies=check_dependencies,
76
+ tool_call_max_retries=tool_call_max_retries,
77
+ tool_call_retry_delay=tool_call_retry_delay,
78
+ )
79
+
80
+ # Map server name to session for quick lookup
81
+ self._server_sessions: Dict[str, MCPSession] = {}
82
+
83
+ async def initialize(self) -> None:
84
+ """Initialize the MCP provider.
85
+
86
+ If config["eager_sessions"] is True, creates sessions for all configured servers.
87
+ Otherwise, sessions are created lazily on first access.
88
+ """
89
+ if self.is_initialized:
90
+ return
91
+
92
+ # config can be dict or Pydantic model, use utility function
93
+ eager = get_config_value(self.config, "eager_sessions", False)
94
+ if eager:
95
+ servers = self.list_servers()
96
+ logger.debug(f"Eagerly initializing {len(servers)} MCP server sessions")
97
+ for srv in servers:
98
+ if srv not in self._server_sessions:
99
+ cfg = SessionConfig(
100
+ session_name=f"mcp-{srv}",
101
+ backend_type=BackendType.MCP,
102
+ connection_params={"server": srv},
103
+ )
104
+ await self.create_session(cfg)
105
+
106
+ self.is_initialized = True
107
+ logger.info(
108
+ f"MCPProvider initialized with {len(self.list_servers())} servers (eager={eager})"
109
+ )
110
+
111
+ def list_servers(self) -> List[str]:
112
+ """Return all configured MCP server names from MCPClient config.
113
+
114
+ Returns:
115
+ List of server names
116
+ """
117
+ return self._client.get_server_names()
118
+
119
+ async def create_session(self, session_config: SessionConfig) -> MCPSession:
120
+ """Create a new MCP session for a specific server.
121
+
122
+ Args:
123
+ session_config: Must contain 'server' in connection_params
124
+
125
+ Returns:
126
+ MCPSession instance
127
+
128
+ Raises:
129
+ ValueError: If 'server' not in connection_params
130
+ Exception: If session creation or initialization fails
131
+ """
132
+ server = get_config_value(session_config.connection_params, "server")
133
+ if not server:
134
+ raise ValueError("MCPProvider.create_session requires 'server' in connection_params")
135
+
136
+ # Generate session_id: mcp-<server_name>
137
+ session_id = f"{self.backend_type.value}-{server}"
138
+
139
+ # Check if session already exists
140
+ if server in self._server_sessions:
141
+ logger.debug(f"Session for server '{server}' already exists, returning existing session")
142
+ return self._server_sessions[server]
143
+
144
+ # Create session through MCPClient
145
+ try:
146
+ logger.debug(f"Creating new session for MCP server: {server}")
147
+ session = await self._client.create_session(server, auto_initialize=True)
148
+ session.session_id = session_id
149
+
150
+ # Store in both maps
151
+ self._server_sessions[server] = session
152
+ self._sessions[session_id] = session
153
+
154
+ logger.info(f"Created MCP session '{session_id}' for server '{server}'")
155
+ return session
156
+ except MCPDependencyError as e:
157
+ # Dependency errors already shown to user, just debug log
158
+ logger.debug(f"Dependency error for server '{server}': {type(e).__name__}")
159
+ raise
160
+ except Exception as e:
161
+ logger.error(f"Failed to create session for server '{server}': {e}")
162
+ raise
163
+
164
+ async def close_session(self, session_name: str) -> None:
165
+ """Close an MCP session by session name.
166
+
167
+ Args:
168
+ session_name: Session name in format 'mcp-<server_name>'
169
+ """
170
+ # Parse server name from session_name (format: mcp-<server_name>)
171
+ try:
172
+ prefix, server_name = session_name.split("-", 1)
173
+ if prefix != self.backend_type.value:
174
+ raise ValueError(f"Invalid MCP session name format: {session_name}, expected 'mcp-<server_name>'")
175
+ except ValueError as e:
176
+ logger.warning(f"Invalid session_name format: {session_name} - {e}")
177
+ return
178
+
179
+ # Check if session exists
180
+ if session_name not in self._sessions and server_name not in self._server_sessions:
181
+ logger.warning(f"Session '{session_name}' not found, nothing to close")
182
+ return
183
+
184
+ error_occurred = False
185
+ try:
186
+ logger.debug(f"Closing MCP session '{session_name}' (server: {server_name})")
187
+ await self._client.close_session(server_name)
188
+ logger.info(f"Successfully closed MCP session '{session_name}'")
189
+ except Exception as e:
190
+ error_occurred = True
191
+ logger.error(f"Error closing MCP session '{session_name}': {e}")
192
+ finally:
193
+ # Clean up both maps regardless of errors
194
+ self._server_sessions.pop(server_name, None)
195
+ self._sessions.pop(session_name, None)
196
+
197
+ if error_occurred:
198
+ logger.warning(f"Session '{session_name}' removed from tracking despite close error")
199
+
200
+ async def list_tools(self, session_name: str | None = None, use_cache: bool = True) -> List[BaseTool]:
201
+ """List tools from MCP sessions.
202
+
203
+ Args:
204
+ session_name: If provided, only list tools from that session.
205
+ If None, list tools from all sessions.
206
+ use_cache: If True, try to load from cache first (no server startup).
207
+ If False, start servers and get live tools.
208
+
209
+ Returns:
210
+ List of BaseTool instances
211
+ """
212
+ await self.ensure_initialized()
213
+
214
+ # Case 1: List tools from specific session (always live, no cache)
215
+ if session_name:
216
+ sess = self._sessions.get(session_name)
217
+ if sess:
218
+ try:
219
+ tools = await sess.list_tools()
220
+ server_name = session_name.replace(f"{self.backend_type.value}-", "", 1)
221
+ for tool in tools:
222
+ tool.bind_runtime_info(
223
+ backend=self.backend_type,
224
+ session_name=session_name,
225
+ server_name=server_name,
226
+ )
227
+ return tools
228
+ except Exception as e:
229
+ logger.error(f"Error listing tools from session '{session_name}': {e}")
230
+ return []
231
+ else:
232
+ logger.warning(f"Session '{session_name}' not found")
233
+ return []
234
+
235
+ # Case 2: List tools from all servers
236
+ # Try cache first if enabled
237
+ if use_cache:
238
+ cache = get_tool_cache()
239
+ if cache.has_cache():
240
+ tools = self._load_tools_from_cache()
241
+ if tools:
242
+ logger.info(f"Loaded {len(tools)} tools from cache (no server startup)")
243
+ return tools
244
+
245
+ # No cache or cache disabled, start servers
246
+ return await self._list_tools_live()
247
+
248
+ def _load_tools_from_cache(self) -> List[BaseTool]:
249
+ """Load tools from cache file without starting servers.
250
+
251
+ Priority:
252
+ 1. Try to load from sanitized cache (mcp_tool_cache_sanitized.json)
253
+ 2. If not exists, load from raw cache and sanitize, then save sanitized version
254
+ """
255
+ cache = get_tool_cache()
256
+ config_servers = self.list_servers()
257
+
258
+ # Try sanitized cache first
259
+ if cache.has_sanitized_cache():
260
+ logger.debug("Loading from sanitized cache")
261
+ all_cached_tools = cache.get_all_sanitized_tools()
262
+ return self._build_tools_from_cache(all_cached_tools, config_servers)
263
+
264
+ # Fall back to raw cache, sanitize and save
265
+ if cache.has_cache():
266
+ logger.info("Sanitized cache not found, building from raw cache...")
267
+ all_cached_tools = cache.get_all_tools()
268
+ sanitized_servers = self._sanitize_and_save_cache(all_cached_tools, cache)
269
+ return self._build_tools_from_cache(sanitized_servers, config_servers)
270
+
271
+ return []
272
+
273
+ def _sanitize_and_save_cache(
274
+ self,
275
+ raw_tools: Dict[str, List[Dict]],
276
+ cache
277
+ ) -> Dict[str, List[Dict]]:
278
+ """Sanitize raw cache and save to sanitized cache file."""
279
+ sanitized_servers: Dict[str, List[Dict]] = {}
280
+
281
+ for server_name, tool_list in raw_tools.items():
282
+ sanitized_tools = []
283
+ for tool_meta in tool_list:
284
+ raw_params = tool_meta.get("parameters", {})
285
+ sanitized_params = _sanitize_mcp_schema(raw_params)
286
+ sanitized_tools.append({
287
+ "name": tool_meta["name"],
288
+ "description": tool_meta.get("description", ""),
289
+ "parameters": sanitized_params,
290
+ })
291
+ sanitized_servers[server_name] = sanitized_tools
292
+
293
+ # Save sanitized cache for future use
294
+ cache.save_sanitized(sanitized_servers)
295
+ logger.info(f"Created sanitized cache with {len(sanitized_servers)} servers")
296
+
297
+ return sanitized_servers
298
+
299
+ def _build_tools_from_cache(
300
+ self,
301
+ all_cached_tools: Dict[str, List[Dict]],
302
+ config_servers: List[str]
303
+ ) -> List[BaseTool]:
304
+ """Build BaseTool instances from cached tool metadata."""
305
+ tools: List[BaseTool] = []
306
+
307
+ for server_name in config_servers:
308
+ tool_list = all_cached_tools.get(server_name)
309
+ if not tool_list:
310
+ continue
311
+
312
+ session_name = f"{self.backend_type.value}-{server_name}"
313
+ for tool_meta in tool_list:
314
+ schema = ToolSchema(
315
+ name=tool_meta["name"],
316
+ description=tool_meta.get("description", ""),
317
+ parameters=tool_meta.get("parameters", {}),
318
+ backend_type=BackendType.MCP,
319
+ )
320
+ tool = RemoteTool(schema=schema, connector=None, backend=BackendType.MCP)
321
+ tool.bind_runtime_info(
322
+ backend=self.backend_type,
323
+ session_name=session_name,
324
+ server_name=server_name,
325
+ )
326
+ tools.append(tool)
327
+
328
+ return tools
329
+
330
+ async def _list_tools_live(self) -> List[BaseTool]:
331
+ """List tools by starting all servers.
332
+
333
+ Uses a semaphore to serialize session creation, avoiding TaskGroup race conditions
334
+ that occur when multiple MCP connections are initialized concurrently.
335
+ """
336
+ servers = self.list_servers()
337
+
338
+ if not servers:
339
+ logger.warning("No MCP servers configured")
340
+ return []
341
+
342
+ # Find servers that don't have sessions yet
343
+ to_create = [s for s in servers if s not in self._server_sessions]
344
+
345
+ # Create missing sessions with serialized execution using semaphore
346
+ if to_create:
347
+ logger.info(f"Creating {len(to_create)} MCP sessions (serialized to avoid race conditions)")
348
+
349
+ # Use semaphore with limit=1 to serialize session creation
350
+ # This avoids TaskGroup race conditions in concurrent HTTP connection setup
351
+ semaphore = asyncio.Semaphore(1)
352
+
353
+ async def _create_with_semaphore(server: str):
354
+ async with semaphore:
355
+ logger.debug(f"Creating session for '{server}'")
356
+ return await self._lazy_create(server)
357
+
358
+ tasks = [_create_with_semaphore(s) for s in to_create]
359
+ results = await asyncio.gather(*tasks, return_exceptions=True)
360
+
361
+ # Log errors
362
+ for i, result in enumerate(results):
363
+ if isinstance(result, MCPDependencyError):
364
+ logger.debug(f"Dependency error for '{to_create[i]}': {type(result).__name__}")
365
+ elif isinstance(result, Exception):
366
+ logger.error(f"Failed to create session for '{to_create[i]}': {result}")
367
+
368
+ # Aggregate tools from all sessions
369
+ uniq: Dict[tuple[str, str], BaseTool] = {}
370
+ failed_servers = []
371
+
372
+ logger.debug(f"Listing tools from {len(self._server_sessions)} sessions")
373
+ for server, sess in self._server_sessions.items():
374
+ try:
375
+ tools = await sess.list_tools()
376
+ session_name = f"{self.backend_type.value}-{server}"
377
+ for tool in tools:
378
+ key = (server, tool.schema.name)
379
+ if key not in uniq:
380
+ tool.bind_runtime_info(
381
+ backend=self.backend_type,
382
+ session_name=session_name,
383
+ server_name=server,
384
+ )
385
+ uniq[key] = tool
386
+ except Exception as e:
387
+ failed_servers.append(server)
388
+ logger.error(f"Error listing tools from server '{server}': {e}")
389
+
390
+ if failed_servers:
391
+ logger.warning(f"Failed to list tools from {len(failed_servers)} server(s): {failed_servers}")
392
+
393
+ tools_list = list(uniq.values())
394
+ logger.debug(f"Listed {len(tools_list)} unique tools from {len(self._server_sessions)} MCP servers")
395
+
396
+ # Save to cache for next time
397
+ await self._save_tools_to_cache(tools_list)
398
+
399
+ return tools_list
400
+
401
+ async def _save_tools_to_cache(self, tools: List[BaseTool]) -> None:
402
+ """Save tools metadata to cache file."""
403
+ cache = get_tool_cache()
404
+
405
+ # Group tools by server
406
+ servers: Dict[str, List[Dict]] = {}
407
+ for tool in tools:
408
+ server_name = tool.runtime_info.server_name if tool.is_bound else "unknown"
409
+ if server_name not in servers:
410
+ servers[server_name] = []
411
+ servers[server_name].append({
412
+ "name": tool.schema.name,
413
+ "description": tool.schema.description or "",
414
+ "parameters": tool.schema.parameters or {},
415
+ })
416
+
417
+ cache.save(servers)
418
+
419
+ async def ensure_server_session(self, server_name: str) -> Optional[MCPSession]:
420
+ """Ensure a server session exists, creating it if needed.
421
+
422
+ This is used for on-demand server startup when executing tools.
423
+ """
424
+ if server_name in self._server_sessions:
425
+ return self._server_sessions[server_name]
426
+
427
+ # Server not running, start it
428
+ logger.info(f"Starting MCP server on-demand: {server_name}")
429
+ cfg = SessionConfig(
430
+ session_name=f"mcp-{server_name}",
431
+ backend_type=BackendType.MCP,
432
+ connection_params={"server": server_name},
433
+ )
434
+
435
+ try:
436
+ session = await self.create_session(cfg)
437
+ return session
438
+ except Exception as e:
439
+ logger.error(f"Failed to start server '{server_name}': {e}")
440
+ return None
441
+
442
+ async def _lazy_create(self, server: str) -> None:
443
+ """Internal helper for lazy session creation.
444
+
445
+ Args:
446
+ server: Server name to create session for
447
+
448
+ Raises:
449
+ Exception: Re-raises any exception from session creation for error tracking
450
+ """
451
+ # Double-check to avoid race conditions
452
+ if server in self._server_sessions:
453
+ logger.debug(f"Session for server '{server}' already exists, skipping lazy creation")
454
+ return
455
+
456
+ cfg = SessionConfig(
457
+ session_name=f"mcp-{server}",
458
+ backend_type=BackendType.MCP,
459
+ connection_params={"server": server},
460
+ )
461
+
462
+ try:
463
+ await self.create_session(cfg)
464
+ logger.debug(f"Lazily created session for server '{server}'")
465
+ except MCPDependencyError as e:
466
+ # Dependency errors already shown to user
467
+ logger.debug(f"Dependency error for server '{server}': {type(e).__name__}")
468
+ # Re-raise so that asyncio.gather can track the error
469
+ raise
470
+ except Exception as e:
471
+ logger.error(f"Failed to lazily create session for server '{server}': {e}")
472
+ # Re-raise so that asyncio.gather can track the error
473
+ raise
openspace/grounding/backends/mcp/session.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Session manager for MCP connections.
3
+
4
+ This module provides a session manager for MCP connections,
5
+ which handles authentication, initialization, and tool discovery.
6
+ """
7
+
8
+ from typing import Any, Dict
9
+
10
+ from openspace.grounding.backends.mcp.transport.connectors import MCPBaseConnector
11
+ from openspace.grounding.backends.mcp.tool_converter import convert_mcp_tool_to_base_tool
12
+ from openspace.grounding.core.session import BaseSession
13
+ from openspace.grounding.core.types import BackendType
14
+ from openspace.utils.logging import Logger
15
+
16
+ logger = Logger.get_logger(__name__)
17
+
18
+
19
class MCPSession(BaseSession):
    """Session manager for MCP connections.

    Manages the lifecycle of a single MCP connection: establishing the
    transport, initializing the protocol session, and converting the
    server's advertised tools into BaseTool instances.
    """

    def __init__(
        self,
        connector: MCPBaseConnector,
        *,
        session_id: str = "",
        auto_connect: bool = True,
        auto_initialize: bool = True,
    ) -> None:
        """Create a new MCP session.

        Args:
            connector: Transport connector used to talk to the MCP server.
            session_id: Unique identifier for this session.
            auto_connect: Connect automatically when needed.
            auto_initialize: Initialize the session automatically.
        """
        super().__init__(
            connector=connector,
            session_id=session_id,
            backend_type=BackendType.MCP,
            auto_connect=auto_connect,
            auto_initialize=auto_initialize,
        )

    async def initialize(self) -> Dict[str, Any]:
        """Initialize the session and discover the server's tools.

        Returns:
            The session information reported by the MCP implementation.
        """
        # Establish the transport first, if allowed and not yet connected.
        if not self.is_connected and self.auto_connect:
            await self.connect()

        logger.debug(f"Initializing MCP session {self.session_id}")
        session_info = await self.connector.initialize()

        # MCPBaseConnector caches the server's tool list after initialize();
        # convert each MCP tool into our BaseTool abstraction.
        mcp_tools = self.connector.tools
        logger.debug(f"Converting {len(mcp_tools)} MCP tools to BaseTool")
        self.tools = [
            convert_mcp_tool_to_base_tool(mcp_tool, self.connector)
            for mcp_tool in mcp_tools
        ]

        logger.debug(f"MCP session {self.session_id} initialized with {len(self.tools)} tools")
        return session_info
openspace/grounding/backends/mcp/tool_cache.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from pathlib import Path
3
+ from datetime import datetime
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ from openspace.utils.logging import Logger
7
+
8
+ logger = Logger.get_logger(__name__)
9
+
10
+ # Cache path in project root directory (OpenSpace/)
11
+ # __file__ = .../OpenSpace/openspace/grounding/backends/mcp/tool_cache.py
12
+ # parent x5 = .../OpenSpace/
13
+ DEFAULT_CACHE_PATH = Path(__file__).parent.parent.parent.parent.parent / "mcp_tool_cache.json"
14
+ # Sanitized cache path (Claude API compatible JSON Schema)
15
+ DEFAULT_SANITIZED_CACHE_PATH = Path(__file__).parent.parent.parent.parent.parent / "mcp_tool_cache_sanitized.json"
16
+
17
+
18
class MCPToolCache:
    """Simple file-based cache for MCP tool metadata.

    Manages two JSON files:
      * raw cache       - tool schemas exactly as reported by the servers
      * sanitized cache - schemas cleaned for Claude API compatibility

    Both files are loaded lazily and memoized in memory. All save methods
    are best-effort: I/O failures are logged, never raised.
    """

    CACHE_VERSION = 1

    def __init__(self, cache_path: Optional[Path] = None, sanitized_cache_path: Optional[Path] = None):
        # Paths default to the project-root locations (see DEFAULT_* above).
        self.cache_path = cache_path or DEFAULT_CACHE_PATH
        self.sanitized_cache_path = sanitized_cache_path or DEFAULT_SANITIZED_CACHE_PATH
        # Memoized in-memory copies; None means "not loaded yet".
        self._cache: Optional[Dict] = None
        self._sanitized_cache: Optional[Dict] = None
        # Preferred on-disk ordering of server entries (from config).
        self._server_order: Optional[List[str]] = None

    def set_server_order(self, order: List[str]):
        """Set expected server order (from config). Used when saving to disk."""
        self._server_order = order

    def _reorder_servers(self, servers: Dict[str, List[Dict]]) -> Dict[str, List[Dict]]:
        """Return *servers* keyed in configured order, extras appended last.

        With no configured order the input mapping is returned unchanged.
        """
        if not self._server_order:
            return servers

        ordered = {name: servers[name] for name in self._server_order if name in servers}
        # Servers present in the cache but absent from config keep their
        # original relative order at the end.
        for name, tools in servers.items():
            ordered.setdefault(name, tools)
        return ordered

    def _ensure_dir(self):
        """Ensure cache directory exists."""
        self.cache_path.parent.mkdir(parents=True, exist_ok=True)

    def load(self) -> Dict[str, Any]:
        """Load raw cache from disk (memoized).

        Returns an empty cache structure when the file is absent or
        unreadable; never raises.
        """
        if self._cache is not None:
            return self._cache

        if not self.cache_path.exists():
            self._cache = {"version": self.CACHE_VERSION, "servers": {}}
            return self._cache

        try:
            with open(self.cache_path, "r", encoding="utf-8") as f:
                self._cache = json.load(f)
            logger.info(f"Loaded MCP tool cache: {len(self._cache.get('servers', {}))} servers")
            return self._cache
        except Exception as e:
            logger.warning(f"Failed to load cache: {e}")
            self._cache = {"version": self.CACHE_VERSION, "servers": {}}
            return self._cache

    def save(self, servers: Dict[str, List[Dict]]):
        """
        Save tool metadata to disk (overwrites existing cache).

        Args:
            servers: Dict mapping server_name -> list of tool metadata dicts
                Each tool dict should have: name, description, parameters
        """
        self._ensure_dir()

        cache_data = {
            "version": self.CACHE_VERSION,
            "updated_at": datetime.now().isoformat(),
            # Fix: apply the configured server order on full saves too.
            # Previously only save_server() honored set_server_order(),
            # so a full overwrite silently discarded the ordering.
            "servers": self._reorder_servers(servers),
        }

        try:
            with open(self.cache_path, "w", encoding="utf-8") as f:
                json.dump(cache_data, f, indent=2, ensure_ascii=False)
            self._cache = cache_data
            logger.info(f"Saved MCP tool cache: {len(servers)} servers")
        except Exception as e:
            logger.error(f"Failed to save cache: {e}")

    def save_server(self, server_name: str, tools: List[Dict]):
        """
        Save/update a single server's tools to cache (incremental append).

        Args:
            server_name: Name of the MCP server
            tools: List of tool metadata dicts for this server
        """
        self._ensure_dir()
        cache = self.load()

        cache.setdefault("servers", {})[server_name] = tools
        cache["servers"] = self._reorder_servers(cache["servers"])
        cache["updated_at"] = datetime.now().isoformat()

        try:
            with open(self.cache_path, "w", encoding="utf-8") as f:
                json.dump(cache, f, indent=2, ensure_ascii=False)
            self._cache = cache
            logger.debug(f"Saved {len(tools)} tools for server '{server_name}'")
        except Exception as e:
            logger.error(f"Failed to save cache for server '{server_name}': {e}")

    def get_server_tools(self, server_name: str) -> Optional[List[Dict]]:
        """Get cached tools for a specific server (None if unknown)."""
        return self.load().get("servers", {}).get(server_name)

    def get_all_tools(self) -> Dict[str, List[Dict]]:
        """Get all cached tools, grouped by server."""
        return self.load().get("servers", {})

    def has_cache(self) -> bool:
        """Check if cache exists and has data."""
        return bool(self.load().get("servers"))

    def clear(self):
        """Delete the raw cache file and drop the in-memory copy."""
        if self.cache_path.exists():
            self.cache_path.unlink()
        self._cache = None
        logger.info("MCP tool cache cleared")

    def save_failed_server(self, server_name: str, error: str):
        """
        Record a failed server to cache.

        Args:
            server_name: Name of the failed MCP server
            error: Error message
        """
        self._ensure_dir()
        cache = self.load()

        cache.setdefault("failed_servers", {})[server_name] = {
            "error": error,
            "failed_at": datetime.now().isoformat(),
        }
        cache["updated_at"] = datetime.now().isoformat()

        try:
            with open(self.cache_path, "w", encoding="utf-8") as f:
                json.dump(cache, f, indent=2, ensure_ascii=False)
            self._cache = cache
        except Exception as e:
            logger.error(f"Failed to save failed server '{server_name}': {e}")

    def get_failed_servers(self) -> Dict[str, Dict]:
        """Get mapping of failed servers recorded in the cache."""
        return self.load().get("failed_servers", {})

    def load_sanitized(self) -> Dict[str, Any]:
        """Load sanitized cache from disk (memoized).

        Returns an empty cache structure when the file is absent or
        unreadable; never raises.
        """
        if self._sanitized_cache is not None:
            return self._sanitized_cache

        if not self.sanitized_cache_path.exists():
            self._sanitized_cache = {"version": self.CACHE_VERSION, "servers": {}}
            return self._sanitized_cache

        try:
            with open(self.sanitized_cache_path, "r", encoding="utf-8") as f:
                self._sanitized_cache = json.load(f)
            logger.info(f"Loaded sanitized MCP tool cache: {len(self._sanitized_cache.get('servers', {}))} servers")
            return self._sanitized_cache
        except Exception as e:
            logger.warning(f"Failed to load sanitized cache: {e}")
            self._sanitized_cache = {"version": self.CACHE_VERSION, "servers": {}}
            return self._sanitized_cache

    def save_sanitized(self, servers: Dict[str, List[Dict]]):
        """
        Save sanitized tool metadata to disk.

        Args:
            servers: Dict mapping server_name -> list of sanitized tool metadata dicts
        """
        self._ensure_dir()

        cache_data = {
            "version": self.CACHE_VERSION,
            "updated_at": datetime.now().isoformat(),
            "sanitized": True,
            "servers": servers,
        }

        try:
            with open(self.sanitized_cache_path, "w", encoding="utf-8") as f:
                json.dump(cache_data, f, indent=2, ensure_ascii=False)
            self._sanitized_cache = cache_data
            logger.info(f"Saved sanitized MCP tool cache: {len(servers)} servers")
        except Exception as e:
            logger.error(f"Failed to save sanitized cache: {e}")

    def get_all_sanitized_tools(self) -> Dict[str, List[Dict]]:
        """Get all sanitized cached tools, grouped by server."""
        return self.load_sanitized().get("servers", {})

    def has_sanitized_cache(self) -> bool:
        """Check if sanitized cache exists and has data."""
        return bool(self.load_sanitized().get("servers"))

    def clear_sanitized(self):
        """Delete the sanitized cache file and drop the in-memory copy."""
        if self.sanitized_cache_path.exists():
            self.sanitized_cache_path.unlink()
        self._sanitized_cache = None
        logger.info("Sanitized MCP tool cache cleared")
242
+
243
+
244
+ # Global instance
245
+ _tool_cache: Optional[MCPToolCache] = None
246
+
247
+
248
def get_tool_cache() -> MCPToolCache:
    """Get global tool cache instance (created lazily on first call)."""
    global _tool_cache
    if _tool_cache is None:
        _tool_cache = MCPToolCache()
    return _tool_cache
254
+
openspace/grounding/backends/mcp/tool_converter.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tool converter for MCP.
3
+
4
+ This module provides utilities to convert MCP tools to BaseTool instances.
5
+ """
6
+
7
+ import copy
8
+ from typing import Any, Dict
9
+ from mcp.types import Tool as MCPTool
10
+
11
+ from openspace.grounding.core.tool import BaseTool, RemoteTool
12
+ from openspace.grounding.core.types import BackendType, ToolSchema
13
+ from openspace.grounding.core.transport.connectors import BaseConnector
14
+ from openspace.utils.logging import Logger
15
+
16
+ logger = Logger.get_logger(__name__)
17
+
18
+
19
def _sanitize_mcp_schema(params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sanitize an MCP tool schema for Claude API compatibility (JSON Schema
    draft 2020-12).

    Fixes applied:
    - Empty schemas become a valid empty object schema
    - Missing structural fields (type, properties, required) are added
    - Non-standard fields (title, examples, nullable, default, ...) removed
    - Nested properties and items are cleaned recursively
    - Every property ends up with a valid type
    - A non-object top-level schema is wrapped in an object, since the
      Anthropic API requires the root type to be 'object'

    The input dict is never mutated (a deep copy is sanitized).
    """
    if not params:
        return {"type": "object", "properties": {}, "required": []}

    cleaned = _deep_sanitize(copy.deepcopy(params))

    # Anthropic requires the root schema to be an object; wrap anything
    # else as a single required 'value' property.
    root_type = cleaned.get("type")
    if root_type and root_type != "object":
        logger.debug(f"[MCP_SCHEMA_SANITIZE] Wrapping non-object schema (type={root_type}) into object")
        cleaned = {
            "type": "object",
            "properties": {"value": cleaned},
            "required": ["value"],
        }

    return cleaned
52
+
53
+
54
+ def _deep_sanitize(schema: Dict[str, Any]) -> Dict[str, Any]:
55
+ """
56
+ Recursively sanitize a JSON schema to conform to JSON Schema draft 2020-12.
57
+ Removes non-standard fields and ensures valid structure.
58
+ """
59
+ if not isinstance(schema, dict):
60
+ return {"type": "string"}
61
+
62
+ # Allowed top-level keys for Claude API compatibility
63
+ allowed_keys = {
64
+ "type", "properties", "required", "items",
65
+ "description", "enum", "const",
66
+ "minimum", "maximum", "minLength", "maxLength",
67
+ "minItems", "maxItems", "pattern",
68
+ "additionalProperties", "anyOf", "oneOf", "allOf"
69
+ }
70
+
71
+ # Remove disallowed keys
72
+ keys_to_remove = [k for k in schema if k not in allowed_keys]
73
+ for k in keys_to_remove:
74
+ schema.pop(k, None)
75
+
76
+ # Ensure type exists
77
+ if "type" not in schema:
78
+ # Type is defined via anyOf/oneOf/allOf - don't add default type
79
+ # These combination keywords define the type themselves
80
+ if "anyOf" in schema or "oneOf" in schema or "allOf" in schema:
81
+ pass # Type is defined through combination keywords, do not add default type
82
+ # Try to infer type
83
+ elif "properties" in schema:
84
+ schema["type"] = "object"
85
+ elif "items" in schema:
86
+ schema["type"] = "array"
87
+ elif "enum" in schema:
88
+ # For enum, try to infer from values
89
+ enum_vals = schema.get("enum", [])
90
+ if enum_vals and all(isinstance(v, str) for v in enum_vals):
91
+ schema["type"] = "string"
92
+ elif enum_vals and all(isinstance(v, (int, float)) for v in enum_vals):
93
+ schema["type"] = "number"
94
+ else:
95
+ schema["type"] = "string"
96
+ elif not schema:
97
+ # Empty schema (e.g., only had $schema which was removed) -> no parameters needed
98
+ schema["type"] = "object"
99
+ schema["properties"] = {}
100
+ schema["required"] = []
101
+ else:
102
+ schema["type"] = "object"
103
+
104
+ # Handle object type
105
+ if schema.get("type") == "object":
106
+ if "properties" not in schema:
107
+ schema["properties"] = {}
108
+ if "required" not in schema:
109
+ schema["required"] = []
110
+
111
+ # Recursively sanitize properties
112
+ if isinstance(schema.get("properties"), dict):
113
+ for prop_name, prop_schema in list(schema["properties"].items()):
114
+ if isinstance(prop_schema, dict):
115
+ schema["properties"][prop_name] = _deep_sanitize(prop_schema)
116
+ else:
117
+ # Invalid property schema, replace with string
118
+ schema["properties"][prop_name] = {"type": "string"}
119
+
120
+ # Sanitize additionalProperties if present
121
+ if "additionalProperties" in schema and isinstance(schema["additionalProperties"], dict):
122
+ schema["additionalProperties"] = _deep_sanitize(schema["additionalProperties"])
123
+
124
+ # Handle array type
125
+ elif schema.get("type") == "array":
126
+ if "items" in schema:
127
+ if isinstance(schema["items"], dict):
128
+ schema["items"] = _deep_sanitize(schema["items"])
129
+ elif isinstance(schema["items"], list):
130
+ # Tuple validation - sanitize each item
131
+ schema["items"] = [_deep_sanitize(item) if isinstance(item, dict) else {"type": "string"} for item in schema["items"]]
132
+ else:
133
+ schema["items"] = {"type": "string"}
134
+ else:
135
+ # Default items to string if not specified
136
+ schema["items"] = {"type": "string"}
137
+
138
+ # Handle anyOf/oneOf/allOf
139
+ for combo_key in ["anyOf", "oneOf", "allOf"]:
140
+ if combo_key in schema and isinstance(schema[combo_key], list):
141
+ schema[combo_key] = [
142
+ _deep_sanitize(sub) if isinstance(sub, dict) else {"type": "string"}
143
+ for sub in schema[combo_key]
144
+ ]
145
+
146
+ return schema
147
+
148
+
149
def convert_mcp_tool_to_base_tool(
    mcp_tool: MCPTool,
    connector: BaseConnector
) -> BaseTool:
    """
    Wrap an MCP SDK Tool object as a RemoteTool for the grounding framework.

    The tool's input schema is sanitized for Claude API compatibility; a
    tool without an input schema receives an empty object schema.

    Args:
        mcp_tool: MCP Tool object from the MCP SDK
        connector: Connector used to reach the MCP server hosting the tool

    Returns:
        RemoteTool instance wrapping the MCP tool
    """
    name = mcp_tool.name
    description = getattr(mcp_tool, 'description', None) or ""

    # Sanitize the declared input schema, or substitute an empty one.
    raw_schema = getattr(mcp_tool, 'inputSchema', None)
    if raw_schema:
        parameters: Dict[str, Any] = _sanitize_mcp_schema(raw_schema)
    else:
        parameters = {"type": "object", "properties": {}, "required": []}

    tool_schema = ToolSchema(
        name=name,
        description=description,
        parameters=parameters,
        backend_type=BackendType.MCP,
    )

    wrapped = RemoteTool(
        connector=connector,
        remote_name=name,
        schema=tool_schema,
        backend=BackendType.MCP,
    )

    logger.debug(f"Converted MCP tool '{name}' to RemoteTool")
    return wrapped
openspace/grounding/backends/mcp/transport/connectors/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Connectors for various MCP transports.
3
+
4
+ This module provides interfaces for connecting to MCP implementations
5
+ through different transport mechanisms.
6
+ """
7
+
8
+ from .base import MCPBaseConnector # noqa: F401
9
+ from .http import HttpConnector # noqa: F401
10
+ from .sandbox import SandboxConnector # noqa: F401
11
+ from .stdio import StdioConnector # noqa: F401
12
+ from .websocket import WebSocketConnector # noqa: F401
13
+
14
+ __all__ = [
15
+ "MCPBaseConnector",
16
+ "StdioConnector",
17
+ "HttpConnector",
18
+ "WebSocketConnector",
19
+ "SandboxConnector",
20
+ ]
openspace/grounding/backends/mcp/transport/connectors/base.py ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base connector for MCP implementations.
3
+
4
+ This module provides the base connector interface that all MCP connectors must implement.
5
+ """
6
+
7
+ import asyncio
8
+ from abc import abstractmethod
9
+ from typing import Any
10
+
11
+ from mcp import ClientSession
12
+ from mcp.shared.exceptions import McpError
13
+ from mcp.types import CallToolResult, GetPromptResult, Prompt, ReadResourceResult, Resource, Tool
14
+
15
+ from openspace.grounding.core.transport.task_managers import BaseConnectionManager
16
+ from openspace.grounding.core.transport.connectors import BaseConnector
17
+ from openspace.utils.logging import Logger
18
+
19
+ logger = Logger.get_logger(__name__)
20
+
21
+ # Default retry settings for tool calls
22
+ DEFAULT_TOOL_CALL_MAX_RETRIES = 3
23
+ DEFAULT_TOOL_CALL_RETRY_DELAY = 1.0
24
+
25
+
26
class MCPBaseConnector(BaseConnector[ClientSession]):
    """Base class for MCP connectors.

    This class defines the interface that all MCP connectors must implement.

    Lifecycle: subclasses establish a raw connection (via the connection
    manager), `_after_connect` wraps its streams in an MCP ``ClientSession``,
    and `initialize` performs the MCP handshake and caches the server's
    tools/resources/prompts. Tool calls transparently retry and, when
    `auto_reconnect` is set, attempt to re-establish a lost connection.
    """

    def __init__(
        self,
        connection_manager: BaseConnectionManager[ClientSession],
        tool_call_max_retries: int = DEFAULT_TOOL_CALL_MAX_RETRIES,
        tool_call_retry_delay: float = DEFAULT_TOOL_CALL_RETRY_DELAY,
    ):
        """Initialize base connector with common attributes.

        Args:
            connection_manager: The connection manager to use for the connection.
            tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
            tool_call_retry_delay: Initial delay between retries in seconds (default: 1.0)
        """
        super().__init__(connection_manager)
        # Active MCP session; None until _after_connect() creates one.
        self.client_session: ClientSession | None = None
        # Capability caches filled by initialize(); None means "not initialized yet",
        # which the tools/resources/prompts properties treat as an error.
        self._tools: list[Tool] | None = None
        self._resources: list[Resource] | None = None
        self._prompts: list[Prompt] | None = None
        self.auto_reconnect = True  # Whether to automatically reconnect on connection loss (not configurable for now)
        self.tool_call_max_retries = tool_call_max_retries
        self.tool_call_retry_delay = tool_call_retry_delay

    @property
    @abstractmethod
    def public_identifier(self) -> str:
        """Get the identifier for the connector."""
        pass

    async def _get_streams_from_connection(self):
        """Get read and write streams from the connection. Override in subclasses if needed."""
        # Default implementation for most MCP connectors (stdio, HTTP)
        # Returns the connection directly as it should be a tuple of (read_stream, write_stream)
        return self._connection

    async def _after_connect(self) -> None:
        """Create ClientSession after connection is established.

        Some connectors (like WebSocket) don't use ClientSession and may override this method.

        Raises:
            RuntimeError: If the connection yields something other than a
                (read_stream, write_stream) pair.
        """
        # Get streams from the connection
        streams = await self._get_streams_from_connection()

        if streams is None:
            # Some connectors (like WebSocket) don't use ClientSession
            # They should override this method to set up their own resources
            logger.debug("No streams returned, ClientSession creation skipped")
            return

        if isinstance(streams, tuple) and len(streams) == 2:
            read_stream, write_stream = streams
            # Create the client session; __aenter__ starts its background machinery.
            self.client_session = ClientSession(read_stream, write_stream, sampling_callback=None)
            await self.client_session.__aenter__()
            logger.debug("MCP ClientSession created successfully")
        else:
            raise RuntimeError(f"Invalid streams format: expected tuple of 2 elements, got {type(streams)}")

    async def _before_disconnect(self) -> None:
        """Clean up MCP-specific resources before disconnection.

        Best-effort: errors while closing the session are logged, never raised,
        so the disconnect sequence always completes.
        """
        errors = []

        # Close the client session
        if self.client_session:
            try:
                logger.debug("Closing MCP client session")
                await self.client_session.__aexit__(None, None, None)
            except Exception as e:
                error_msg = f"Error closing client session: {e}"
                logger.warning(error_msg)
                errors.append(error_msg)
            finally:
                self.client_session = None

        # Reset tools, resources, and prompts
        self._tools = None
        self._resources = None
        self._prompts = None

        if errors:
            logger.warning(f"Encountered {len(errors)} errors during MCP resource cleanup")

    async def _cleanup_on_connect_failure(self) -> None:
        """Override to add MCP-specific cleanup on connection failure."""
        # Clean up client session if it was created
        if self.client_session:
            try:
                await self.client_session.__aexit__(None, None, None)
            except Exception:
                pass
            finally:
                self.client_session = None

        # Call parent cleanup
        await super()._cleanup_on_connect_failure()

    async def initialize(self) -> Any:
        """Initialize the MCP session and return session information.

        Performs the MCP handshake and populates the tools/resources/prompts
        caches according to the capabilities the server advertises.

        Returns:
            The MCP SDK's initialize result (NOTE(review): the original
            annotation said ``dict[str, Any]``, but ``ClientSession.initialize``
            returns a typed result object, not a dict).

        Raises:
            RuntimeError: If no client session has been established.
        """
        if not self.client_session:
            raise RuntimeError("MCP client is not connected")

        logger.debug("Initializing MCP session")

        # Initialize the session
        result = await self.client_session.initialize()

        server_capabilities = result.capabilities

        if server_capabilities.tools:
            # Get available tools
            tools_result = await self.list_tools()
            self._tools = tools_result or []
        else:
            self._tools = []

        if server_capabilities.resources:
            # Get available resources
            resources_result = await self.list_resources()
            self._resources = resources_result or []
        else:
            self._resources = []

        if server_capabilities.prompts:
            # Get available prompts
            prompts_result = await self.list_prompts()
            self._prompts = prompts_result or []
        else:
            self._prompts = []

        logger.debug(
            f"MCP session initialized with {len(self._tools)} tools, "
            f"{len(self._resources)} resources, "
            f"and {len(self._prompts)} prompts"
        )

        return result

    @property
    def tools(self) -> list[Tool]:
        """Get the list of available tools.

        Raises:
            RuntimeError: If initialize() has not been called yet.
        """
        if self._tools is None:
            raise RuntimeError("MCP client is not initialized")
        return self._tools

    @property
    def resources(self) -> list[Resource]:
        """Get the list of available resources.

        Raises:
            RuntimeError: If initialize() has not been called yet.
        """
        if self._resources is None:
            raise RuntimeError("MCP client is not initialized")
        return self._resources

    @property
    def prompts(self) -> list[Prompt]:
        """Get the list of available prompts.

        Raises:
            RuntimeError: If initialize() has not been called yet.
        """
        if self._prompts is None:
            raise RuntimeError("MCP client is not initialized")
        return self._prompts

    @property
    def is_connected(self) -> bool:
        """Check if the connector is actually connected and the connection is alive.

        This property checks not only the connected flag but also verifies that
        the client session exists and the underlying connection is still active.
        Side effect: flips ``self._connected`` to False when staleness is detected.

        Returns:
            True if the connector is connected and the connection is alive, False otherwise.
        """
        # First check the basic connected flag
        if not self._connected:
            return False

        # Check if we have a client session
        if not self.client_session:
            self._connected = False
            return False

        # Check if connection manager task is still running (if applicable)
        # NOTE(review): reaches into the manager's private _task attribute — confirm
        # this stays in sync with BaseConnectionManager's internals.
        if self._connection_manager and hasattr(self._connection_manager, "_task"):
            task = self._connection_manager._task
            if task and task.done():
                logger.debug("Connection manager task is done, marking as disconnected")
                self._connected = False
                return False

        return True

    async def _ensure_connected(self) -> None:
        """Ensure the connector is connected, reconnecting if necessary.

        Raises:
            RuntimeError: If connection cannot be established and auto_reconnect is False.
        """
        # NOTE(review): a None client_session raises immediately, so auto-reconnect
        # only applies when a session exists but the connection went stale.
        if not self.client_session:
            raise RuntimeError("MCP client is not connected")

        if not self.is_connected:
            if self.auto_reconnect:
                logger.debug("Connection lost, attempting to reconnect...")
                try:
                    await self.connect()
                    logger.debug("Reconnection successful")
                except Exception as e:
                    raise RuntimeError(f"Failed to reconnect to MCP server: {e}") from e
            else:
                raise RuntimeError(
                    "Connection to MCP server has been lost. Auto-reconnection is disabled. Please reconnect manually."
                )

    async def call_tool(self, name: str, arguments: dict[str, Any]) -> CallToolResult:
        """Call an MCP tool with automatic reconnection handling and retry logic.

        Retries (with exponential backoff) on connection loss and on errors whose
        text looks like a retryable HTTP failure; other errors are raised immediately.

        Args:
            name: The name of the tool to call.
            arguments: The arguments to pass to the tool.

        Returns:
            The result of the tool call.

        Raises:
            RuntimeError: If the connection is lost and cannot be reestablished.
            Exception: If the tool call fails after all retries.
        """
        last_error: Exception | None = None

        for attempt in range(self.tool_call_max_retries):
            # Ensure we're connected
            await self._ensure_connected()

            logger.debug(f"Calling tool '{name}' with arguments: {arguments} (attempt {attempt + 1}/{self.tool_call_max_retries})")
            try:
                result = await self.client_session.call_tool(name, arguments)
                logger.debug(f"Tool '{name}' called successfully")
                return result
            except Exception as e:
                last_error = e
                error_str = str(e).lower()

                # Check if the error might be due to connection loss
                if not self.is_connected:
                    logger.warning(f"Tool call '{name}' failed due to connection loss: {e}")
                    # Try to reconnect on next iteration
                    continue

                # Check for retryable HTTP errors (400, 500, 502, 503, 504)
                # NOTE(review): substring matching on the stringified exception is
                # heuristic — e.g. any message containing "400" matches.
                is_retryable = any(code in error_str for code in ['400', '500', '502', '503', '504', 'bad request', 'internal server error', 'service unavailable', 'gateway timeout'])

                if is_retryable and attempt < self.tool_call_max_retries - 1:
                    delay = self.tool_call_retry_delay * (2 ** attempt)  # Exponential backoff
                    logger.warning(
                        f"Tool call '{name}' failed with retryable error: {e}, "
                        f"retrying in {delay:.1f}s (attempt {attempt + 1}/{self.tool_call_max_retries})"
                    )
                    await asyncio.sleep(delay)
                    continue

                # Non-retryable error or max retries reached, re-raise
                raise

        # All retries exhausted
        error_msg = f"Tool call '{name}' failed after {self.tool_call_max_retries} retries"
        logger.error(error_msg)
        raise RuntimeError(error_msg) from last_error

    async def list_tools(self) -> list[Tool]:
        """List all available tools from the MCP implementation.

        Returns an empty list (rather than raising) when the server reports an MCP error.
        """

        # Ensure we're connected
        await self._ensure_connected()

        logger.debug("Listing tools")
        try:
            result = await self.client_session.list_tools()
            return result.tools
        except McpError as e:
            logger.error(f"Error listing tools: {e}")
            return []

    async def list_resources(self) -> list[Resource]:
        """List all available resources from the MCP implementation.

        Returns an empty list (rather than raising) when the server reports an MCP error.
        """
        # Ensure we're connected
        await self._ensure_connected()

        logger.debug("Listing resources")
        try:
            result = await self.client_session.list_resources()
            return result.resources
        except McpError as e:
            logger.error(f"Error listing resources: {e}")
            return []

    async def read_resource(self, uri: str) -> ReadResourceResult:
        """Read a resource by URI.

        Raises:
            RuntimeError: If no client session has been established.
        """
        if not self.client_session:
            raise RuntimeError("MCP client is not connected")

        logger.debug(f"Reading resource: {uri}")
        result = await self.client_session.read_resource(uri)
        return result

    async def list_prompts(self) -> list[Prompt]:
        """List all available prompts from the MCP implementation.

        Returns an empty list (rather than raising) when the server reports an MCP error.
        """
        # Ensure we're connected
        await self._ensure_connected()

        logger.debug("Listing prompts")
        try:
            result = await self.client_session.list_prompts()
            return result.prompts
        except McpError as e:
            logger.error(f"Error listing prompts: {e}")
            return []

    async def get_prompt(self, name: str, arguments: dict[str, Any] | None = None) -> GetPromptResult:
        """Get a prompt by name."""
        # Ensure we're connected
        await self._ensure_connected()

        logger.debug(f"Getting prompt: {name}")
        result = await self.client_session.get_prompt(name, arguments)
        return result

    async def request(self, method: str, params: dict[str, Any] | None = None) -> Any:
        """Send a raw request to the MCP implementation.

        NOTE(review): passes a plain dict to ``ClientSession.request`` — the MCP SDK
        may expect typed request objects here; verify against the SDK version in use.
        """
        # Ensure we're connected
        await self._ensure_connected()

        logger.debug(f"Sending request: {method} with params: {params}")
        return await self.client_session.request({"method": method, "params": params or {}})

    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """Dispatch an invocation: plain names go to call_tool; dunder-prefixed
        pseudo-names route to resource/prompt operations.

        Raises:
            ValueError: If a dunder-prefixed name is not one of the supported
                pseudo-operations.
        """
        await self._ensure_connected()

        if not name.startswith("__"):
            return await self.call_tool(name, params)

        if name == "__read_resource__":
            return await self.read_resource(params["uri"])
        if name == "__list_prompts__":
            return await self.list_prompts()
        if name == "__get_prompt__":
            return await self.get_prompt(params["name"], params.get("args"))

        raise ValueError(f"Unsupported MCP invoke name: {name}")
openspace/grounding/backends/mcp/transport/connectors/http.py ADDED
@@ -0,0 +1,705 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HTTP connector for MCP implementations.
3
+
4
+ This module provides a connector for communicating with MCP implementations
5
+ through HTTP APIs with SSE, Streamable HTTP, or simple JSON-RPC for transport.
6
+ """
7
+
8
+ import asyncio
9
+ import anyio
10
+ import httpx
11
+ from typing import Any, Dict, List
12
+ from mcp import ClientSession
13
+ from mcp.types import (
14
+ CallToolResult,
15
+ TextContent,
16
+ ImageContent,
17
+ EmbeddedResource,
18
+ Tool,
19
+ Resource,
20
+ Prompt,
21
+ GetPromptResult,
22
+ ReadResourceResult,
23
+ )
24
+
25
+ from openspace.utils.logging import Logger
26
+ from openspace.grounding.core.transport.task_managers.base import BaseConnectionManager
27
+ from openspace.grounding.backends.mcp.transport.task_managers import SseConnectionManager, StreamableHttpConnectionManager
28
+ from openspace.grounding.backends.mcp.transport.connectors.base import MCPBaseConnector, DEFAULT_TOOL_CALL_MAX_RETRIES, DEFAULT_TOOL_CALL_RETRY_DELAY
29
+
30
+ logger = Logger.get_logger(__name__)
31
+
32
+
33
+ class HttpConnector(MCPBaseConnector):
34
+ """Connector for MCP implementations using HTTP transport.
35
+
36
+ This connector uses HTTP/SSE or streamable HTTP to communicate with remote MCP implementations,
37
+ using a connection manager to handle the proper lifecycle management.
38
+ """
39
+
40
    def __init__(
        self,
        base_url: str,
        auth_token: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: float = 5,
        sse_read_timeout: float = 60 * 5,
        tool_call_max_retries: int = DEFAULT_TOOL_CALL_MAX_RETRIES,
        tool_call_retry_delay: float = DEFAULT_TOOL_CALL_RETRY_DELAY,
    ):
        """Initialize a new HTTP connector.

        Args:
            base_url: The base URL of the MCP HTTP API.
            auth_token: Optional authentication token.
            headers: Optional additional headers. NOTE(review): the dict is kept
                by reference and mutated below when auth_token is set — confirm
                callers don't reuse it.
            timeout: Timeout for HTTP operations in seconds.
            sse_read_timeout: Timeout for SSE read operations in seconds.
            tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
            tool_call_retry_delay: Initial delay between retries in seconds (default: 1.0)
        """
        self.base_url = base_url.rstrip("/")
        self.auth_token = auth_token
        self.headers = headers or {}
        if auth_token:
            self.headers["Authorization"] = f"Bearer {auth_token}"
        self.timeout = timeout
        self.sse_read_timeout = sse_read_timeout

        # JSON-RPC HTTP mode fields
        # _use_jsonrpc is flipped during transport negotiation (_before_connect)
        # when neither MCP transport works but plain JSON-RPC does.
        self._use_jsonrpc = False
        self._jsonrpc_client: httpx.AsyncClient | None = None
        self._jsonrpc_request_id = 0  # monotonically increasing JSON-RPC id

        # Create a placeholder connection manager (will be set up later in connect())
        # We use a placeholder here because the actual transport type (SSE vs Streamable HTTP)
        # can only be determined at runtime through server negotiation as per MCP specification
        from openspace.grounding.core.transport.task_managers import PlaceholderConnectionManager
        connection_manager = PlaceholderConnectionManager()
        super().__init__(
            connection_manager,
            tool_call_max_retries=tool_call_max_retries,
            tool_call_retry_delay=tool_call_retry_delay,
        )
84
+
85
+ async def connect(self) -> None:
86
+ """Create the underlying session/connection.
87
+
88
+ For JSON-RPC mode, we don't use a connection manager.
89
+ """
90
+ if self._connected:
91
+ return
92
+
93
+ try:
94
+ # Hook: before connection - this sets up transport type
95
+ await self._before_connect()
96
+
97
+ if self._use_jsonrpc:
98
+ # JSON-RPC mode doesn't use connection manager
99
+ # Just call _after_connect to set up the HTTP client
100
+ await self._after_connect()
101
+ self._connected = True
102
+ else:
103
+ # Use normal connection flow with connection manager
104
+ # If _before_connect() already established a connection, reuse it
105
+ if self._connection is None:
106
+ self._connection = await self._connection_manager.start()
107
+ await self._after_connect()
108
+ self._connected = True
109
+ except Exception:
110
+ await self._cleanup_on_connect_failure()
111
+ raise
112
+
113
+ async def disconnect(self) -> None:
114
+ """Close the session/connection and reset state."""
115
+ if not self._connected:
116
+ return
117
+
118
+ # Hook: before disconnection
119
+ await self._before_disconnect()
120
+
121
+ if not self._use_jsonrpc:
122
+ # Stop the connection manager only for non-JSON-RPC modes
123
+ if self._connection_manager:
124
+ await self._connection_manager.stop()
125
+ self._connection = None
126
+
127
+ # Hook: after disconnection
128
+ await self._after_disconnect()
129
+
130
+ self._connected = False
131
+
132
+ async def _before_connect(self) -> None:
133
+ """Negotiate transport type and set up the appropriate connection manager.
134
+
135
+ Tries transports in order:
136
+ 1. Streamable HTTP (new MCP transport)
137
+ 2. SSE (legacy MCP transport)
138
+ 3. Simple JSON-RPC HTTP (for custom servers)
139
+
140
+ This implements backwards compatibility per MCP specification.
141
+ """
142
+ self.transport_type = None
143
+ self._use_jsonrpc = False
144
+ connection_manager = None
145
+ streamable_error = None
146
+ sse_error = None
147
+
148
+ # First, try the new streamable HTTP transport
149
+ try:
150
+ logger.debug(f"Attempting streamable HTTP connection to: {self.base_url}")
151
+ connection_manager = StreamableHttpConnectionManager(
152
+ self.base_url, self.headers, self.timeout, self.sse_read_timeout
153
+ )
154
+
155
+ # Test the connection by starting it with built-in timeout
156
+ read_stream, write_stream = await connection_manager.start(timeout=self.timeout)
157
+
158
+ # Create and verify ClientSession
159
+ test_client = ClientSession(read_stream, write_stream, sampling_callback=None)
160
+
161
+ # Add timeout to __aenter__ - use asyncio.wait_for instead of anyio.fail_after
162
+ # to avoid cancel scope conflicts with background tasks
163
+ try:
164
+ await asyncio.wait_for(test_client.__aenter__(), timeout=self.timeout)
165
+ except asyncio.TimeoutError:
166
+ raise TimeoutError(f"ClientSession enter timed out after {self.timeout}s")
167
+
168
+ try:
169
+ # Add timeout to initialize() using asyncio.wait_for to prevent hanging
170
+ try:
171
+ await asyncio.wait_for(test_client.initialize(), timeout=self.timeout)
172
+ except asyncio.TimeoutError:
173
+ raise TimeoutError(f"initialize() timed out after {self.timeout}s")
174
+
175
+ try:
176
+ await asyncio.wait_for(test_client.list_tools(), timeout=self.timeout)
177
+ except asyncio.TimeoutError:
178
+ raise TimeoutError(f"list_tools() timed out after {self.timeout}s")
179
+
180
+ # SUCCESS! Keep the client session (don't close it, closing destroys the streams)
181
+ # Store it directly as the client_session for later use
182
+ self.transport_type = "streamable HTTP"
183
+ self._connection_manager = connection_manager
184
+ self._connection = connection_manager.get_streams()
185
+ self.client_session = test_client # Reuse the working session
186
+ logger.debug("Streamable HTTP transport selected")
187
+ return
188
+ except TimeoutError:
189
+ try:
190
+ await asyncio.wait_for(test_client.__aexit__(None, None, None), timeout=2)
191
+ except (asyncio.TimeoutError, Exception):
192
+ pass
193
+ raise
194
+ except Exception as init_error:
195
+ # Clean up the test client only on error
196
+ try:
197
+ await asyncio.wait_for(test_client.__aexit__(None, None, None), timeout=2)
198
+ except (asyncio.TimeoutError, Exception):
199
+ pass
200
+ raise init_error
201
+
202
+ except Exception as e:
203
+ streamable_error = e
204
+ logger.debug(f"Streamable HTTP failed: {e}")
205
+
206
+ # Clean up the failed connection manager
207
+ if connection_manager:
208
+ try:
209
+ await asyncio.wait_for(connection_manager.stop(), timeout=2)
210
+ except (asyncio.TimeoutError, Exception):
211
+ pass
212
+
213
+ # Try SSE fallback
214
+ try:
215
+ logger.debug(f"Attempting SSE fallback connection to: {self.base_url}")
216
+ connection_manager = SseConnectionManager(
217
+ self.base_url, self.headers, self.timeout, self.sse_read_timeout
218
+ )
219
+
220
+ # Test the connection by starting it with built-in timeout
221
+ read_stream, write_stream = await connection_manager.start(timeout=self.timeout)
222
+
223
+ # Create and verify ClientSession
224
+ test_client = ClientSession(read_stream, write_stream, sampling_callback=None)
225
+
226
+ # Add timeout to __aenter__ - use asyncio.wait_for instead of anyio.fail_after
227
+ # to avoid cancel scope conflicts with background tasks
228
+ try:
229
+ await asyncio.wait_for(test_client.__aenter__(), timeout=self.timeout)
230
+ except asyncio.TimeoutError:
231
+ raise TimeoutError(f"ClientSession enter timed out after {self.timeout}s")
232
+
233
+ try:
234
+ try:
235
+ await asyncio.wait_for(test_client.initialize(), timeout=self.timeout)
236
+ except asyncio.TimeoutError:
237
+ raise TimeoutError(f"initialize() timed out after {self.timeout}s")
238
+
239
+ try:
240
+ await asyncio.wait_for(test_client.list_tools(), timeout=self.timeout)
241
+ except asyncio.TimeoutError:
242
+ raise TimeoutError(f"list_tools() timed out after {self.timeout}s")
243
+
244
+ # SUCCESS! Keep the client session (don't close it, closing destroys the streams)
245
+ # Store it directly as the client_session for later use
246
+ self.transport_type = "SSE"
247
+ self._connection_manager = connection_manager
248
+ self._connection = connection_manager.get_streams()
249
+ self.client_session = test_client # Reuse the working session
250
+ logger.debug("SSE transport selected")
251
+ return
252
+ except TimeoutError:
253
+ try:
254
+ await asyncio.wait_for(test_client.__aexit__(None, None, None), timeout=2)
255
+ except (asyncio.TimeoutError, Exception):
256
+ pass
257
+ raise
258
+ except Exception as init_error:
259
+ # Clean up the test client only on error
260
+ try:
261
+ await asyncio.wait_for(test_client.__aexit__(None, None, None), timeout=2)
262
+ except (asyncio.TimeoutError, Exception):
263
+ pass
264
+ raise init_error
265
+
266
+ except Exception as e:
267
+ sse_error = e
268
+ logger.debug(f"SSE failed: {e}")
269
+
270
+ # Clean up the failed connection manager
271
+ if connection_manager:
272
+ try:
273
+ await asyncio.wait_for(connection_manager.stop(), timeout=2)
274
+ except (asyncio.TimeoutError, Exception):
275
+ pass
276
+
277
+ # Both MCP transports failed, try simple JSON-RPC HTTP as last resort
278
+ # This is useful for custom MCP servers that don't implement proper MCP transports
279
+ logger.debug(f"Attempting JSON-RPC HTTP fallback to: {self.base_url}")
280
+ try:
281
+ # Test JSON-RPC connection
282
+ await self._try_jsonrpc_connection()
283
+
284
+ self.transport_type = "JSON-RPC HTTP"
285
+ self._use_jsonrpc = True
286
+ logger.info(f"JSON-RPC HTTP transport selected for: {self.base_url}")
287
+ return
288
+
289
+ except Exception as jsonrpc_error:
290
+ # All transports failed
291
+ logger.error(
292
+ f"All transport methods failed for {self.base_url}. "
293
+ f"Streamable HTTP: {streamable_error}, SSE: {sse_error}, JSON-RPC: {jsonrpc_error}"
294
+ )
295
+ # Raise the most relevant error - prefer the original streamable error
296
+ raise streamable_error or sse_error or jsonrpc_error
297
+
298
+ async def _try_jsonrpc_connection(self) -> None:
299
+ """Test JSON-RPC HTTP connection by sending an initialize request."""
300
+ headers = {**self.headers, "Content-Type": "application/json"}
301
+
302
+ async with httpx.AsyncClient(timeout=httpx.Timeout(self.timeout), headers=headers) as client:
303
+ payload = {
304
+ "jsonrpc": "2.0",
305
+ "id": 1,
306
+ "method": "initialize",
307
+ "params": {
308
+ "protocolVersion": "2024-11-05",
309
+ "capabilities": {},
310
+ "clientInfo": {"name": "OpenSpace", "version": "1.0.0"},
311
+ }
312
+ }
313
+
314
+ response = await client.post(self.base_url, json=payload)
315
+ response.raise_for_status()
316
+
317
+ data = response.json()
318
+
319
+ # Check for JSON-RPC error
320
+ if "error" in data:
321
+ error = data["error"]
322
+ raise RuntimeError(f"JSON-RPC error: {error.get('message', str(error))}")
323
+
324
+ # Success - server supports JSON-RPC
325
+ logger.debug(f"JSON-RPC test succeeded: {data.get('result', {})}")
326
+
327
+ async def _after_connect(self) -> None:
328
+ """Create ClientSession (or set up JSON-RPC client) and log success."""
329
+ if self._use_jsonrpc:
330
+ # Set up JSON-RPC HTTP client
331
+ headers = {**self.headers, "Content-Type": "application/json"}
332
+ self._jsonrpc_client = httpx.AsyncClient(
333
+ timeout=httpx.Timeout(self.timeout),
334
+ headers=headers,
335
+ )
336
+ logger.debug(f"JSON-RPC HTTP client set up for: {self.base_url}")
337
+ else:
338
+ # Skip creating ClientSession if _before_connect() already created one
339
+ if self.client_session is None:
340
+ await super()._after_connect()
341
+ else:
342
+ logger.debug("Reusing ClientSession from _before_connect()")
343
+
344
+ logger.debug(f"Successfully connected to MCP implementation via {self.transport_type}: {self.base_url}")
345
+
346
+ async def _before_disconnect(self) -> None:
347
+ """Clean up resources before disconnection."""
348
+ # Clean up JSON-RPC client if used
349
+ if self._jsonrpc_client:
350
+ try:
351
+ await self._jsonrpc_client.aclose()
352
+ except Exception as e:
353
+ logger.warning(f"Error closing JSON-RPC client: {e}")
354
+ finally:
355
+ self._jsonrpc_client = None
356
+
357
+ # Call parent cleanup for MCP resources
358
+ await super()._before_disconnect()
359
+
360
    @property
    def public_identifier(self) -> dict[str, Any]:
        """Identifying metadata for this connector: transport type and base URL.

        NOTE(review): the abstract base declares ``public_identifier -> str``,
        but this override returns a dict (the original annotation here also
        said ``str``). Callers appear to tolerate the dict — confirm before
        changing either side of the contract.
        """
        return {"type": self.transport_type, "base_url": self.base_url}
364
+
365
+ # =====================
366
+ # JSON-RPC HTTP Methods
367
+ # =====================
368
+
369
+ def _next_jsonrpc_id(self) -> int:
370
+ """Get next JSON-RPC request ID."""
371
+ self._jsonrpc_request_id += 1
372
+ return self._jsonrpc_request_id
373
+
374
    async def _send_jsonrpc_request(
        self,
        method: str,
        params: Dict[str, Any] | None = None,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ) -> Any:
        """Send a JSON-RPC request and return the result.

        Args:
            method: The JSON-RPC method name (e.g., "tools/list", "tools/call")
            params: The method parameters
            max_retries: Maximum number of retries for transient errors (400, 503, etc.)
            retry_delay: Initial delay between retries (doubles each retry)

        Returns:
            The result field from the JSON-RPC response

        Raises:
            RuntimeError: If the JSON-RPC client is not initialized, the server
                returns a JSON-RPC error, an HTTP/request error is not retryable,
                or all retries are exhausted.
        """
        if not self._jsonrpc_client:
            raise RuntimeError("JSON-RPC client not initialized")

        last_error: Exception | None = None

        for attempt in range(max_retries):
            request_id = self._next_jsonrpc_id()
            payload = {
                "jsonrpc": "2.0",
                "id": request_id,
                "method": method,
                "params": params or {},
            }

            logger.debug(f"Sending JSON-RPC request: {method} (id={request_id}, attempt {attempt + 1}/{max_retries})")

            try:
                response = await self._jsonrpc_client.post(self.base_url, json=payload)
                response.raise_for_status()

                data = response.json()

                # A JSON-RPC-level error is surfaced as RuntimeError (not retried).
                if "error" in data:
                    error = data["error"]
                    error_msg = error.get("message", str(error))
                    raise RuntimeError(f"JSON-RPC error: {error_msg}")

                return data.get("result", {})

            except httpx.HTTPStatusError as e:
                last_error = e
                status_code = e.response.status_code

                # Retry on 400 (Bad Request) and 5xx errors
                # 400 can happen when MCP server is temporarily not ready
                if status_code in (400, 500, 502, 503, 504) and attempt < max_retries - 1:
                    delay = retry_delay * (2 ** attempt)  # exponential backoff
                    logger.warning(
                        f"HTTP {status_code} error on {method}, retrying in {delay:.1f}s "
                        f"(attempt {attempt + 1}/{max_retries})"
                    )
                    await asyncio.sleep(delay)
                    continue

                raise RuntimeError(f"HTTP error: {status_code}") from e

            except httpx.RequestError as e:
                last_error = e
                # Retry on connection errors
                if attempt < max_retries - 1:
                    delay = retry_delay * (2 ** attempt)
                    logger.warning(
                        f"Request error on {method}: {e}, retrying in {delay:.1f}s "
                        f"(attempt {attempt + 1}/{max_retries})"
                    )
                    await asyncio.sleep(delay)
                    continue

                raise RuntimeError(f"Request error: {e}") from e

        # Should not reach here, but just in case
        raise RuntimeError(f"Max retries exceeded for {method}") from last_error
454
+
455
+ def _parse_tools_from_json(self, tools_data: List[Dict]) -> List[Tool]:
456
+ """Parse tool data into Tool objects."""
457
+ tools = []
458
+ for tool_dict in tools_data:
459
+ try:
460
+ tool = Tool(
461
+ name=tool_dict.get("name", ""),
462
+ description=tool_dict.get("description", ""),
463
+ inputSchema=tool_dict.get("inputSchema", {}),
464
+ )
465
+ tools.append(tool)
466
+ except Exception as e:
467
+ logger.warning(f"Failed to parse tool: {e}")
468
+ return tools
469
+
470
+ def _parse_resources_from_json(self, resources_data: List[Dict]) -> List[Resource]:
471
+ """Parse resource data into Resource objects."""
472
+ resources = []
473
+ for res_dict in resources_data:
474
+ try:
475
+ resource = Resource(
476
+ uri=res_dict.get("uri", ""),
477
+ name=res_dict.get("name", ""),
478
+ description=res_dict.get("description"),
479
+ mimeType=res_dict.get("mimeType"),
480
+ )
481
+ resources.append(resource)
482
+ except Exception as e:
483
+ logger.warning(f"Failed to parse resource: {e}")
484
+ return resources
485
+
486
+ def _parse_prompts_from_json(self, prompts_data: List[Dict]) -> List[Prompt]:
487
+ """Parse prompt data into Prompt objects."""
488
+ prompts = []
489
+ for prompt_dict in prompts_data:
490
+ try:
491
+ prompt = Prompt(
492
+ name=prompt_dict.get("name", ""),
493
+ description=prompt_dict.get("description"),
494
+ arguments=prompt_dict.get("arguments"),
495
+ )
496
+ prompts.append(prompt)
497
+ except Exception as e:
498
+ logger.warning(f"Failed to parse prompt: {e}")
499
+ return prompts
500
+
501
+ # =====================
502
+ # Override MCP Methods for JSON-RPC Support
503
+ # =====================
504
+
505
+ async def initialize(self) -> Dict[str, Any]:
506
+ """Initialize the MCP session."""
507
+ if not self._use_jsonrpc:
508
+ return await super().initialize()
509
+
510
+ # JSON-RPC mode
511
+ logger.debug("Initializing JSON-RPC HTTP MCP session")
512
+
513
+ result = await self._send_jsonrpc_request("initialize", {
514
+ "protocolVersion": "2024-11-05",
515
+ "capabilities": {},
516
+ "clientInfo": {"name": "OpenSpace", "version": "1.0.0"},
517
+ })
518
+
519
+ capabilities = result.get("capabilities", {})
520
+
521
+ # List tools
522
+ if capabilities.get("tools"):
523
+ try:
524
+ tools_result = await self._send_jsonrpc_request("tools/list", {})
525
+ self._tools = self._parse_tools_from_json(tools_result.get("tools", []))
526
+ except Exception:
527
+ self._tools = []
528
+ else:
529
+ # Try anyway - some servers don't advertise capabilities correctly
530
+ try:
531
+ tools_result = await self._send_jsonrpc_request("tools/list", {})
532
+ self._tools = self._parse_tools_from_json(tools_result.get("tools", []))
533
+ except Exception:
534
+ self._tools = []
535
+
536
+ # List resources
537
+ if capabilities.get("resources"):
538
+ try:
539
+ resources_result = await self._send_jsonrpc_request("resources/list", {})
540
+ self._resources = self._parse_resources_from_json(resources_result.get("resources", []))
541
+ except Exception:
542
+ self._resources = []
543
+ else:
544
+ self._resources = []
545
+
546
+ # List prompts
547
+ if capabilities.get("prompts"):
548
+ try:
549
+ prompts_result = await self._send_jsonrpc_request("prompts/list", {})
550
+ self._prompts = self._parse_prompts_from_json(prompts_result.get("prompts", []))
551
+ except Exception:
552
+ self._prompts = []
553
+ else:
554
+ self._prompts = []
555
+
556
+ logger.info(
557
+ f"JSON-RPC HTTP MCP session initialized with {len(self._tools)} tools, "
558
+ f"{len(self._resources)} resources, {len(self._prompts)} prompts"
559
+ )
560
+
561
+ return result
562
+
563
+ @property
564
+ def is_connected(self) -> bool:
565
+ """Check if the connector is connected."""
566
+ if self._use_jsonrpc:
567
+ return self._connected and self._jsonrpc_client is not None
568
+ return super().is_connected
569
+
570
+ async def _ensure_connected(self) -> None:
571
+ """Ensure the connector is connected."""
572
+ if self._use_jsonrpc:
573
+ if not self._connected or not self._jsonrpc_client:
574
+ raise RuntimeError("JSON-RPC HTTP connector is not connected")
575
+ else:
576
+ await super()._ensure_connected()
577
+
578
+ async def list_tools(self) -> List[Tool]:
579
+ """List all available tools."""
580
+ if not self._use_jsonrpc:
581
+ return await super().list_tools()
582
+
583
+ await self._ensure_connected()
584
+ try:
585
+ tools_result = await self._send_jsonrpc_request("tools/list", {})
586
+ self._tools = self._parse_tools_from_json(tools_result.get("tools", []))
587
+ return self._tools
588
+ except Exception as e:
589
+ logger.error(f"Error listing tools: {e}")
590
+ return []
591
+
592
+ async def call_tool(self, name: str, arguments: Dict[str, Any]) -> CallToolResult:
593
+ """Call an MCP tool."""
594
+ if not self._use_jsonrpc:
595
+ return await super().call_tool(name, arguments)
596
+
597
+ await self._ensure_connected()
598
+ logger.debug(f"Calling tool '{name}' with arguments: {arguments}")
599
+
600
+ result = await self._send_jsonrpc_request("tools/call", {
601
+ "name": name,
602
+ "arguments": arguments,
603
+ })
604
+
605
+ # Parse the result into CallToolResult
606
+ content = []
607
+ for item in result.get("content", []):
608
+ item_type = item.get("type", "text")
609
+ if item_type == "text":
610
+ content.append(TextContent(type="text", text=item.get("text", "")))
611
+ elif item_type == "image":
612
+ content.append(ImageContent(
613
+ type="image",
614
+ data=item.get("data", ""),
615
+ mimeType=item.get("mimeType", "image/png"),
616
+ ))
617
+ elif item_type == "resource":
618
+ content.append(EmbeddedResource(
619
+ type="resource",
620
+ resource=item.get("resource", {}),
621
+ ))
622
+
623
+ if not content and result:
624
+ content.append(TextContent(type="text", text=str(result)))
625
+
626
+ return CallToolResult(
627
+ content=content,
628
+ isError=result.get("isError", False),
629
+ )
630
+
631
+ async def list_resources(self) -> List[Resource]:
632
+ """List all available resources."""
633
+ if not self._use_jsonrpc:
634
+ return await super().list_resources()
635
+
636
+ await self._ensure_connected()
637
+ try:
638
+ resources_result = await self._send_jsonrpc_request("resources/list", {})
639
+ self._resources = self._parse_resources_from_json(resources_result.get("resources", []))
640
+ return self._resources
641
+ except Exception as e:
642
+ logger.error(f"Error listing resources: {e}")
643
+ return []
644
+
645
+ async def read_resource(self, uri: str) -> ReadResourceResult:
646
+ """Read a resource by URI."""
647
+ if not self._use_jsonrpc:
648
+ return await super().read_resource(uri)
649
+
650
+ await self._ensure_connected()
651
+ result = await self._send_jsonrpc_request("resources/read", {"uri": uri})
652
+ return ReadResourceResult(**result)
653
+
654
+ async def list_prompts(self) -> List[Prompt]:
655
+ """List all available prompts."""
656
+ if not self._use_jsonrpc:
657
+ return await super().list_prompts()
658
+
659
+ await self._ensure_connected()
660
+ try:
661
+ prompts_result = await self._send_jsonrpc_request("prompts/list", {})
662
+ self._prompts = self._parse_prompts_from_json(prompts_result.get("prompts", []))
663
+ return self._prompts
664
+ except Exception as e:
665
+ logger.error(f"Error listing prompts: {e}")
666
+ return []
667
+
668
+ async def get_prompt(self, name: str, arguments: Dict[str, Any] | None = None) -> GetPromptResult:
669
+ """Get a prompt by name."""
670
+ if not self._use_jsonrpc:
671
+ return await super().get_prompt(name, arguments)
672
+
673
+ await self._ensure_connected()
674
+ result = await self._send_jsonrpc_request("prompts/get", {
675
+ "name": name,
676
+ "arguments": arguments or {},
677
+ })
678
+ return GetPromptResult(**result)
679
+
680
+ async def request(self, method: str, params: Dict[str, Any] | None = None) -> Any:
681
+ """Send a raw request to the MCP implementation."""
682
+ if not self._use_jsonrpc:
683
+ return await super().request(method, params)
684
+
685
+ await self._ensure_connected()
686
+ return await self._send_jsonrpc_request(method, params or {})
687
+
688
+ async def invoke(self, name: str, params: Dict[str, Any]) -> Any:
689
+ """Invoke a tool or special method."""
690
+ if not self._use_jsonrpc:
691
+ return await super().invoke(name, params)
692
+
693
+ await self._ensure_connected()
694
+
695
+ if not name.startswith("__"):
696
+ return await self.call_tool(name, params)
697
+
698
+ if name == "__read_resource__":
699
+ return await self.read_resource(params["uri"])
700
+ if name == "__list_prompts__":
701
+ return await self.list_prompts()
702
+ if name == "__get_prompt__":
703
+ return await self.get_prompt(params["name"], params.get("args"))
704
+
705
+ raise ValueError(f"Unsupported MCP invoke name: {name}")
openspace/grounding/backends/mcp/transport/connectors/sandbox.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Sandbox connector for MCP implementations.
3
+
4
+ This module provides a connector for communicating with MCP implementations
5
+ that are executed inside a sandbox environment (supports any BaseSandbox implementation).
6
+ """
7
+
8
+ import asyncio
9
+ import sys
10
+ import time
11
+
12
+ import aiohttp
13
+ from mcp import ClientSession
14
+
15
+ from openspace.utils.logging import Logger
16
+ from openspace.grounding.backends.mcp.transport.task_managers import SseConnectionManager
17
+ from openspace.grounding.core.security import BaseSandbox
18
+ from openspace.grounding.backends.mcp.transport.connectors.base import MCPBaseConnector
19
+
20
+ logger = Logger.get_logger(__name__)
21
+
22
+
23
class SandboxConnector(MCPBaseConnector):
    """Connector for MCP implementations running in a sandbox environment.

    This connector runs a user-defined stdio command within a sandbox environment
    through a BaseSandbox implementation (e.g., E2BSandbox), potentially wrapped
    by a utility like 'supergateway' to expose its stdio over HTTP/SSE.
    """

    def __init__(
        self,
        sandbox: BaseSandbox,
        command: str,
        args: list[str],
        env: dict[str, str] | None = None,
        supergateway_command: str = "npx -y supergateway",
        port: int = 3000,
        timeout: float = 5,
        sse_read_timeout: float = 60 * 5,
    ):
        """Initialize a new sandbox connector.

        Args:
            sandbox: A BaseSandbox implementation (e.g., E2BSandbox) to run commands in.
            command: The user's MCP server command to execute in the sandbox.
            args: Command line arguments for the user's MCP server command.
            env: Environment variables for the user's MCP server command.
            supergateway_command: Command to run supergateway (default: "npx -y supergateway").
            port: Port number for the sandbox server (default: 3000).
            timeout: Timeout for the sandbox process in seconds.
            sse_read_timeout: Timeout for the SSE connection in seconds.
        """
        # Store user command configuration
        self.user_command = command
        self.user_args = args or []
        self.user_env = env or {}
        self.port = port

        # Create a placeholder connection manager (will be set up in connect()).
        # The sandbox must start first to provide the base_url, so the real
        # SseConnectionManager cannot be created until _before_connect() runs.
        from openspace.grounding.core.transport.task_managers import PlaceholderConnectionManager
        connection_manager = PlaceholderConnectionManager()
        super().__init__(connection_manager)

        # Sandbox configuration
        self._sandbox = sandbox
        self.supergateway_cmd_parts = supergateway_command

        # Runtime state
        self.process = None
        self.client_session: ClientSession | None = None
        self.errlog = sys.stderr
        self.base_url: str | None = None
        self._connected = False
        self._connection_manager: SseConnectionManager | None = None

        # SSE connection parameters
        self.headers = {}
        self.timeout = timeout
        self.sse_read_timeout = sse_read_timeout

        # Captured process output and readiness signal
        self.stdout_lines: list[str] = []
        self.stderr_lines: list[str] = []
        self._server_ready = asyncio.Event()

    def _handle_stdout(self, data: str) -> None:
        """Record and log a chunk of stdout emitted by the sandbox process."""
        self.stdout_lines.append(data)
        # BUG FIX: logger.debug() does not accept print()-style kwargs
        # (end=, flush=); passing them raised TypeError on every chunk.
        logger.debug(f"[SANDBOX STDOUT] {data}")

    def _handle_stderr(self, data: str) -> None:
        """Record and log a chunk of stderr emitted by the sandbox process."""
        self.stderr_lines.append(data)
        # BUG FIX: as above — file=/end=/flush= are print() kwargs and are
        # invalid for logging.Logger.debug().
        logger.debug(f"[SANDBOX STDERR] {data}")

    async def wait_for_server_response(self, base_url: str, timeout: int = 30) -> bool:
        """Wait for the server to respond to HTTP requests.

        Polls ``{base_url}/sse`` (falling back to the base URL itself) every
        0.5s until the server answers or ``timeout`` elapses.

        Args:
            base_url: The base URL to check for server readiness
            timeout: Maximum time to wait in seconds

        Returns:
            True if server is responding, raises TimeoutError otherwise
        """
        logger.info(f"Waiting for server at {base_url} to respond...")
        sys.stdout.flush()

        start_time = time.time()
        ping_url = f"{base_url}/sse"

        # Try to connect to the server
        while time.time() - start_time < timeout:
            try:
                async with aiohttp.ClientSession() as session:
                    try:
                        # First try the SSE endpoint
                        async with session.get(ping_url, timeout=2) as response:
                            if response.status == 200:
                                elapsed = time.time() - start_time
                                logger.info(f"Server is ready! SSE endpoint responded with 200 after {elapsed:.1f}s")
                                return True
                    except Exception:
                        # If sse endpoint doesn't work, try the base URL
                        async with session.get(base_url, timeout=2) as response:
                            if response.status < 500:  # Accept any non-server error
                                elapsed = time.time() - start_time
                                logger.info(
                                    f"Server is ready! Base URL responded with {response.status} after {elapsed:.1f}s"
                                )
                                return True
            except Exception:
                # Wait a bit before trying again
                await asyncio.sleep(0.5)
                continue

            # If we get here, the request failed
            await asyncio.sleep(0.5)

            # Log status roughly every 5 seconds
            elapsed = time.time() - start_time
            if int(elapsed) % 5 == 0:
                logger.info(f"Still waiting for server to respond... ({elapsed:.1f}s elapsed)")
                sys.stdout.flush()

        # If we get here, we timed out
        raise TimeoutError(f"Timeout waiting for server to respond (waited {timeout} seconds)")

    async def _before_connect(self) -> None:
        """Start the sandbox, launch supergateway, and prepare the SSE manager."""
        logger.debug("Connecting to MCP implementation in sandbox")

        # Start the sandbox if not already active
        if not self._sandbox.is_active:
            logger.debug("Starting sandbox...")
            await self._sandbox.start()

        # Get the host for the sandbox.
        # Note: this assumes the sandbox implementation has a get_host method
        # (available on E2BSandbox).
        host = self._sandbox.get_host(self.port)
        self.base_url = f"https://{host}".rstrip("/")

        # Append command with args
        command = f"{self.user_command} {' '.join(self.user_args)}"

        # Construct the full command with supergateway
        full_command = f'{self.supergateway_cmd_parts} \
            --base-url {self.base_url} \
            --port {self.port} \
            --cors \
            --stdio "{command}"'

        logger.debug(f"Full command: {full_command}")

        # Execute the command in the sandbox (detached, output streamed to
        # the _handle_stdout/_handle_stderr callbacks)
        self.process = await self._sandbox.execute_safe(
            full_command,
            envs=self.user_env,
            timeout=1000 * 60 * 10,  # 10 minutes timeout
            background=True,
            on_stdout=self._handle_stdout,
            on_stderr=self._handle_stderr,
        )

        # Wait for the server to be ready
        await self.wait_for_server_response(self.base_url, timeout=30)
        logger.debug("Initializing connection manager...")

        # Create the SSE connection URL
        sse_url = f"{self.base_url}/sse"

        # Create and set up the connection manager
        self._connection_manager = SseConnectionManager(sse_url, self.headers, self.timeout, self.sse_read_timeout)

    async def _after_connect(self) -> None:
        """Create ClientSession and log success."""
        await super()._after_connect()
        logger.debug(f"Successfully connected to MCP implementation via HTTP/SSE in sandbox: {self.base_url}")

    async def _before_disconnect(self) -> None:
        """Clean up sandbox-specific resources before disconnection."""
        logger.debug("Cleaning up sandbox resources")

        # Stop the sandbox (which will clean up processes)
        if self._sandbox and self._sandbox.is_active:
            try:
                logger.debug("Stopping sandbox instance")
                await self._sandbox.stop()
                logger.debug("Sandbox instance stopped successfully")
            except Exception as e:
                logger.warning(f"Error stopping sandbox: {e}")

        self.process = None

        # Call the parent method to clean up MCP resources
        await super()._before_disconnect()

        # Clear any collected output
        self.stdout_lines = []
        self.stderr_lines = []
        self.base_url = None

    async def _cleanup_on_connect_failure(self) -> None:
        """Clean up sandbox resources on connection failure."""
        # Stop the sandbox if it was started
        if self._sandbox and self._sandbox.is_active:
            try:
                await self._sandbox.stop()
            except Exception as e:
                logger.warning(f"Error stopping sandbox during cleanup: {e}")

        self.process = None
        self.stdout_lines = []
        self.stderr_lines = []
        self.base_url = None

        # Call parent cleanup
        await super()._cleanup_on_connect_failure()

    @property
    def sandbox(self) -> BaseSandbox:
        """Get the underlying sandbox instance."""
        return self._sandbox

    @property
    def public_identifier(self) -> dict[str, str | list[str]]:
        """Get the identifier for the connector.

        BUG FIX: the return annotation previously claimed ``str`` although a
        dict has always been returned.
        """
        return {"type": "sandbox", "command": self.user_command, "args": self.user_args}
openspace/grounding/backends/mcp/transport/connectors/stdio.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ StdIO connector for MCP implementations.
3
+
4
+ This module provides a connector for communicating with MCP implementations
5
+ through the standard input/output streams.
6
+ """
7
+
8
+ import sys
9
+
10
+ from mcp import ClientSession, StdioServerParameters
11
+
12
+ from openspace.utils.logging import Logger
13
+ from ..task_managers import StdioConnectionManager
14
+ from .base import MCPBaseConnector
15
+
16
+ logger = Logger.get_logger(__name__)
17
+
18
+
19
class StdioConnector(MCPBaseConnector):
    """Connector for MCP implementations using stdio transport.

    This connector uses the stdio transport to communicate with MCP implementations
    that are executed as child processes. It uses a connection manager to handle
    the proper lifecycle management of the stdio client.
    """

    def __init__(
        self,
        command: str = "npx",
        args: list[str] | None = None,
        env: dict[str, str] | None = None,
        errlog=None,
    ):
        """Initialize a new stdio connector.

        Args:
            command: The command to execute.
            args: Optional command line arguments.
            env: Optional environment variables.
            errlog: Stream to write error output to (defaults to filtered stderr).
                StdioConnectionManager will wrap this to filter harmless errors.
        """
        self.command = command
        self.args = args or []  # Ensure args is never None

        # BUG FIX: copy the caller's mapping instead of aliasing it
        # (`env or {}`), so the defaults injected below never mutate the
        # dictionary the caller passed in.
        self.env = dict(env) if env else {}
        # Encourage MCP servers to suppress non-JSON output on stdout:
        # many Node.js-based servers respect NODE_ENV=production, and some
        # honor an explicit silent flag.
        self.env.setdefault("NODE_ENV", "production")
        self.env.setdefault("MCP_SILENT", "true")

        self.errlog = errlog

        # Create server parameters and connection manager.
        # StdioConnectionManager will wrap errlog in FilteredStderrWrapper.
        server_params = StdioServerParameters(command=self.command, args=self.args, env=self.env)
        connection_manager = StdioConnectionManager(server_params, self.errlog)
        super().__init__(connection_manager)

    async def _before_connect(self) -> None:
        """Log connection attempt."""
        logger.debug(f"Connecting to MCP implementation: {self.command}")

    async def _after_connect(self) -> None:
        """Create ClientSession and log success."""
        # Call parent's _after_connect to create the ClientSession
        await super()._after_connect()
        logger.debug(f"Successfully connected to MCP implementation: {self.command}")

    @property
    def public_identifier(self) -> dict[str, str]:
        """Identifier describing this connector for logging/telemetry."""
        return {"type": "stdio", "command&args": f"{self.command} {' '.join(self.args)}"}
+ return {"type": "stdio", "command&args": f"{self.command} {' '.join(self.args)}"}