SEUyishu committed on
Commit 51e7c6b · verified · 1 Parent(s): 596b729

Update mcp_output/mcp_plugin/mcp_service.py

Files changed (1):
  mcp_output/mcp_plugin/mcp_service.py  +1083  -0
mcp_output/mcp_plugin/mcp_service.py CHANGED
 
@@ -9,6 +9,11 @@ import json
 import tempfile
 import yaml
 import numpy as np
+import base64
+import hashlib
+import shutil
+import uuid
+from datetime import datetime
 from typing import Optional, List, Dict, Any
 from pathlib import Path
 
 
@@ -31,6 +36,477 @@ except ImportError as e:

 mcp = FastMCP("matdeeplearn_service")

+# ============================================================================
+# Global storage management - for uploaded datasets and trained models
+# ============================================================================
+
+# Server-side storage directories
+STORAGE_BASE = os.path.join(project_root, "mcp_storage")
+DATASETS_DIR = os.path.join(STORAGE_BASE, "datasets")
+MODELS_DIR = os.path.join(STORAGE_BASE, "models")
+SESSIONS_DIR = os.path.join(STORAGE_BASE, "sessions")
+
+# Ensure the storage directories exist
+for dir_path in [STORAGE_BASE, DATASETS_DIR, MODELS_DIR, SESSIONS_DIR]:
+    os.makedirs(dir_path, exist_ok=True)
+
+# Session registry (session_id -> session_info)
+_sessions: Dict[str, Dict] = {}
+
+
+def _get_session_path(session_id: str) -> str:
+    """Return the directory path for a session."""
+    return os.path.join(SESSIONS_DIR, session_id)
+
+
+def _generate_session_id() -> str:
+    """Generate a unique session ID."""
+    return f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
+
+
+def _generate_dataset_id(name: str) -> str:
+    """Generate a dataset ID."""
+    return f"dataset_{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+
+
+def _generate_model_id(model_name: str) -> str:
+    """Generate a model ID."""
+    return f"model_{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
+
+
+# ============================================================================
+# Session management tools
+# ============================================================================
+
+@mcp.tool(name="create_session", description="Create a new working session for uploading data and training models. Returns a session_id to use in subsequent operations.")
+def create_session(session_name: Optional[str] = None) -> dict:
+    """
+    Create a new working session. Use this before uploading data.
+
+    Parameters:
+        session_name (str, optional): A friendly name for this session.
+
+    Returns:
+        dict: Contains session_id and session info.
+
+    Example:
+        create_session(session_name="my_material_project")
+    """
+    try:
+        session_id = _generate_session_id()
+        session_path = _get_session_path(session_id)
+        os.makedirs(session_path, exist_ok=True)
+        os.makedirs(os.path.join(session_path, "data"), exist_ok=True)
+        os.makedirs(os.path.join(session_path, "models"), exist_ok=True)
+        os.makedirs(os.path.join(session_path, "outputs"), exist_ok=True)
+
+        session_info = {
+            "session_id": session_id,
+            "session_name": session_name or session_id,
+            "created_at": datetime.now().isoformat(),
+            "data_path": os.path.join(session_path, "data"),
+            "models_path": os.path.join(session_path, "models"),
+            "outputs_path": os.path.join(session_path, "outputs"),
+            "uploaded_files": [],
+            "trained_models": [],
+            "status": "active"
+        }
+
+        _sessions[session_id] = session_info
+
+        # Save session info to disk
+        with open(os.path.join(session_path, "session_info.json"), 'w') as f:
+            json.dump(session_info, f, indent=2)
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "session_name": session_info["session_name"],
+            "message": "Session created successfully. Use this session_id for uploading data and training.",
+            "next_steps": [
+                "1. Upload structure files using upload_structure_files",
+                "2. Upload targets.csv using upload_targets",
+                "3. Process data using process_session_data",
+                "4. Train model using train_session_model"
+            ]
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="get_session_info", description="Get information about an existing session.")
+def get_session_info(session_id: str) -> dict:
+    """
+    Get information about an existing session.
+
+    Parameters:
+        session_id (str): The session ID returned from create_session.
+
+    Returns:
+        dict: Session information including uploaded files and trained models.
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        info_file = os.path.join(session_path, "session_info.json")
+
+        if not os.path.exists(info_file):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        with open(info_file, 'r') as f:
+            session_info = json.load(f)
+
+        # Update with current file counts
+        data_path = session_info["data_path"]
+        if os.path.exists(data_path):
+            files = os.listdir(data_path)
+            session_info["current_files"] = files
+            session_info["file_count"] = len(files)
+            session_info["has_targets"] = "targets.csv" in files
+
+        return {"success": True, **session_info}
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="list_sessions", description="List all available sessions.")
+def list_sessions() -> dict:
+    """
+    List all available sessions on the server.
+
+    Returns:
+        dict: List of sessions with their basic info.
+    """
+    try:
+        sessions = []
+        if os.path.exists(SESSIONS_DIR):
+            for session_id in os.listdir(SESSIONS_DIR):
+                info_file = os.path.join(SESSIONS_DIR, session_id, "session_info.json")
+                if os.path.exists(info_file):
+                    with open(info_file, 'r') as f:
+                        info = json.load(f)
+                    sessions.append({
+                        "session_id": session_id,
+                        "session_name": info.get("session_name", session_id),
+                        "created_at": info.get("created_at"),
+                        "status": info.get("status", "unknown")
+                    })
+
+        return {
+            "success": True,
+            "sessions": sessions,
+            "total_sessions": len(sessions)
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="delete_session", description="Delete a session and all its data.")
+def delete_session(session_id: str, confirm: bool = False) -> dict:
+    """
+    Delete a session and all associated data.
+
+    Parameters:
+        session_id (str): The session ID to delete.
+        confirm (bool): Must be True to confirm deletion.
+
+    Returns:
+        dict: Deletion status.
+    """
+    try:
+        if not confirm:
+            return {
+                "success": False,
+                "error": "Please set confirm=True to delete the session. This action cannot be undone."
+            }
+
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        shutil.rmtree(session_path)
+
+        if session_id in _sessions:
+            del _sessions[session_id]
+
+        return {
+            "success": True,
+            "message": f"Session {session_id} deleted successfully."
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+# ============================================================================
+# Data upload tools
+# ============================================================================
+
+@mcp.tool(name="upload_structure_file", description="Upload a single structure file to a session. Supports CIF, XYZ, POSCAR, JSON formats.")
+def upload_structure_file(
+    session_id: str,
+    filename: str,
+    file_content: str,
+    file_format: Optional[str] = None
+) -> dict:
+    """
+    Upload a single structure file to a session.
+
+    Parameters:
+        session_id (str): The session ID.
+        filename (str): Name for the file (e.g., "structure1.cif").
+        file_content (str): The complete file content as a string.
+        file_format (str, optional): File format hint (auto-detected from filename if not provided).
+
+    Returns:
+        dict: Upload status and file info.
+
+    Example:
+        upload_structure_file(
+            session_id="session_xxx",
+            filename="NaCl.cif",
+            file_content="data_NaCl\\n_cell_length_a 5.64..."
+        )
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        file_path = os.path.join(data_path, filename)
+
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(file_content)
+
+        # Validate structure if possible
+        validation = {"valid": True}
+        try:
+            import ase.io
+            with tempfile.NamedTemporaryFile(mode='w', suffix=os.path.splitext(filename)[1], delete=False) as tmp:
+                tmp.write(file_content)
+                tmp_path = tmp.name
+            try:
+                structure = ase.io.read(tmp_path)
+                validation = {
+                    "valid": True,
+                    "num_atoms": len(structure),
+                    "formula": structure.get_chemical_formula()
+                }
+            finally:
+                os.unlink(tmp_path)
+        except Exception as e:
+            validation = {"valid": False, "warning": str(e)}
+
+        return {
+            "success": True,
+            "filename": filename,
+            "file_size": len(file_content),
+            "saved_to": file_path,
+            "validation": validation
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="upload_structure_files_batch", description="Upload multiple structure files at once to a session.")
+def upload_structure_files_batch(
+    session_id: str,
+    files: Dict[str, str]
+) -> dict:
+    """
+    Upload multiple structure files to a session in one call.
+
+    Parameters:
+        session_id (str): The session ID.
+        files (dict): Dictionary mapping filename to file content.
+            Example: {"struct1.cif": "content1", "struct2.cif": "content2"}
+
+    Returns:
+        dict: Upload status for all files.
+
+    Example:
+        upload_structure_files_batch(
+            session_id="session_xxx",
+            files={
+                "NaCl.cif": "data_NaCl...",
+                "ZnO.cif": "data_ZnO..."
+            }
+        )
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        results = []
+        success_count = 0
+
+        for filename, content in files.items():
+            try:
+                file_path = os.path.join(data_path, filename)
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    f.write(content)
+                results.append({
+                    "filename": filename,
+                    "success": True,
+                    "size": len(content)
+                })
+                success_count += 1
+            except Exception as e:
+                results.append({
+                    "filename": filename,
+                    "success": False,
+                    "error": str(e)
+                })
+
+        return {
+            "success": True,
+            "total_files": len(files),
+            "successful_uploads": success_count,
+            "failed_uploads": len(files) - success_count,
+            "results": results
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="upload_targets", description="Upload targets.csv file containing target properties for training.")
+def upload_targets(
+    session_id: str,
+    targets_content: str,
+    validate: bool = True
+) -> dict:
+    """
+    Upload targets.csv file to a session.
+
+    Parameters:
+        session_id (str): The session ID.
+        targets_content (str): Content of targets.csv file.
+            Format: structure_id,target_value (one per line).
+        validate (bool): Whether to validate the targets file.
+
+    Returns:
+        dict: Upload status and validation info.
+
+    Example:
+        upload_targets(
+            session_id="session_xxx",
+            targets_content="NaCl,1.5\\nZnO,2.3\\nTiO2,3.1"
+        )
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        targets_path = os.path.join(data_path, "targets.csv")
+
+        with open(targets_path, 'w', encoding='utf-8') as f:
+            f.write(targets_content)
+
+        # Validate and analyze
+        validation = {"valid": True}
+        if validate:
+            import csv
+            from io import StringIO
+
+            reader = csv.reader(StringIO(targets_content))
+            rows = list(reader)
+
+            structure_ids = []
+            target_values = []
+            for row in rows:
+                if len(row) >= 2:
+                    structure_ids.append(row[0])
+                    try:
+                        target_values.append(float(row[1]))
+                    except ValueError:
+                        pass
+
+            # Check for matching structure files
+            existing_files = os.listdir(data_path)
+            structure_files = [f for f in existing_files if f != "targets.csv"]
+            structure_names = [os.path.splitext(f)[0] for f in structure_files]
+
+            matched = [sid for sid in structure_ids if sid in structure_names]
+            unmatched = [sid for sid in structure_ids if sid not in structure_names]
+
+            validation = {
+                "valid": True,
+                "num_samples": len(rows),
+                "num_valid_targets": len(target_values),
+                "target_range": {
+                    "min": min(target_values) if target_values else None,
+                    "max": max(target_values) if target_values else None,
+                    "mean": sum(target_values) / len(target_values) if target_values else None
+                },
+                "matched_structures": len(matched),
+                "unmatched_structures": unmatched[:10] if unmatched else [],
+                "existing_structure_files": len(structure_files)
+            }
+
+        return {
+            "success": True,
+            "saved_to": targets_path,
+            "validation": validation
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="upload_binary_file", description="Upload a binary file (like .pth model file) encoded as base64.")
+def upload_binary_file(
+    session_id: str,
+    filename: str,
+    base64_content: str,
+    destination: str = "models"
+) -> dict:
+    """
+    Upload a binary file (e.g., a pre-trained model .pth file) encoded as base64.
+
+    Parameters:
+        session_id (str): The session ID.
+        filename (str): Name for the file.
+        base64_content (str): File content encoded as a base64 string.
+        destination (str): Where to save - "models" or "data".
+
+    Returns:
+        dict: Upload status.
+
+    Example:
+        # In Python, encode your model file:
+        # import base64
+        # with open("model.pth", "rb") as f:
+        #     encoded = base64.b64encode(f.read()).decode()
+        # Then pass encoded as base64_content
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        if destination == "models":
+            dest_path = os.path.join(session_path, "models")
+        else:
+            dest_path = os.path.join(session_path, "data")
+
+        file_path = os.path.join(dest_path, filename)
+
+        # Decode and write binary content
+        binary_content = base64.b64decode(base64_content)
+        with open(file_path, 'wb') as f:
+            f.write(binary_content)
+
+        return {
+            "success": True,
+            "filename": filename,
+            "file_size_bytes": len(binary_content),
+            "saved_to": file_path
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+

 @mcp.tool(name="check_environment", description="Check if MatDeepLearn environment is properly configured and GPU is available.")
 def check_environment() -> dict:
 
@@ -884,6 +1360,613 @@ def quick_structure_analysis(
         return {"success": False, "error": str(e)}


+# ============================================================================
+# Session-based training and model management tools
+# ============================================================================
+
+@mcp.tool(name="process_session_data", description="Process uploaded structure data in a session into graph format for GNN training.")
+def process_session_data(
+    session_id: str,
+    target_index: int = 0,
+    graph_max_radius: float = 8.0,
+    graph_max_neighbors: int = 12,
+    reprocess: bool = True
+) -> dict:
+    """
+    Process all uploaded structure files in a session into graph format.
+
+    Parameters:
+        session_id (str): The session ID.
+        target_index (int): Index of the target column in targets.csv (default: 0, meaning the second column).
+        graph_max_radius (float): Maximum radius for graph edges (default: 8.0 Angstrom).
+        graph_max_neighbors (int): Maximum neighbors per atom (default: 12).
+        reprocess (bool): Force reprocessing even if already processed (default: True).
+
+    Returns:
+        dict: Processing status and dataset statistics.
+    """
+    try:
+        if not MATDEEPLEARN_AVAILABLE:
+            return {"success": False, "error": "MatDeepLearn not available"}
+
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+
+        # Check for required files
+        if not os.path.exists(os.path.join(data_path, "targets.csv")):
+            return {
+                "success": False,
+                "error": "targets.csv not found. Please upload targets using upload_targets first."
+            }
+
+        files = [f for f in os.listdir(data_path) if f != "targets.csv" and not f.startswith('.')]
+        if len(files) == 0:
+            return {
+                "success": False,
+                "error": "No structure files found. Please upload structure files first."
+            }
+
+        processing_args = {
+            "dataset_type": "inmemory",
+            "data_path": data_path,
+            "target_path": "targets.csv",
+            "dictionary_source": "default",
+            "dictionary_path": "atom_dict.json",
+            "data_format": "json",
+            "verbose": "True",
+            "graph_max_radius": graph_max_radius,
+            "graph_max_neighbors": graph_max_neighbors,
+            "voronoi": "False",
+            "edge_features": "True",
+            "graph_edge_length": 50,
+            "SM_descriptor": "False",
+            "SOAP_descriptor": "False"
+        }
+
+        dataset = process.get_dataset(
+            data_path,
+            target_index,
+            "True" if reprocess else "False",
+            processing_args
+        )
+
+        # Calculate statistics
+        num_nodes_list = [data.x.shape[0] for data in dataset]
+        num_edges_list = [data.edge_index.shape[1] for data in dataset]
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "dataset_size": len(dataset),
+            "statistics": {
+                "avg_atoms_per_structure": float(np.mean(num_nodes_list)),
+                "min_atoms": min(num_nodes_list),
+                "max_atoms": max(num_nodes_list),
+                "avg_edges_per_structure": float(np.mean(num_edges_list)),
+                "num_node_features": dataset[0].x.shape[1] if len(dataset) > 0 else 0
+            },
+            "ready_for_training": True,
+            "next_step": "Use train_session_model to train a model on this data."
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="train_session_model", description="Train a GNN model on processed session data.")
+def train_session_model(
+    session_id: str,
+    model_name: str = "CGCNN_demo",
+    epochs: int = 100,
+    batch_size: int = 32,
+    learning_rate: float = 0.002,
+    train_ratio: float = 0.8,
+    val_ratio: float = 0.1,
+    test_ratio: float = 0.1,
+    model_save_name: Optional[str] = None
+) -> dict:
+    """
+    Train a GNN model on processed session data.
+
+    Parameters:
+        session_id (str): The session ID with processed data.
+        model_name (str): Model to use - "CGCNN_demo", "SchNet_demo", "MPNN_demo", etc.
+        epochs (int): Number of training epochs (default: 100).
+        batch_size (int): Batch size (default: 32).
+        learning_rate (float): Learning rate (default: 0.002).
+        train_ratio (float): Training data ratio (default: 0.8).
+        val_ratio (float): Validation data ratio (default: 0.1).
+        test_ratio (float): Test data ratio (default: 0.1).
+        model_save_name (str, optional): Custom name for the saved model.
+
+    Returns:
+        dict: Training results including errors and model path.
+    """
+    try:
+        if not MATDEEPLEARN_AVAILABLE:
+            return {"success": False, "error": "MatDeepLearn not available"}
+
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        models_path = os.path.join(session_path, "models")
+        outputs_path = os.path.join(session_path, "outputs")
+
+        # Load config
+        config_path = os.path.join(project_root, "config.yml")
+        with open(config_path, "r") as f:
+            config = yaml.load(f, Loader=yaml.FullLoader)
+
+        if model_name not in config.get("Models", {}):
+            available_models = list(config.get("Models", {}).keys())
+            return {
+                "success": False,
+                "error": f"Model '{model_name}' not found. Available: {available_models}"
+            }
+
+        # Generate model filename
+        if model_save_name:
+            model_filename = f"{model_save_name}.pth"
+        else:
+            model_filename = f"{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pth"
+
+        model_path = os.path.join(models_path, model_filename)
+
+        # Prepare job config
+        job_config = {
+            "job_name": f"train_{session_id}",
+            "reprocess": "False",
+            "model": model_name,
+            "load_model": "False",
+            "save_model": "True",
+            "model_path": model_path,
+            "write_output": "True",
+            "parallel": "False",
+            "seed": np.random.randint(1, 1e6)
+        }
+
+        training_config = {
+            "target_index": 0,
+            "loss": "l1_loss",
+            "train_ratio": train_ratio,
+            "val_ratio": val_ratio,
+            "test_ratio": test_ratio,
+            "verbosity": 5
+        }
+
+        model_config = config["Models"][model_name].copy()
+        model_config["epochs"] = epochs
+        model_config["batch_size"] = batch_size
+        model_config["lr"] = learning_rate
+
+        # Determine device
+        world_size = torch.cuda.device_count()
+        rank = "cpu" if world_size == 0 else "cuda"
+
+        # Change to outputs directory for writing results
+        original_cwd = os.getcwd()
+        os.chdir(outputs_path)
+
+        try:
+            # Train model
+            error_values = training.train_regular(
+                rank,
+                world_size,
+                data_path,
+                job_config,
+                training_config,
+                model_config
+            )
+        finally:
+            os.chdir(original_cwd)
+
+        # Update session info
+        info_file = os.path.join(session_path, "session_info.json")
+        if os.path.exists(info_file):
+            with open(info_file, 'r') as f:
+                session_info = json.load(f)
+
+            session_info.setdefault("trained_models", []).append({
+                "model_name": model_name,
+                "model_file": model_filename,
+                "model_path": model_path,
+                "trained_at": datetime.now().isoformat(),
+                "epochs": epochs,
+                "train_error": float(error_values[0]) if error_values is not None else None,
+                "val_error": float(error_values[1]) if error_values is not None else None,
+                "test_error": float(error_values[2]) if error_values is not None else None
+            })
+
+            with open(info_file, 'w') as f:
+                json.dump(session_info, f, indent=2)
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "model_name": model_name,
+            "model_file": model_filename,
+            "model_path": model_path,
+            "epochs": epochs,
+            "device_used": rank,
+            "results": {
+                "train_error": float(error_values[0]) if error_values is not None else None,
+                "val_error": float(error_values[1]) if error_values is not None else None,
+                "test_error": float(error_values[2]) if error_values is not None else None
+            },
+            "next_steps": [
+                "Use predict_with_session_model to make predictions",
+                "Use download_model to get the trained model file",
+                "Use evaluate_session_model for detailed evaluation"
+            ]
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="list_session_models", description="List all trained models in a session.")
+def list_session_models(session_id: str) -> dict:
+    """
+    List all trained models in a session.
+
+    Parameters:
+        session_id (str): The session ID.
+
+    Returns:
+        dict: List of trained models with their info.
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        models_path = os.path.join(session_path, "models")
+
+        # Get model files
+        model_files = []
+        if os.path.exists(models_path):
+            for f in os.listdir(models_path):
+                if f.endswith('.pth'):
+                    file_path = os.path.join(models_path, f)
+                    model_files.append({
+                        "filename": f,
+                        "path": file_path,
+                        "size_mb": os.path.getsize(file_path) / (1024 * 1024),
+                        "created": datetime.fromtimestamp(os.path.getctime(file_path)).isoformat()
+                    })
+
+        # Get training history from session info
+        info_file = os.path.join(session_path, "session_info.json")
+        trained_models = []
+        if os.path.exists(info_file):
+            with open(info_file, 'r') as f:
+                session_info = json.load(f)
+            trained_models = session_info.get("trained_models", [])
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "model_files": model_files,
+            "training_history": trained_models,
+            "total_models": len(model_files)
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="predict_with_session_model", description="Make predictions using a trained model from the session.")
+def predict_with_session_model(
+    session_id: str,
+    model_filename: str,
+    structure_contents: Optional[Dict[str, str]] = None,
+    use_session_data: bool = False
+) -> dict:
+    """
+    Make predictions using a trained model.
+
+    Parameters:
+        session_id (str): The session ID.
+        model_filename (str): Name of the model file (e.g., "CGCNN_demo_20231201.pth").
+        structure_contents (dict, optional): New structures to predict.
+            Format: {"name1.cif": "content", ...}
+        use_session_data (bool): If True, predict on the session's training data.
+
+    Returns:
+        dict: Predictions for each structure.
+    """
+    try:
+        if not MATDEEPLEARN_AVAILABLE:
+            return {"success": False, "error": "MatDeepLearn not available"}
+
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        model_path = os.path.join(session_path, "models", model_filename)
+        if not os.path.exists(model_path):
+            return {"success": False, "error": f"Model not found: {model_filename}"}
+
+        # Determine data path
+        if use_session_data:
+            data_path = os.path.join(session_path, "data")
+        elif structure_contents:
+            # Create temp directory for new structures
+            temp_dir = tempfile.mkdtemp(prefix="mcp_predict_")
+            data_path = temp_dir
+
+            # Write structures
+            for filename, content in structure_contents.items():
+                with open(os.path.join(temp_dir, filename), 'w') as f:
+                    f.write(content)
+
+            # Create dummy targets.csv
+            struct_names = [os.path.splitext(f)[0] for f in structure_contents.keys()]
+            with open(os.path.join(temp_dir, "targets.csv"), 'w') as f:
+                for name in struct_names:
+                    f.write(f"{name},0.0\n")
+        else:
+            return {
+                "success": False,
+                "error": "Either structure_contents or use_session_data=True must be provided"
+            }
+
+        # Get dataset
+        dataset = process.get_dataset(data_path, 0, "True")
+
+        job_config = {
+            "job_name": f"predict_{session_id}",
+            "model_path": model_path,
+            "write_output": "True"
+        }
+
+        outputs_path = os.path.join(session_path, "outputs")
+        original_cwd = os.getcwd()
+        os.chdir(outputs_path)
+
+        try:
+            # Run prediction
+            test_error = training.predict(dataset, "l1_loss", job_config)
+
+            # Read predictions
+            predictions = []
+            output_file = os.path.join(outputs_path, f"predict_{session_id}_predicted_outputs.csv")
+            if os.path.exists(output_file):
+                import csv
+                with open(output_file, 'r') as f:
+                    reader = csv.reader(f)
+                    for row in reader:
+                        if len(row) >= 2:
+                            predictions.append({
+                                "structure_id": row[0],
+                                "predicted_value": float(row[1]) if row[1] else None
+                            })
+        finally:
+            os.chdir(original_cwd)
+            if structure_contents and 'temp_dir' in locals():
+                shutil.rmtree(temp_dir, ignore_errors=True)
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "model_used": model_filename,
+            "num_predictions": len(predictions),
+            "predictions": predictions,
+            "mean_absolute_error": float(test_error) if use_session_data else None
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="download_model", description="Get a trained model file as base64 encoded string for download.")
+def download_model(session_id: str, model_filename: str) -> dict:
+    """
+    Get a trained model file as a base64 encoded string.
+    You can decode this to get the .pth file.
+
+    Parameters:
+        session_id (str): The session ID.
+        model_filename (str): Name of the model file.
+
+    Returns:
+        dict: Base64 encoded model file and metadata.
+
+    Usage after receiving:
+        import base64
+        model_data = base64.b64decode(result["model_base64"])
+        with open("my_model.pth", "wb") as f:
+            f.write(model_data)
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        model_path = os.path.join(session_path, "models", model_filename)
+        if not os.path.exists(model_path):
+            return {"success": False, "error": f"Model not found: {model_filename}"}
+
+        # Read and encode model
+        with open(model_path, 'rb') as f:
+            model_bytes = f.read()
+
+        model_base64 = base64.b64encode(model_bytes).decode('utf-8')
+
+        return {
+            "success": True,
+            "model_filename": model_filename,
+            "file_size_bytes": len(model_bytes),
+            "file_size_mb": len(model_bytes) / (1024 * 1024),
+            "model_base64": model_base64,
+            "instructions": "Decode with: base64.b64decode(model_base64) and save as .pth file"
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="compare_session_models", description="Compare multiple trained models in a session on the same dataset.")
+def compare_session_models(
+    session_id: str,
+    model_filenames: Optional[List[str]] = None
+) -> dict:
+    """
+    Compare multiple trained models in a session.
+
+    Parameters:
+        session_id (str): The session ID.
+        model_filenames (list, optional): List of model files to compare. If None, compare all.
+
+    Returns:
+        dict: Comparison results with rankings.
+    """
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        # Get training history
+        info_file = os.path.join(session_path, "session_info.json")
+        if not os.path.exists(info_file):
+            return {"success": False, "error": "No training history found"}
+
+        with open(info_file, 'r') as f:
+            session_info = json.load(f)
+
+        trained_models = session_info.get("trained_models", [])
+
+        if model_filenames:
+            trained_models = [m for m in trained_models if m.get("model_file") in model_filenames]
+
+        if len(trained_models) == 0:
+            return {"success": False, "error": "No trained models found"}
+
+        # Sort by test error
+        sorted_models = sorted(
+            trained_models,
+            key=lambda x: x.get("test_error") or float('inf')
+        )
+
+        comparison = []
+        for i, model in enumerate(sorted_models):
+            comparison.append({
+                "rank": i + 1,
+                "model_name": model.get("model_name"),
+                "model_file": model.get("model_file"),
+                "train_error": model.get("train_error"),
+                "val_error": model.get("val_error"),
+                "test_error": model.get("test_error"),
+                "epochs": model.get("epochs"),
+                "trained_at": model.get("trained_at")
+            })
+
+        best_model = sorted_models[0] if sorted_models else None
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "num_models_compared": len(comparison),
+            "comparison": comparison,
+            "best_model": {
+                "model_file": best_model.get("model_file"),
+                "model_name": best_model.get("model_name"),
+                "test_error": best_model.get("test_error")
+            } if best_model else None,
+            "recommendation": f"Best model is {best_model.get('model_file')} with test error {best_model.get('test_error'):.4f}" if best_model and best_model.get('test_error') else None
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(name="run_cross_validation_session", description="Run k-fold cross validation on session data.")
+def run_cross_validation_session(
+    session_id: str,
+    model_name: str = "CGCNN_demo",
+    cv_folds: int = 5,
+    epochs: int = 100
+) -> dict:
+    """
+    Run k-fold cross validation on session data.
+
+    Parameters:
+        session_id (str): The session ID.
+        model_name (str): Model to use (default: "CGCNN_demo").
+        cv_folds (int): Number of folds (default: 5).
+        epochs (int): Training epochs per fold (default: 100).
+
+    Returns:
+        dict: Cross validation results.
+    """
+    try:
+        if not MATDEEPLEARN_AVAILABLE:
+            return {"success": False, "error": "MatDeepLearn not available"}
+
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        outputs_path = os.path.join(session_path, "outputs")
+
+        # Load config
+        config_path = os.path.join(project_root, "config.yml")
+        with open(config_path, "r") as f:
+            config = yaml.load(f, Loader=yaml.FullLoader)
+
+        if model_name not in config.get("Models", {}):
+            return {"success": False, "error": f"Model '{model_name}' not found"}
+
+        job_config = {
+            "job_name": f"cv_{session_id}",
+            "reprocess": "False",
+            "model": model_name,
+            "cv_folds": cv_folds,
+            "write_output": "True",
+            "parallel": "False",
+            "seed": np.random.randint(1, 1e6)
+        }
+
+        training_config = {
+            "target_index": 0,
+            "loss": "l1_loss",
+            "verbosity": 5
+        }
+
+        model_config = config["Models"][model_name].copy()
+        model_config["epochs"] = epochs
+
+        world_size = torch.cuda.device_count()
+        rank = "cpu" if world_size == 0 else "cuda"
+
+        original_cwd = os.getcwd()
+        os.chdir(outputs_path)
+
+        try:
+            cv_error = training.train_CV(
+                rank,
+                world_size,
+                data_path,
+                job_config,
+                training_config,
+                model_config
+            )
+        finally:
+            os.chdir(original_cwd)
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "model_name": model_name,
+            "cv_folds": cv_folds,
+            "epochs_per_fold": epochs,
+            "cv_mean_error": float(cv_error) if cv_error is not None else None,
+            "output_file": f"cv_{session_id}_CV_outputs.csv"
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
 def create_app() -> FastMCP:
     """
     Creates and returns the FastMCP application instance.
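
For reference, the client-side base64 round-trip that the upload_binary_file and download_model docstrings describe needs nothing beyond the Python standard library. Below is a minimal sketch, not part of the commit: call_tool is a hypothetical stand-in for whatever MCP client dispatch you use, and the file names are placeholders; the tool names and parameters match the definitions above.

import base64

def encode_model_for_upload(path: str) -> str:
    """Read a local .pth file and base64-encode it for upload_binary_file."""
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode()

def save_downloaded_model(result: dict, path: str) -> None:
    """Decode a download_model response and write the .pth file to disk."""
    with open(path, "wb") as f:
        f.write(base64.b64decode(result["model_base64"]))

# Hypothetical session workflow (call_tool is a placeholder for your MCP client):
#   sid = call_tool("create_session", session_name="demo")["session_id"]
#   call_tool("upload_structure_file", session_id=sid, filename="NaCl.cif", file_content=cif_text)
#   call_tool("upload_targets", session_id=sid, targets_content="NaCl,1.5")
#   call_tool("process_session_data", session_id=sid)
#   call_tool("train_session_model", session_id=sid, model_name="CGCNN_demo", epochs=100)
#   result = call_tool("download_model", session_id=sid, model_filename="CGCNN_demo.pth")
#   save_downloaded_model(result, "my_model.pth")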