Spaces:

SEUyishu
/

m3gnet

Sleeping

App Files Files Community

SEUyishu committed on Dec 4, 2025

Commit

35cea78

verified ·

1 Parent(s): d96f8c7

Update mcp_service.py

Browse files

Files changed (1) hide show

mcp_service.py +351 -27

mcp_service.py CHANGED Viewed

@@ -18,12 +18,16 @@ Spaces or any other environment that supports SSE connections.
 from __future__ import annotations
 import json
 import logging
 import os
 import uuid
 from dataclasses import dataclass, field
 from functools import lru_cache
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 import numpy as np
@@ -127,14 +131,41 @@ def _structure_to_payload(structure: Structure, include_formats: Optional[List[s
 @lru_cache(maxsize=4)
-def _get_potential(model_name: str = "MP-2021.2.8-EFS") -> Potential:
-    """Load and cache a Potential for a given pre-trained model name."""
-    logger.info("Loading potential '%s'", model_name)
-    model = M3GNet.load(model_name)
     return Potential(model)
 def _structure_to_atoms(structure: Structure) -> Atoms:
     """Utility to convert a pymatgen Structure into an ASE Atoms object."""
@@ -165,6 +196,18 @@ def _serialize_relaxation(observer: TrajectoryObserver) -> List[Dict[str, Any]]:
     return frames
 @mcp.tool()
 def list_available_models() -> Dict[str, Any]:
     """Return metadata about bundled and downloadable pre-trained models."""
@@ -182,9 +225,96 @@ def list_available_models() -> Dict[str, Any]:
     }
 @mcp.tool()
 def describe_model(model_name: str = "MP-2021.2.8-EFS") -> Dict[str, Any]:
-    """Return configuration details for a loaded model."""
     potential = _get_potential(model_name)
     config = potential.model.get_config()
@@ -203,7 +333,14 @@ def predict_properties(
     include_forces: bool = True,
     include_stresses: bool = True,
 ) -> Dict[str, Any]:
-    """Compute energy, forces, and stresses for a single structure."""
     target = _decode_structure(structure)
     potential = _get_potential(model_name)
@@ -235,7 +372,15 @@ def batch_predict_properties(
     include_stresses: bool = False,
     batch_size: int = 16,
 ) -> Dict[str, Any]:
-    """Compute energies (and optionally forces/stresses) for multiple structures."""
     if not structures:
         raise ValueError("structures list is empty")
@@ -280,6 +425,57 @@ def batch_predict_properties(
     }
 @mcp.tool()
 def relax_structure(
     structure: Dict[str, Any],
@@ -291,7 +487,18 @@ def relax_structure(
     interval: int = 1,
     include_formats: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
-    """Run a structural relaxation and return the relaxed structure and trajectory."""
     include_formats = include_formats or ["cif"]
     target = _decode_structure(structure)
@@ -335,7 +542,18 @@ def run_molecular_dynamics(
     log_interval: int = 10,
     stress_weight: float = 1 / 160.21766208,
 ) -> Dict[str, Any]:
-    """Run a short molecular dynamics simulation and return sampled frames."""
     target = _decode_structure(structure)
     atoms = _structure_to_atoms(target)
@@ -773,6 +991,7 @@ def load_custom_model(model_path: str) -> Dict[str, Any]:
 def get_training_code_template(
     task_type: str = "potential",
     include_example_data: bool = True,
 ) -> Dict[str, Any]:
     """
     Get Python code template for training M3GNet models locally.
@@ -788,7 +1007,46 @@ def get_training_code_template(
     """
     if task_type == "potential":
-        code = '''"""
 M3GNet Potential Training Script
 ================================
 Train an interatomic potential with energies, forces, and stresses.
@@ -899,7 +1157,40 @@ md = MolecularDynamics(
 md.run(steps=1000)
 '''
     else:  # property
-        code = '''"""
 M3GNet Property Prediction Training Script
 ==========================================
 Train a model to predict scalar material properties.
@@ -973,17 +1264,25 @@ predictions = model.predict_structures(new_structures)
 print(predictions)
 '''
     return {
         "success": True,
         "task_type": task_type,
         "code": code,
-        "instructions": [
-            "1. Install dependencies: pip install m3gnet pymatgen tensorflow",
-            "2. Prepare your training data (structures + labels)",
-            "3. Copy the code template and modify data loading section",
-            "4. Run the script: python train_m3gnet.py",
-            "5. The trained model will be saved to the specified directory",
-        ],
         "tips": [
             "Use fit_per_element_offset=True for better accuracy on formation energies",
             "Adjust batch_size based on your GPU memory",
@@ -996,6 +1295,7 @@ print(predictions)
 @mcp.tool()
 def get_inference_code_template(
     task_type: str = "relaxation",
 ) -> Dict[str, Any]:
     """
     Get Python code template for running M3GNet inference locally.
@@ -1149,22 +1449,30 @@ m3g relax --infile struct1.cif struct2.cif struct3.cif --suffix _relaxed
             "error": f"Unknown task_type: {task_type}",
             "available_types": list(templates.keys()),
         }
     return {
         "success": True,
         "task_type": task_type,
         "code": templates[task_type],
-        "instructions": [
-            "1. Install m3gnet: pip install m3gnet",
-            "2. Copy the code template",
-            "3. Modify the structure loading section for your data",
-            "4. Run the script",
-        ],
     }
 @mcp.tool()
-def get_graph_conversion_code() -> Dict[str, Any]:
     """
     Get code template for converting structures to M3GNet graph format.
@@ -1209,10 +1517,23 @@ graph_list = tf_graph.as_list()  # List format for model.call()
 #  n_triple_bonds, triple_bond_lengths, theta]
 '''
     return {
         "success": True,
         "code": code,
         "description": "Template for converting structures to M3GNet graph format",
     }
@@ -1232,7 +1553,7 @@ def evaluate_model(
         structures: List of structure payloads
         true_energies: Ground truth energies in eV
         true_forces: Optional ground truth forces in eV/Å
-        model_name: Model to evaluate
     Returns:
         Evaluation metrics (MAE, RMSE for energy and forces)
@@ -1288,9 +1609,12 @@ def evaluate_model(
 __all__ = [
     "mcp",
     "list_available_models",
     "describe_model",
     "predict_properties",
     "batch_predict_properties",
     "relax_structure",
     "run_molecular_dynamics",
     "convert_structure_format",

 from __future__ import annotations
+import importlib
 import json
 import logging
 import os
+import pkgutil
+import textwrap
 import uuid
 from dataclasses import dataclass, field
 from functools import lru_cache
+from pathlib import Path
 from typing import Any, Dict, Iterable, List, Optional, Tuple
 import numpy as np
 @lru_cache(maxsize=4)
def _get_potential(model_reference: str = "MP-2021.2.8-EFS") -> Potential:
    """Load a Potential from either a named checkpoint or a model directory.

    Results are memoised by the ``lru_cache`` decorator applied to this
    function, so repeated calls with the same reference reuse the loaded model.

    Args:
        model_reference: Name of a pre-trained model, or a filesystem path to a
            saved model directory. A leading ``~`` is expanded before the
            existence check.

    Returns:
        A ``Potential`` wrapping the loaded ``M3GNet`` model.

    Raises:
        ValueError: If ``model_reference`` is empty.
    """
    if not model_reference:
        raise ValueError("model_reference must be a non-empty string")
    # Expand "~" *before* testing for existence. Checking the raw string first
    # meant home-relative paths were never recognised as directories.
    # os.path.expanduser leaves the string untouched when no home is known.
    candidate = Path(os.path.expanduser(model_reference))
    if candidate.exists():
        resolved_path = candidate.resolve()
        logger.info("Loading potential from directory '%s'", resolved_path)
        model = M3GNet.from_dir(str(resolved_path))
    else:
        logger.info("Loading potential '%s'", model_reference)
        model = M3GNet.load(model_reference)
    return Potential(model)
@lru_cache(maxsize=4)
def _get_property_model(model_reference: str) -> M3GNet:
    """Load and cache an M3GNet model for scalar property prediction.

    Args:
        model_reference: Name of a pre-trained model, or a filesystem path to a
            saved model directory. A leading ``~`` is expanded before the
            existence check.

    Returns:
        The loaded ``M3GNet`` model.

    Raises:
        ValueError: If ``model_reference`` is empty.
    """
    if not model_reference:
        raise ValueError("model_reference must be provided")
    # Expand "~" first: testing the unexpanded string for existence made the
    # later expansion unreachable for home-relative paths.
    candidate = Path(os.path.expanduser(model_reference))
    if candidate.exists():
        resolved_path = candidate.resolve()
        logger.info("Loading property model from directory '%s'", resolved_path)
        return M3GNet.from_dir(str(resolved_path))
    logger.info("Loading property model '%s'", model_reference)
    return M3GNet.load(model_reference)
 def _structure_to_atoms(structure: Structure) -> Atoms:
     """Utility to convert a pymatgen Structure into an ASE Atoms object."""
     return frames
def _maybe_write_script(output_path: Optional[str], code: str) -> Optional[str]:
    """Write generated script text to disk when an output path is provided.

    Args:
        output_path: Destination file path. ``None``, an empty string, or a
            whitespace-only string means "do not write anything".
        code: Script text to write, encoded as UTF-8.

    Returns:
        The absolute path of the written file as a string, or ``None`` when
        nothing was written.
    """
    # A blank path is treated as "no path": otherwise a stray "  " would create
    # a file literally named with spaces in the current working directory.
    cleaned = output_path.strip() if output_path else ""
    if not cleaned:
        return None
    # NOTE(review): the path originates from a tool caller and is written
    # without any sandboxing; consider restricting it to an allowed directory.
    target_path = Path(cleaned).expanduser().resolve()
    target_path.parent.mkdir(parents=True, exist_ok=True)
    target_path.write_text(code, encoding="utf-8")
    return str(target_path)
 @mcp.tool()
 def list_available_models() -> Dict[str, Any]:
     """Return metadata about bundled and downloadable pre-trained models."""
     }
@mcp.tool()
def list_library_components() -> Dict[str, Any]:
    """Summarise the main m3gnet modules: public names, submodules, short doc.

    Each listed module is imported in turn. A module that fails to import is
    reported with an ``"error"`` entry instead of aborting the whole listing.

    Returns:
        ``{"success": True, "overview": {...}}`` where ``overview`` maps each
        module name to its summary (or to its import error).
    """
    package_names = (
        "m3gnet",
        "m3gnet.models",
        "m3gnet.trainers",
        "m3gnet.graph",
        "m3gnet.layers",
        "m3gnet.utils",
        "m3gnet.callbacks",
        "m3gnet.config",
        "m3gnet.type",
        "m3gnet.cli",
    )
    report: Dict[str, Any] = {}
    for name in package_names:
        try:
            mod = importlib.import_module(name)
        except Exception as exc:  # noqa: BLE001
            # Best effort: record the failure and keep going.
            report[name] = {"error": str(exc)}
            continue

        # Prefer the declared public API; otherwise use non-underscore names.
        exported = getattr(mod, "__all__", None)
        if exported is None:
            exported = [attr for attr in dir(mod) if not attr.startswith("_")]

        # Only packages expose __path__; plain modules have no submodules.
        search_path = getattr(mod, "__path__", None)
        children: List[str] = []
        if search_path:
            children = sorted(
                f"{name}.{entry.name}" for entry in pkgutil.iter_modules(search_path)
            )

        sorted_exports = sorted(exported)
        summary = textwrap.shorten(
            (mod.__doc__ or "").strip(), width=120, placeholder="..."
        )
        report[name] = {
            "public_symbols": sorted_exports,
            "submodules": children,
            "doc": summary,
        }
    return {"success": True, "overview": report}
@mcp.tool()
def get_component_documentation(target: str) -> Dict[str, Any]:
    """Return docstrings and metadata for a given m3gnet component.

    The *target* parameter accepts "module" or "module:attribute" syntax.
    Examples: "m3gnet.models", "m3gnet.layers._basis:RadialBasisLayer".

    Args:
        target: Module path inside the ``m3gnet`` package, optionally followed
            by ``:`` and an attribute name of that module.

    Returns:
        A dict with ``success``, the dedented ``doc`` text (or a placeholder
        when there is none) and a ``metadata`` mapping.

    Raises:
        ValueError: If *target* is blank or names a module outside ``m3gnet``.
        ImportError: If the module cannot be imported.
        AttributeError: If the attribute does not exist on the module.
    """
    if not target or not target.strip():
        raise ValueError("target must be a non-empty string")
    module_part, _, attr_part = target.partition(":")
    module_name = module_part.strip()
    attr_name: Optional[str] = attr_part.strip() or None
    # Importing a module executes its top-level code, and *target* comes from
    # the tool caller, so only the documented m3gnet namespace is allowed.
    if module_name != "m3gnet" and not module_name.startswith("m3gnet."):
        raise ValueError("target must reference a module inside the m3gnet package")
    module = importlib.import_module(module_name)
    obj = getattr(module, attr_name) if attr_name else module
    doc = textwrap.dedent(obj.__doc__ or "").strip() or "No documentation available."
    metadata = {
        "module": module_name,
        "object_type": type(obj).__name__,
        "has_attributes": bool(getattr(obj, "__dict__", {})) if attr_name else False,
    }
    if not attr_name and hasattr(module, "__all__"):
        metadata["exported_names"] = list(module.__all__)
    return {"success": True, "doc": doc, "metadata": metadata}
 @mcp.tool()
 def describe_model(model_name: str = "MP-2021.2.8-EFS") -> Dict[str, Any]:
+    """Return configuration details for a loaded model or custom checkpoint.
+    Args:
+        model_name: Pre-trained identifier (e.g. "MP-2021.2.8-EFS") or path to a
+            directory containing an exported M3GNet model.
+    """
     potential = _get_potential(model_name)
     config = potential.model.get_config()
     include_forces: bool = True,
     include_stresses: bool = True,
 ) -> Dict[str, Any]:
+    """Compute energy, forces, and stresses for a single structure.
+    Args:
+        structure: Serialized structure payload.
+        model_name: Pre-trained model name or custom checkpoint directory.
+        include_forces: Whether to include force components in the response.
+        include_stresses: Whether to include the Voigt stress tensor.
+    """
     target = _decode_structure(structure)
     potential = _get_potential(model_name)
     include_stresses: bool = False,
     batch_size: int = 16,
 ) -> Dict[str, Any]:
+    """Compute energies (and optionally forces/stresses) for multiple structures.
+    Args:
+        structures: Sequence of serialized structures.
+        model_name: Pre-trained model name or path to a custom checkpoint.
+        include_forces: If True, include atomic forces for each structure.
+        include_stresses: If True, include stress tensors when available.
+        batch_size: Batch size used for batched predictions.
+    """
     if not structures:
         raise ValueError("structures list is empty")
     }
@mcp.tool()
def predict_scalar_property(
    structures: List[Dict[str, Any]],
    model_name: str,
    batch_size: int = 32,
    return_numpy: bool = False,
) -> Dict[str, Any]:
    """Predict scalar material properties using a property model or custom checkpoint.

    Args:
        structures: List of serialized structure payloads.
        model_name: Pre-trained property model identifier or path to saved model.
        batch_size: Batch size for batched inference; must be at least 1.
        return_numpy: If True, include the flattened prediction array (as a
            list) in the response under ``raw_array``.

    Returns:
        A dict with ``success``, ``model_name``, ``batch_size`` and a
        ``predictions`` list holding one entry per input structure. When only
        one structure is given, its value is also exposed as ``value``.

    Raises:
        ValueError: If *structures* is empty, *batch_size* is not positive, or
            the model does not return exactly one value per structure.
    """
    if not structures:
        raise ValueError("structures list is empty")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")
    decoded = [_decode_structure(item) for item in structures]
    model = _get_property_model(model_name)
    predictions = model.predict_structures(decoded, batch_size=batch_size)
    flat_values = np.asarray(predictions).reshape(-1)
    # The flattened array is indexed by structure position below, so it must
    # contain exactly one value per structure. Otherwise values would be
    # silently misaligned (multi-output model) or indexing would fail.
    if flat_values.size != len(decoded):
        raise ValueError(
            f"model returned {flat_values.size} values for {len(decoded)} structures; "
            "expected exactly one scalar per structure"
        )
    results = [
        {
            "structure_index": idx,
            "formula": struct.composition.reduced_formula,
            "num_sites": struct.num_sites,
            "value": float(flat_values[idx]),
        }
        for idx, struct in enumerate(decoded)
    ]
    response: Dict[str, Any] = {
        "success": True,
        "model_name": model_name,
        "batch_size": batch_size,
        "predictions": results,
    }
    if return_numpy:
        response["raw_array"] = flat_values.tolist()
    if len(results) == 1:
        # Convenience shortcut for single-structure requests.
        response["value"] = results[0]["value"]
    return response
 @mcp.tool()
 def relax_structure(
     structure: Dict[str, Any],
     interval: int = 1,
     include_formats: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
+    """Run a structural relaxation and return the relaxed structure and trajectory.
+    Args:
+        structure: Serialized structure payload for relaxation.
+        model_name: Pre-trained identifier or custom checkpoint path.
+        fmax: Force convergence threshold in eV/Å.
+        steps: Maximum optimizer steps.
+        relax_cell: Whether to relax lattice parameters.
+        optimizer: Optimizer name supported by :class:`Relaxer`.
+        interval: Interval (in steps) for recording trajectory frames.
+        include_formats: Extra serialization formats to add to the response.
+    """
     include_formats = include_formats or ["cif"]
     target = _decode_structure(structure)
     log_interval: int = 10,
     stress_weight: float = 1 / 160.21766208,
 ) -> Dict[str, Any]:
+    """Run a short molecular dynamics simulation and return sampled frames.
+    Args:
+        structure: Serialized structure payload.
+        model_name: Pre-trained identifier or custom checkpoint path.
+        ensemble: Statistical ensemble name ("nvt", "npt", etc.).
+        temperature: Target temperature in Kelvin.
+        timestep_fs: Time step in femtoseconds.
+        steps: Number of MD steps to simulate.
+        log_interval: Interval between recorded frames.
+        stress_weight: Coupling factor for stress control.
+    """
     target = _decode_structure(structure)
     atoms = _structure_to_atoms(target)
 def get_training_code_template(
     task_type: str = "potential",
     include_example_data: bool = True,
+    output_path: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Get Python code template for training M3GNet models locally.
     """
     if task_type == "potential":
+        if not include_example_data:
+            code = textwrap.dedent(
+                """
+                \"\"\"Minimal M3GNet potential training skeleton.\"\"\"
+                import tensorflow as tf
+                from m3gnet.models import M3GNet, Potential
+                from m3gnet.trainers import PotentialTrainer
+                def train_potential(structures, energies, forces, **kwargs):
+                    model = M3GNet(is_intensive=False)
+                    potential = Potential(model=model)
+                    optimizer = tf.keras.optimizers.Adam(kwargs.get("learning_rate", 1e-3))
+                    trainer = PotentialTrainer(potential=potential, optimizer=optimizer)
+                    trainer.train(
+                        structures,
+                        energies,
+                        forces,
+                        stresses=kwargs.get("stresses"),
+                        validation_graphs_or_structures=kwargs.get("val_structures"),
+                        val_energies=kwargs.get("val_energies"),
+                        val_forces=kwargs.get("val_forces"),
+                        val_stresses=kwargs.get("val_stresses"),
+                        batch_size=kwargs.get("batch_size", 16),
+                        epochs=kwargs.get("epochs", 200),
+                        force_loss_ratio=kwargs.get("force_loss_ratio", 1.0),
+                        stress_loss_ratio=kwargs.get("stress_loss_ratio", 0.1),
+                        fit_per_element_offset=kwargs.get("fit_per_element_offset", True),
+                        verbose=kwargs.get("verbose", 1),
+                    )
+                    return potential
+                if __name__ == "__main__":
+                    raise SystemExit("Replace this stub with your data loading pipeline and call train_potential(...).")
+                """
+            )
+        else:
+            code = '''"""
 M3GNet Potential Training Script
 ================================
 Train an interatomic potential with energies, forces, and stresses.
 md.run(steps=1000)
 '''
     else:  # property
+        if not include_example_data:
+            code = textwrap.dedent(
+                """
+                \"\"\"Minimal M3GNet property model training skeleton.\"\"\"
+                import tensorflow as tf
+                from m3gnet.models import M3GNet
+                from m3gnet.trainers import Trainer
+                def train_property_model(structures, targets, **kwargs):
+                    model = M3GNet(is_intensive=True)
+                    optimizer = tf.keras.optimizers.Adam(kwargs.get("learning_rate", 1e-3))
+                    trainer = Trainer(model=model, optimizer=optimizer)
+                    trainer.train(
+                        structures,
+                        targets,
+                        validation_graphs_or_structures=kwargs.get("val_structures"),
+                        validation_targets=kwargs.get("val_targets"),
+                        batch_size=kwargs.get("batch_size", 32),
+                        epochs=kwargs.get("epochs", 300),
+                        early_stop_patience=kwargs.get("early_stop_patience", 100),
+                        fit_per_element_offset=kwargs.get("fit_per_element_offset", True),
+                        verbose=kwargs.get("verbose", 1),
+                    )
+                    return model
+                if __name__ == "__main__":
+                    raise SystemExit("Provide training data and call train_property_model(...)")
+                """
+            )
+        else:
+            code = '''"""
 M3GNet Property Prediction Training Script
 ==========================================
 Train a model to predict scalar material properties.
 print(predictions)
 '''
+    script_path = _maybe_write_script(output_path, code)
+    instructions = [
+        "1. Install dependencies: pip install m3gnet pymatgen tensorflow",
+        "2. Prepare your training data (structures + labels)",
+        "3. Copy the code template and modify data loading section",
+        "4. Run the script: python train_m3gnet.py",
+        "5. The trained model will be saved to the specified directory",
+    ]
+    if script_path:
+        instructions.insert(0, f"Script written to {script_path}")
     return {
         "success": True,
         "task_type": task_type,
         "code": code,
+        "output_path": script_path,
+        "instructions": instructions,
         "tips": [
             "Use fit_per_element_offset=True for better accuracy on formation energies",
             "Adjust batch_size based on your GPU memory",
 @mcp.tool()
 def get_inference_code_template(
     task_type: str = "relaxation",
+    output_path: Optional[str] = None,
 ) -> Dict[str, Any]:
     """
     Get Python code template for running M3GNet inference locally.
             "error": f"Unknown task_type: {task_type}",
             "available_types": list(templates.keys()),
         }
+    script_path = _maybe_write_script(output_path, templates[task_type])
+    instructions = [
+        "1. Install m3gnet: pip install m3gnet",
+        "2. Copy the code template",
+        "3. Modify the structure loading section for your data",
+        "4. Run the script",
+    ]
+    if script_path:
+        instructions.insert(0, f"Script written to {script_path}")
     return {
         "success": True,
         "task_type": task_type,
         "code": templates[task_type],
+        "output_path": script_path,
+        "instructions": instructions,
     }
 @mcp.tool()
+def get_graph_conversion_code(output_path: Optional[str] = None) -> Dict[str, Any]:
     """
     Get code template for converting structures to M3GNet graph format.
 #  n_triple_bonds, triple_bond_lengths, theta]
 '''
+    script_path = _maybe_write_script(output_path, code)
+    instructions = [
+        "1. Install m3gnet and pymatgen",
+        "2. Place your structure file alongside the script",
+        "3. Run the script to print graph statistics",
+    ]
+    if script_path:
+        instructions.insert(0, f"Script written to {script_path}")
     return {
         "success": True,
         "code": code,
+        "output_path": script_path,
         "description": "Template for converting structures to M3GNet graph format",
+        "instructions": instructions,
     }
         structures: List of structure payloads
         true_energies: Ground truth energies in eV
         true_forces: Optional ground truth forces in eV/Å
+        model_name: Pre-trained identifier or path to custom checkpoint
     Returns:
         Evaluation metrics (MAE, RMSE for energy and forces)
 __all__ = [
     "mcp",
     "list_available_models",
+    "list_library_components",
     "describe_model",
+    "get_component_documentation",
     "predict_properties",
     "batch_predict_properties",
+    "predict_scalar_property",
     "relax_structure",
     "run_molecular_dynamics",
     "convert_structure_format",