Spaces:

guohanghui
/

deepTools

Sleeping

App Files Files Community

guohanghui committed on Mar 7

Commit

a35de1c

verified ·

1 Parent(s): 1495e8a

Update deepTools/mcp_output/mcp_plugin/mcp_service.py

Browse files

Files changed (1) hide show

deepTools/mcp_output/mcp_plugin/mcp_service.py +443 -77

deepTools/mcp_output/mcp_plugin/mcp_service.py CHANGED Viewed

@@ -1,127 +1,493 @@
-import sys
-import os
-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../repo/deepTools')))
-# Ensure fastmcp is installed
-try:
-    import fastmcp
-except ImportError:
-    raise ImportError("The 'fastmcp' module is not installed. Please install it using 'pip install fastmcp'.")
 from fastmcp import FastMCP
-# Create the FastMCP service application
 mcp = FastMCP("deeptools_service")
-@mcp.tool(name="alignment_sieve", description="Process alignments using alignmentSieve")
-def alignment_sieve(input_file: str, output_file: str, options: dict) -> dict:
     """
-    Process alignments using alignmentSieve.
-    Parameters:
-    - input_file: Path to the input alignment file.
-    - output_file: Path to save the processed alignment file.
-    - options: Dictionary of options for alignmentSieve.
     Returns:
-    - dict: Success status and output file path.
     """
     try:
-        from deeptools import alignmentSieve
-        alignmentSieve.main([input_file, output_file, *options])
-        return {"success": True, "output_file": output_file}
     except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="bam_compare", description="Compare BAM files using bamCompare")
-def bam_compare(bam1: str, bam2: str, output_file: str, options: dict) -> dict:
     """
-    Compare two BAM files using bamCompare.
-    Parameters:
-    - bam1: Path to the first BAM file.
-    - bam2: Path to the second BAM file.
-    - output_file: Path to save the comparison result.
-    - options: Dictionary of options for bamCompare.
     Returns:
-    - dict: Success status and output file path.
     """
     try:
-        from deeptools import bamCompare
-        bamCompare.main([bam1, bam2, output_file, *options])
-        return {"success": True, "output_file": output_file}
     except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="bam_coverage", description="Generate coverage files from BAM using bamCoverage")
-def bam_coverage(bam_file: str, output_file: str, options: dict) -> dict:
     """
-    Generate coverage files from a BAM file using bamCoverage.
-    Parameters:
-    - bam_file: Path to the BAM file.
-    - output_file: Path to save the coverage file.
-    - options: Dictionary of options for bamCoverage.
     Returns:
-    - dict: Success status and output file path.
     """
     try:
-        from deeptools import bamCoverage
-        bamCoverage.main([bam_file, output_file, *options])
-        return {"success": True, "output_file": output_file}
     except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="compute_gc_bias", description="Compute GC bias using computeGCBias")
-def compute_gc_bias(bam_file: str, genome_file: str, output_file: str, options: dict) -> dict:
     """
-    Compute GC bias for a BAM file using computeGCBias.
-    Parameters:
-    - bam_file: Path to the BAM file.
-    - genome_file: Path to the genome file.
-    - output_file: Path to save the GC bias results.
-    - options: Dictionary of options for computeGCBias.
     Returns:
-    - dict: Success status and output file path.
     """
     try:
-        from deeptools import computeGCBias
-        computeGCBias.main([bam_file, genome_file, output_file, *options])
-        return {"success": True, "output_file": output_file}
     except Exception as e:
-        return {"success": False, "error": str(e)}
-@mcp.tool(name="plot_heatmap", description="Generate heatmaps using plotHeatmap")
-def plot_heatmap(matrix_file: str, output_file: str, options: dict) -> dict:
     """
-    Generate heatmaps from a matrix file using plotHeatmap.
-    Parameters:
-    - matrix_file: Path to the matrix file.
-    - output_file: Path to save the heatmap.
-    - options: Dictionary of options for plotHeatmap.
     Returns:
-    - dict: Success status and output file path.
     """
     try:
-        from deeptools import plotHeatmap
-        plotHeatmap.main([matrix_file, output_file, *options])
-        return {"success": True, "output_file": output_file}
     except Exception as e:
-        return {"success": False, "error": str(e)}
-# Additional tools for other deepTools functionalities can be added here following the same pattern.
-def create_app() -> FastMCP:
     """
-    Create and return the FastMCP application instance.
     Returns:
-    - FastMCP: The FastMCP application instance.
     """
-    return mcp

+"""
+deepTools MCP Service - NGS Data Analysis Tools
+Provides tools for analyzing high-throughput sequencing data,
+including BAM/bigWig processing, correlation analysis, and visualization.
+"""
 from fastmcp import FastMCP
+from typing import Optional, List, Dict, Any
+import numpy as np
+import tempfile
+import os
 mcp = FastMCP("deeptools_service")
+@mcp.tool(name="compute_correlation", description="Compute correlation matrix from sample data")
+def compute_correlation(
+    matrix_file: str,
+    method: str = "pearson",
+    skip_zeros: bool = False,
+    remove_outliers: bool = False,
+    log1p: bool = False
+) -> Dict[str, Any]:
     """
+    Correlate the samples stored in a deepTools summary matrix.
+    Args:
+        matrix_file: Path to npz matrix file (from multiBamSummary/multiBigwigSummary)
+        method: Correlation method ('pearson' or 'spearman'), forwarded as ``corr_method``
+        skip_zeros: Forwarded to ``Correlation``; skip rows with only zeros
+        remove_outliers: Forwarded to ``Correlation``; remove outliers before correlation
+        log1p: Forwarded to ``Correlation``; apply log1p transformation
+    Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds the correlation matrix as nested lists, the labels
+        and the requested method; any exception is reported via ``error``.
+    """
+    try:
+        # Imported inside the try so a missing deepTools install is reported
+        # as a tool error instead of breaking module import.
+        from deeptools.correlation import Correlation
+        settings = {
+            "corr_method": method,
+            "skip_zeros": skip_zeros,
+            "remove_outliers": remove_outliers,
+            "log1p": log1p,
+        }
+        analysis = Correlation(matrix_file, **settings)
+        payload = {
+            "correlation_matrix": analysis.corr_matrix.tolist(),
+            "labels": analysis.labels,
+            "method": method,
+        }
+    except Exception as exc:
+        return {"success": False, "result": None, "error": str(exc)}
+    return {"success": True, "result": payload, "error": None}
+@mcp.tool(name="get_gc_content", description="Calculate GC content for a genomic region")
+def get_gc_content(
+    twobit_file: str,
+    chrom: str,
+    start: int,
+    end: int
+) -> Dict[str, Any]:
+    """
+    Calculate GC content for a genomic region.
+    Args:
+        twobit_file: Path to 2bit genome file
+        chrom: Chromosome name
+        start: Start position
+        end: End position
     Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds ``gc_content`` (the value returned by deepTools'
+        ``getGC_content``) and ``region`` formatted as ``chrom:start-end``.
+        Any exception is reported via ``error`` instead of being raised.
     """
     try:
+        import py2bit
+        from deeptools.utilities import getGC_content
+        tb = py2bit.open(twobit_file)
+        try:
+            gc = getGC_content(tb, chrom, start, end)
+        finally:
+            # Release the 2bit handle even when the lookup raises.
+            tb.close()
+        return {
+            "success": True,
+            "result": {"gc_content": gc, "region": f"{chrom}:{start}-{end}"},
+            "error": None
+        }
     except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="get_bam_stats", description="Get statistics from a BAM file")
+def get_bam_stats(bam_file: str) -> Dict[str, Any]:
     """
+    Get basic statistics from a BAM file.
+    Args:
+        bam_file: Path to BAM file
+    Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds the mapped/unmapped read counts, the reference names
+        and lengths, and the number of references as exposed by the handle
+        returned from ``openBam``. Any exception is reported via ``error``.
+    """
+    try:
+        from deeptools.bamHandler import openBam
+        bam = openBam(bam_file)
+        try:
+            stats = {
+                "mapped_reads": bam.mapped,
+                "unmapped_reads": bam.unmapped,
+                "references": list(bam.references),
+                "lengths": list(bam.lengths),
+                "num_references": bam.nreferences
+            }
+        finally:
+            # Close the handle even if reading one of the attributes fails.
+            bam.close()
+        return {"success": True, "result": stats, "error": None}
+    except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="get_bigwig_stats", description="Get statistics from a bigWig file")
+def get_bigwig_stats(
+    bigwig_file: str,
+    chrom: Optional[str] = None,
+    start: Optional[int] = None,
+    end: Optional[int] = None
+) -> Dict[str, Any]:
+    """
+    Get statistics from a bigWig file.
+    Region statistics are only computed when ``chrom``, ``start`` and ``end``
+    are all supplied; otherwise only the file-level information is returned.
+    Args:
+        bigwig_file: Path to bigWig file
+        chrom: Optional chromosome to get stats for
+        start: Optional start position
+        end: Optional end position
     Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds ``chromosomes``, ``is_bigwig`` and, for a complete
+        region, ``region_stats`` with mean/min/max/std/sum. Any exception is
+        reported via ``error`` instead of being raised.
     """
     try:
+        import pyBigWig
+        bw = pyBigWig.open(bigwig_file)
+        try:
+            result = {
+                "chromosomes": dict(bw.chroms()),
+                "is_bigwig": bw.isBigWig()
+            }
+            if chrom and start is not None and end is not None:
+                # One stats() call per summary type; [0] because a single
+                # bin is requested for the whole region.
+                result["region_stats"] = {
+                    stat: bw.stats(chrom, start, end, type=stat)[0]
+                    for stat in ("mean", "min", "max", "std", "sum")
+                }
+        finally:
+            # Close the handle even when a query raises.
+            bw.close()
+        return {"success": True, "result": result, "error": None}
     except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="get_bigwig_values", description="Get values from a bigWig file for a region")
+def get_bigwig_values(
+    bigwig_file: str,
+    chrom: str,
+    start: int,
+    end: int
+) -> Dict[str, Any]:
+    """
+    Get signal values from a bigWig file for a specific region.
+    Args:
+        bigwig_file: Path to bigWig file
+        chrom: Chromosome name
+        start: Start position
+        end: End position
+    Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds the ``region`` string, the ``values`` list (NaN
+        entries are replaced by ``None``) and its ``length``. Any exception
+        is reported via ``error`` instead of being raised.
+    """
+    try:
+        import pyBigWig
+        bw = pyBigWig.open(bigwig_file)
+        try:
+            values = bw.values(chrom, start, end)
+        finally:
+            # Close the handle even when the region query raises.
+            bw.close()
+        # Convert to list, handling NaN values
+        values_list = [float(v) if not np.isnan(v) else None for v in values]
+        return {
+            "success": True,
+            "result": {
+                "region": f"{chrom}:{start}-{end}",
+                "values": values_list,
+                "length": len(values_list)
+            },
+            "error": None
+        }
+    except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="compute_matrix_stats", description="Get statistics from a deepTools matrix file")
+def compute_matrix_stats(matrix_file: str) -> Dict[str, Any]:
+    """
+    Summarise a deepTools matrix file (from computeMatrix).
+    Args:
+        matrix_file: Path to matrix file (.gz or .npz)
+    Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds sample/group labels, counts, the matrix shape and
+        the reader's parameters; any exception is reported via ``error``.
     """
+    try:
+        from deeptools.heatmapper import heatmapper
+        reader = heatmapper()
+        reader.read_matrix_file(matrix_file)
+        summary = {}
+        summary["num_samples"] = len(reader.matrix.sample_labels)
+        summary["sample_labels"] = reader.matrix.sample_labels
+        # NOTE(review): "num_regions" is filled from get_num_groups();
+        # confirm that a group count is what callers expect here.
+        summary["num_regions"] = reader.matrix.get_num_groups()
+        summary["group_labels"] = reader.matrix.group_labels
+        if hasattr(reader.matrix.matrix, 'shape'):
+            summary["matrix_shape"] = list(reader.matrix.matrix.shape)
+        else:
+            summary["matrix_shape"] = None
+        summary["parameters"] = getattr(reader, 'parameters', {})
+    except Exception as exc:
+        return {"success": False, "result": None, "error": str(exc)}
+    return {"success": True, "result": summary, "error": None}
+@mcp.tool(name="smart_labels", description="Generate smart labels from file paths")
+def smart_labels(file_paths: List[str]) -> Dict[str, Any]:
+    """
+    Derive display labels for file paths using deepTools' ``smartLabels``.
+    Args:
+        file_paths: List of file paths
     Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` pairs the ``original`` paths with the generated ``labels``;
+        any exception is reported via ``error`` instead of being raised.
     """
     try:
+        from deeptools.utilities import smartLabels
+        payload = {"original": file_paths, "labels": smartLabels(file_paths)}
+        return {"success": True, "result": payload, "error": None}
     except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="load_npz_matrix", description="Load and inspect a numpy matrix file")
+def load_npz_matrix(matrix_file: str) -> Dict[str, Any]:
+    """
+    Load and inspect a deepTools npz matrix file.
+    Args:
+        matrix_file: Path to npz file
+    Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds the archive ``keys``, per-key ``shapes`` and
+        ``dtypes`` and, when a ``labels`` entry exists, its values as strings.
+        Any exception is reported via ``error`` instead of being raised.
     """
+    try:
+        shapes = {}
+        dtypes = {}
+        labels = None
+        # SECURITY(review): allow_pickle=True lets a crafted file execute
+        # arbitrary code while loading. Kept for compatibility with existing
+        # inputs; only point this tool at trusted files.
+        with np.load(matrix_file, allow_pickle=True) as data:
+            keys = list(data.keys())
+            # Read each member once: every data[k] access re-reads the
+            # archive entry, so cache the array for all derived fields.
+            for key in keys:
+                entry = data[key]
+                shapes[key] = list(entry.shape) if hasattr(entry, 'shape') else None
+                dtypes[key] = str(entry.dtype) if hasattr(entry, 'dtype') else type(entry).__name__
+                if key == 'labels':
+                    labels = entry
+        result = {"keys": keys, "shapes": shapes, "dtypes": dtypes}
+        # If labels exist, include them
+        if labels is not None:
+            if hasattr(labels, 'tolist'):
+                result['labels'] = [str(l) for l in labels.tolist()]
+            else:
+                result['labels'] = [str(l) for l in labels]
+        return {"success": True, "result": result, "error": None}
+    except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="calculate_correlation_stats", description="Calculate correlation statistics between two arrays")
+def calculate_correlation_stats(
+    array1: List[float],
+    array2: List[float],
+    method: str = "pearson"
+) -> Dict[str, Any]:
+    """
+    Calculate correlation statistics between two numeric arrays.
+    Positions where either array is NaN or ``None`` are dropped pairwise
+    before the statistic is computed.
+    Args:
+        array1: First array of values
+        array2: Second array of values (same length as ``array1``)
+        method: Correlation method ('pearson' or 'spearman')
     Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds ``correlation``, ``p_value``, ``method`` and
+        ``n_samples`` (pairs left after dropping missing values). An
+        unsupported method, mismatched lengths or any exception is reported
+        via ``error`` instead of being raised.
     """
     try:
+        import scipy.stats
+        correlators = {
+            "pearson": scipy.stats.pearsonr,
+            "spearman": scipy.stats.spearmanr,
+        }
+        # Reject unknown methods instead of silently running Pearson while
+        # echoing the requested name back to the caller.
+        if method not in correlators:
+            return {
+                "success": False,
+                "result": None,
+                "error": f"Unsupported method {method!r}; expected 'pearson' or 'spearman'"
+            }
+        # dtype=float turns None entries into NaN so they are masked below
+        # rather than making np.isnan fail on an object array.
+        a1 = np.array(array1, dtype=float)
+        a2 = np.array(array2, dtype=float)
+        if a1.shape != a2.shape:
+            return {
+                "success": False,
+                "result": None,
+                "error": f"Arrays must have the same length (got {a1.size} and {a2.size})"
+            }
+        # Remove NaN values
+        mask = ~(np.isnan(a1) | np.isnan(a2))
+        a1 = a1[mask]
+        a2 = a2[mask]
+        corr, pval = correlators[method](a1, a2)
+        return {
+            "success": True,
+            "result": {
+                "correlation": float(corr),
+                "p_value": float(pval),
+                "method": method,
+                "n_samples": len(a1)
+            },
+            "error": None
+        }
     except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="list_deeptools_commands", description="List available deepTools command-line tools")
+def list_deeptools_commands() -> Dict[str, Any]:
     """
+    Return a static catalogue of deepTools command-line tools by category.
+    Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys; ``result``
+        maps each category name to a ``{tool name: description}`` dictionary.
+    """
+    bam_tools = {
+        "alignmentSieve": "Filter alignments from BAM files",
+        "bamCompare": "Compare two BAM files based on the number of mapped reads",
+        "bamCoverage": "Calculate genome coverage from BAM file",
+        "bamPEFragmentSize": "Calculate fragment sizes in paired-end data",
+        "estimateReadFiltering": "Estimate the number of reads filtered by alignmentSieve",
+        "multiBamSummary": "Summarize multiple BAM files",
+    }
+    bigwig_tools = {
+        "bigwigAverage": "Average multiple bigWig files",
+        "bigwigCompare": "Compare two bigWig files",
+        "multiBigwigSummary": "Summarize multiple bigWig files",
+    }
+    matrix_tools = {
+        "computeMatrix": "Calculate scores per genome regions",
+        "computeMatrixOperations": "Modify computeMatrix output",
+    }
+    plotting_tools = {
+        "plotCorrelation": "Plot correlation heatmap",
+        "plotCoverage": "Plot coverage",
+        "plotEnrichment": "Plot enrichment",
+        "plotFingerprint": "Plot fingerprint",
+        "plotHeatmap": "Plot heatmap from computeMatrix output",
+        "plotPCA": "Plot PCA",
+        "plotProfile": "Plot profile from computeMatrix output",
+    }
+    gc_tools = {
+        "computeGCBias": "Compute GC bias",
+        "correctGCBias": "Correct GC bias",
+    }
+    # Category order is kept as listed so the output stays stable for callers.
+    catalog = {
+        "BAM tools": bam_tools,
+        "bigWig tools": bigwig_tools,
+        "Matrix tools": matrix_tools,
+        "Visualization": plotting_tools,
+        "GC bias": gc_tools,
+    }
+    return {"success": True, "result": catalog, "error": None}
+@mcp.tool(name="get_chromosome_sizes", description="Get chromosome sizes from a BAM or bigWig file")
+def get_chromosome_sizes(file_path: str) -> Dict[str, Any]:
+    """
+    Get chromosome sizes from a BAM or bigWig file.
+    The file type is chosen from the extension, compared case-insensitively:
+    ``.bam`` for BAM, ``.bw`` / ``.bigwig`` for bigWig.
+    Args:
+        file_path: Path to BAM or bigWig file
     Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds ``chromosome_sizes``, their ``total_size`` and
+        ``num_chromosomes``. An unrecognised extension or any exception is
+        reported via ``error`` instead of being raised.
     """
     try:
+        lowered = file_path.lower()
+        if lowered.endswith('.bam'):
+            from deeptools.bamHandler import openBam
+            bam = openBam(file_path)
+            try:
+                sizes = dict(zip(bam.references, bam.lengths))
+            finally:
+                # Close the handle even if reading the header fails.
+                bam.close()
+        elif lowered.endswith(('.bw', '.bigwig')):
+            import pyBigWig
+            bw = pyBigWig.open(file_path)
+            try:
+                sizes = dict(bw.chroms())
+            finally:
+                bw.close()
+        else:
+            return {"success": False, "result": None, "error": "Unsupported file format"}
+        return {
+            "success": True,
+            "result": {
+                "chromosome_sizes": sizes,
+                "total_size": sum(sizes.values()),
+                "num_chromosomes": len(sizes)
+            },
+            "error": None
+        }
     except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+@mcp.tool(name="bin_coverage", description="Calculate binned coverage statistics")
+def bin_coverage(
+    values: List[float],
+    bin_size: int = 100
+) -> Dict[str, Any]:
     """
+    Calculate binned coverage statistics from signal values.
+    The values are cut into consecutive bins of ``bin_size`` entries; the
+    last bin is shorter when the length is not a multiple of ``bin_size``.
+    NaN and ``None`` entries are ignored in every statistic.
+    Args:
+        values: List of signal values
+        bin_size: Size of each bin (must be positive)
     Returns:
+        Envelope with ``success``, ``result`` and ``error`` keys. On success
+        ``result`` holds the overall mean/std/min/max, per-bin means and
+        standard deviations, ``n_bins`` and ``bin_size``. A statistic with no
+        valid values is reported as ``None``. A non-positive ``bin_size`` or
+        any exception is reported via ``error`` instead of being raised.
     """
+    try:
+        if bin_size <= 0:
+            return {"success": False, "result": None, "error": "bin_size must be a positive integer"}
+        # dtype=float maps None entries to NaN so they are treated as missing.
+        arr = np.array(values, dtype=float)
+        # Remove NaN values for statistics
+        valid = arr[~np.isnan(arr)]
+        # Cut at every multiple of bin_size so each bin really spans
+        # bin_size values (splitting into len // bin_size equal parts does not).
+        cut_points = list(range(bin_size, len(arr), bin_size))
+        binned = np.split(arr, cut_points) if len(arr) > 0 else []
+        bin_means = []
+        bin_stds = []
+        for chunk in binned:
+            kept = chunk[~np.isnan(chunk)]
+            # An all-NaN bin has no defined mean/std; report None rather
+            # than NaN so the payload stays JSON-friendly.
+            bin_means.append(float(kept.mean()) if kept.size > 0 else None)
+            bin_stds.append(float(kept.std()) if kept.size > 0 else None)
+        has_valid = valid.size > 0
+        return {
+            "success": True,
+            "result": {
+                "overall_mean": float(valid.mean()) if has_valid else None,
+                "overall_std": float(valid.std()) if has_valid else None,
+                "overall_min": float(valid.min()) if has_valid else None,
+                "overall_max": float(valid.max()) if has_valid else None,
+                "bin_means": bin_means,
+                "bin_stds": bin_stds,
+                "n_bins": len(binned),
+                "bin_size": bin_size
+            },
+            "error": None
+        }
+    except Exception as e:
+        return {"success": False, "result": None, "error": str(e)}
+def create_app() -> FastMCP:
+    """
+    Create and return the FastMCP application instance.
+    Returns:
+        The module-level ``mcp`` object; no new instance is built per call.
+    """
+    return mcp
+if __name__ == "__main__":
+    # When run as a script, serve the registered tools over the "http"
+    # transport, listening on all interfaces (0.0.0.0) at port 8000.
+    mcp.run(transport="http", host="0.0.0.0", port=8000)