guohanghui committed on
Commit
a35de1c
·
verified ·
1 Parent(s): 1495e8a

Update deepTools/mcp_output/mcp_plugin/mcp_service.py

Browse files
deepTools/mcp_output/mcp_plugin/mcp_service.py CHANGED
@@ -1,127 +1,493 @@
1
- import sys
2
- import os
3
- sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../repo/deepTools')))
4
 
5
- # Ensure fastmcp is installed
6
- try:
7
- import fastmcp
8
- except ImportError:
9
- raise ImportError("The 'fastmcp' module is not installed. Please install it using 'pip install fastmcp'.")
10
 
11
  from fastmcp import FastMCP
 
 
 
 
12
 
13
- # Create the FastMCP service application
14
  mcp = FastMCP("deeptools_service")
15
 
16
- @mcp.tool(name="alignment_sieve", description="Process alignments using alignmentSieve")
17
- def alignment_sieve(input_file: str, output_file: str, options: dict) -> dict:
 
 
 
 
 
 
 
18
  """
19
- Process alignments using alignmentSieve.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- Parameters:
22
- - input_file: Path to the input alignment file.
23
- - output_file: Path to save the processed alignment file.
24
- - options: Dictionary of options for alignmentSieve.
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  Returns:
27
- - dict: Success status and output file path.
28
  """
29
  try:
30
- from deeptools import alignmentSieve
31
- alignmentSieve.main([input_file, output_file, *options])
32
- return {"success": True, "output_file": output_file}
 
 
 
 
 
 
 
 
 
33
  except Exception as e:
34
- return {"success": False, "error": str(e)}
 
35
 
36
- @mcp.tool(name="bam_compare", description="Compare BAM files using bamCompare")
37
- def bam_compare(bam1: str, bam2: str, output_file: str, options: dict) -> dict:
38
  """
39
- Compare two BAM files using bamCompare.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- Parameters:
42
- - bam1: Path to the first BAM file.
43
- - bam2: Path to the second BAM file.
44
- - output_file: Path to save the comparison result.
45
- - options: Dictionary of options for bamCompare.
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  Returns:
48
- - dict: Success status and output file path.
49
  """
50
  try:
51
- from deeptools import bamCompare
52
- bamCompare.main([bam1, bam2, output_file, *options])
53
- return {"success": True, "output_file": output_file}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  except Exception as e:
55
- return {"success": False, "error": str(e)}
 
56
 
57
- @mcp.tool(name="bam_coverage", description="Generate coverage files from BAM using bamCoverage")
58
- def bam_coverage(bam_file: str, output_file: str, options: dict) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  """
60
- Generate coverage files from a BAM file using bamCoverage.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
- Parameters:
63
- - bam_file: Path to the BAM file.
64
- - output_file: Path to save the coverage file.
65
- - options: Dictionary of options for bamCoverage.
66
 
 
 
 
 
 
 
 
 
67
  Returns:
68
- - dict: Success status and output file path.
69
  """
70
  try:
71
- from deeptools import bamCoverage
72
- bamCoverage.main([bam_file, output_file, *options])
73
- return {"success": True, "output_file": output_file}
 
 
 
 
 
 
 
 
 
74
  except Exception as e:
75
- return {"success": False, "error": str(e)}
 
76
 
77
- @mcp.tool(name="compute_gc_bias", description="Compute GC bias using computeGCBias")
78
- def compute_gc_bias(bam_file: str, genome_file: str, output_file: str, options: dict) -> dict:
 
 
 
 
 
 
 
 
79
  """
80
- Compute GC bias for a BAM file using computeGCBias.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- Parameters:
83
- - bam_file: Path to the BAM file.
84
- - genome_file: Path to the genome file.
85
- - output_file: Path to save the GC bias results.
86
- - options: Dictionary of options for computeGCBias.
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  Returns:
89
- - dict: Success status and output file path.
90
  """
91
  try:
92
- from deeptools import computeGCBias
93
- computeGCBias.main([bam_file, genome_file, output_file, *options])
94
- return {"success": True, "output_file": output_file}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  except Exception as e:
96
- return {"success": False, "error": str(e)}
 
97
 
98
- @mcp.tool(name="plot_heatmap", description="Generate heatmaps using plotHeatmap")
99
- def plot_heatmap(matrix_file: str, output_file: str, options: dict) -> dict:
100
  """
101
- Generate heatmaps from a matrix file using plotHeatmap.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
- Parameters:
104
- - matrix_file: Path to the matrix file.
105
- - output_file: Path to save the heatmap.
106
- - options: Dictionary of options for plotHeatmap.
107
 
 
 
 
 
 
 
 
 
108
  Returns:
109
- - dict: Success status and output file path.
110
  """
111
  try:
112
- from deeptools import plotHeatmap
113
- plotHeatmap.main([matrix_file, output_file, *options])
114
- return {"success": True, "output_file": output_file}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  except Exception as e:
116
- return {"success": False, "error": str(e)}
117
 
118
- # Additional tools for other deepTools functionalities can be added here following the same pattern.
119
 
120
- def create_app() -> FastMCP:
 
 
 
 
121
  """
122
- Create and return the FastMCP application instance.
123
-
 
 
 
 
124
  Returns:
125
- - FastMCP: The FastMCP application instance.
126
  """
127
- return mcp
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ deepTools MCP Service - NGS Data Analysis Tools
 
3
 
4
+ Provides tools for analyzing high-throughput sequencing data,
5
+ including BAM/bigWig processing, correlation analysis, and visualization.
6
+ """
 
 
7
 
8
  from fastmcp import FastMCP
9
+ from typing import Optional, List, Dict, Any
10
+ import numpy as np
11
+ import tempfile
12
+ import os
13
 
 
14
  mcp = FastMCP("deeptools_service")
15
 
16
+
17
@mcp.tool(name="compute_correlation", description="Compute correlation matrix from sample data")
def compute_correlation(
    matrix_file: str,
    method: str = "pearson",
    skip_zeros: bool = False,
    remove_outliers: bool = False,
    log1p: bool = False
) -> Dict[str, Any]:
    """
    Compute correlation between samples from a matrix file.

    Args:
        matrix_file: Path to npz matrix file (from multiBamSummary/multiBigwigSummary)
        method: Correlation method ('pearson' or 'spearman'); matched
            case-insensitively, any other value is rejected
        skip_zeros: Skip rows with only zeros
        remove_outliers: Remove outliers before correlation
        log1p: Apply log1p transformation

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``correlation_matrix`` (nested lists), ``labels`` and
        the normalized ``method``; on failure ``result`` is None and ``error``
        carries the message.
    """
    # Reject anything outside the documented contract up front, so the reported
    # "method" can never differ from the one that was actually computed.
    normalized = method.strip().lower() if isinstance(method, str) else method
    if normalized not in ("pearson", "spearman"):
        return {
            "success": False,
            "result": None,
            "error": f"Unsupported correlation method {method!r}; expected 'pearson' or 'spearman'"
        }

    try:
        from deeptools.correlation import Correlation

        corr = Correlation(
            matrix_file,
            corr_method=normalized,
            skip_zeros=skip_zeros,
            remove_outliers=remove_outliers,
            log1p=log1p
        )

        return {
            "success": True,
            "result": {
                "correlation_matrix": corr.corr_matrix.tolist(),
                "labels": corr.labels,
                "method": normalized
            },
            "error": None
        }
    except SystemExit as e:
        # NOTE(review): deepTools appears to call sys.exit() for some invalid
        # inputs (e.g. a single-sample matrix) — confirm. SystemExit is not an
        # Exception subclass, so without this clause it would escape the tool.
        return {"success": False, "result": None, "error": f"deepTools exited: {e}"}
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
60
 
 
 
 
 
61
 
62
@mcp.tool(name="get_gc_content", description="Calculate GC content for a genomic region")
def get_gc_content(
    twobit_file: str,
    chrom: str,
    start: int,
    end: int
) -> Dict[str, Any]:
    """
    Calculate GC content for a genomic region.

    Args:
        twobit_file: Path to 2bit genome file
        chrom: Chromosome name
        start: Start position
        end: End position

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``gc_content`` (the value returned by deepTools'
        ``getGC_content``) and ``region`` formatted as ``chrom:start-end``.
    """
    try:
        import py2bit
        from deeptools.utilities import getGC_content

        tb = py2bit.open(twobit_file)
        try:
            gc = getGC_content(tb, chrom, start, end)
        finally:
            # Release the file handle even when the lookup raises
            # (unknown chromosome, out-of-range coordinates, ...).
            tb.close()

        return {
            "success": True,
            "result": {"gc_content": gc, "region": f"{chrom}:{start}-{end}"},
            "error": None
        }
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
96
+
97
 
98
@mcp.tool(name="get_bam_stats", description="Get statistics from a BAM file")
def get_bam_stats(bam_file: str) -> Dict[str, Any]:
    """
    Get basic statistics from a BAM file.

    Args:
        bam_file: Path to BAM file

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``mapped_reads``, ``unmapped_reads``, ``references``,
        ``lengths`` and ``num_references`` read from the opened BAM handle.
    """
    try:
        from deeptools.bamHandler import openBam

        bam = openBam(bam_file)
        try:
            stats = {
                "mapped_reads": bam.mapped,
                "unmapped_reads": bam.unmapped,
                "references": list(bam.references),
                "lengths": list(bam.lengths),
                "num_references": bam.nreferences
            }
        finally:
            # Close the handle even if one of the attribute reads fails
            # (e.g. index-backed counters on an unindexed file).
            bam.close()

        return {"success": True, "result": stats, "error": None}
    except SystemExit as e:
        # NOTE(review): openBam appears to call sys.exit() when the file is
        # missing, malformed or unindexed — confirm. SystemExit is not caught
        # by `except Exception`, so handle it explicitly.
        return {"success": False, "result": None, "error": f"deepTools exited: {e}"}
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
127
 
 
 
 
 
 
128
 
129
@mcp.tool(name="get_bigwig_stats", description="Get statistics from a bigWig file")
def get_bigwig_stats(
    bigwig_file: str,
    chrom: Optional[str] = None,
    start: Optional[int] = None,
    end: Optional[int] = None
) -> Dict[str, Any]:
    """
    Get statistics from a bigWig file.

    Args:
        bigwig_file: Path to bigWig file
        chrom: Optional chromosome to get stats for
        start: Optional start position; when omitted the region starts at 0
        end: Optional end position; when omitted pyBigWig's default end is used

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``chromosomes`` and ``is_bigwig``; when ``chrom`` is
        given it also holds ``region_stats`` with ``mean``, ``min``, ``max``,
        ``std`` and ``sum``.
    """
    try:
        import pyBigWig

        bw = pyBigWig.open(bigwig_file)
        try:
            result = {
                "chromosomes": dict(bw.chroms()),
                "is_bigwig": bw.isBigWig()
            }

            if chrom:
                # Build the positional region so that missing coordinates are
                # simply not passed. NOTE(review): relies on pyBigWig treating
                # omitted start/end as the whole chromosome — confirm.
                if end is not None:
                    region = (chrom, 0 if start is None else start, end)
                elif start is not None:
                    region = (chrom, start)
                else:
                    region = (chrom,)

                result["region_stats"] = {
                    stat: bw.stats(*region, type=stat)[0]
                    for stat in ("mean", "min", "max", "std", "sum")
                }
        finally:
            # Always release the handle, including when a stats query raises.
            bw.close()

        return {"success": True, "result": result, "error": None}
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
172
+
173
 
174
@mcp.tool(name="get_bigwig_values", description="Get values from a bigWig file for a region")
def get_bigwig_values(
    bigwig_file: str,
    chrom: str,
    start: int,
    end: int
) -> Dict[str, Any]:
    """
    Get signal values from a bigWig file for a specific region.

    Args:
        bigwig_file: Path to bigWig file
        chrom: Chromosome name
        start: Start position
        end: End position

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``region`` (``chrom:start-end``), ``values`` (floats,
        with NaN entries replaced by None) and ``length``.
    """
    try:
        import pyBigWig

        bw = pyBigWig.open(bigwig_file)
        try:
            values = bw.values(chrom, start, end)
        finally:
            # Close the handle even when the query fails (bad chromosome,
            # out-of-range coordinates, ...).
            bw.close()

        # NaN cannot be represented in strict JSON, so emit None instead.
        values_list = [None if np.isnan(v) else float(v) for v in values]

        return {
            "success": True,
            "result": {
                "region": f"{chrom}:{start}-{end}",
                "values": values_list,
                "length": len(values_list)
            },
            "error": None
        }
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
214
+
215
+
216
@mcp.tool(name="compute_matrix_stats", description="Get statistics from a deepTools matrix file")
def compute_matrix_stats(matrix_file: str) -> Dict[str, Any]:
    """
    Get statistics from a deepTools matrix file (from computeMatrix).

    Args:
        matrix_file: Path to matrix file (.gz or .npz)

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``num_samples``, ``sample_labels``, ``num_regions``,
        ``num_groups``, ``group_labels``, ``matrix_shape`` and ``parameters``.
    """
    try:
        from deeptools.heatmapper import heatmapper

        hm = heatmapper()
        hm.read_matrix_file(matrix_file)

        values = hm.matrix.matrix
        shape = list(values.shape) if hasattr(values, 'shape') else None
        num_groups = hm.matrix.get_num_groups()

        result = {
            "num_samples": len(hm.matrix.sample_labels),
            "sample_labels": hm.matrix.sample_labels,
            # Previously this key carried get_num_groups(), i.e. the number of
            # groups rather than regions. NOTE(review): assumes the matrix has
            # one row per region — confirm. Falls back to the old value when
            # the shape is unavailable.
            "num_regions": shape[0] if shape else num_groups,
            "num_groups": num_groups,
            "group_labels": hm.matrix.group_labels,
            "matrix_shape": shape,
            "parameters": hm.parameters if hasattr(hm, 'parameters') else {}
        }

        return {"success": True, "result": result, "error": None}
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
245
 
 
 
 
 
246
 
247
@mcp.tool(name="smart_labels", description="Generate smart labels from file paths")
def smart_labels(file_paths: List[str]) -> Dict[str, Any]:
    """
    Derive display labels from file paths by delegating to deepTools' ``smartLabels``.

    Args:
        file_paths: Paths to convert into labels

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` pairs the untouched input (``original``) with the generated
        ``labels``; on failure ``result`` is None and ``error`` is the
        exception text.
    """
    try:
        from deeptools.utilities import smartLabels

        payload = {
            "original": file_paths,
            "labels": smartLabels(file_paths)
        }
    except Exception as exc:
        return {"success": False, "result": None, "error": str(exc)}

    return {"success": True, "result": payload, "error": None}
273
+
274
 
275
@mcp.tool(name="load_npz_matrix", description="Load and inspect a numpy matrix file")
def load_npz_matrix(matrix_file: str) -> Dict[str, Any]:
    """
    Load and inspect a deepTools npz matrix file.

    Args:
        matrix_file: Path to npz file

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``keys`` (array names), ``shapes`` and ``dtypes`` per
        array, plus ``labels`` as strings when the archive has a ``labels``
        entry.
    """
    try:
        # SECURITY NOTE(review): allow_pickle=True means a crafted file can run
        # arbitrary code while its object arrays are unpickled. It is kept to
        # preserve behavior; drop it if the inspected archives never need it.
        with np.load(matrix_file, allow_pickle=True) as data:
            # Read every member exactly once while the archive is open; each
            # data[key] access re-reads the member from disk.
            arrays = {key: data[key] for key in data.keys()}

        result = {
            "keys": list(arrays),
            "shapes": {k: list(v.shape) if hasattr(v, 'shape') else None
                       for k, v in arrays.items()},
            "dtypes": {k: str(v.dtype) if hasattr(v, 'dtype') else type(v).__name__
                       for k, v in arrays.items()}
        }

        # If labels exist, include them
        if 'labels' in arrays:
            labels = arrays['labels']
            if hasattr(labels, 'tolist'):
                labels = labels.tolist()
            result['labels'] = [str(label) for label in labels]

        return {"success": True, "result": result, "error": None}
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
308
 
 
 
 
 
 
309
 
310
@mcp.tool(name="calculate_correlation_stats", description="Calculate correlation statistics between two arrays")
def calculate_correlation_stats(
    array1: List[float],
    array2: List[float],
    method: str = "pearson"
) -> Dict[str, Any]:
    """
    Calculate correlation statistics between two numeric arrays.

    Positions where either array is NaN or None are dropped before the
    correlation is computed.

    Args:
        array1: First array of values
        array2: Second array of values (same length as ``array1``)
        method: Correlation method ('pearson' or 'spearman'); matched
            case-insensitively, any other value is rejected

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``correlation``, ``p_value``, the normalized
        ``method`` and ``n_samples`` (number of value pairs actually used).
    """
    # Previously every value other than "spearman" silently ran Pearson while
    # the response echoed the requested name.
    normalized = method.strip().lower() if isinstance(method, str) else method
    if normalized not in ("pearson", "spearman"):
        return {
            "success": False,
            "result": None,
            "error": f"Unsupported correlation method {method!r}; expected 'pearson' or 'spearman'"
        }

    try:
        import scipy.stats

        # dtype=float maps None to NaN, so output of tools that encode missing
        # values as None can be passed straight in.
        a1 = np.asarray(array1, dtype=float)
        a2 = np.asarray(array2, dtype=float)

        if a1.ndim != 1 or a1.shape != a2.shape:
            return {
                "success": False,
                "result": None,
                "error": "array1 and array2 must be flat lists of the same length"
            }

        # Remove NaN values
        mask = ~(np.isnan(a1) | np.isnan(a2))
        a1 = a1[mask]
        a2 = a2[mask]

        if len(a1) < 2:
            return {
                "success": False,
                "result": None,
                "error": "At least two non-missing value pairs are required"
            }

        if normalized == "spearman":
            corr, pval = scipy.stats.spearmanr(a1, a2)
        else:
            corr, pval = scipy.stats.pearsonr(a1, a2)

        return {
            "success": True,
            "result": {
                "correlation": float(corr),
                "p_value": float(pval),
                "method": normalized,
                "n_samples": len(a1)
            },
            "error": None
        }
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
355
+
356
 
357
@mcp.tool(name="list_deeptools_commands", description="List available deepTools command-line tools")
def list_deeptools_commands() -> Dict[str, Any]:
    """
    Return a static catalogue of deepTools command-line tools grouped by category.

    Returns:
        Dictionary with ``success`` True, ``error`` None and ``result`` mapping
        each category name to a ``{tool name: description}`` dictionary.
    """
    # (category, ((tool, description), ...)) pairs; order is preserved in the output.
    catalogue = (
        ("BAM tools", (
            ("alignmentSieve", "Filter alignments from BAM files"),
            ("bamCompare", "Compare two BAM files based on the number of mapped reads"),
            ("bamCoverage", "Calculate genome coverage from BAM file"),
            ("bamPEFragmentSize", "Calculate fragment sizes in paired-end data"),
            ("estimateReadFiltering", "Estimate the number of reads filtered by alignmentSieve"),
            ("multiBamSummary", "Summarize multiple BAM files"),
        )),
        ("bigWig tools", (
            ("bigwigAverage", "Average multiple bigWig files"),
            ("bigwigCompare", "Compare two bigWig files"),
            ("multiBigwigSummary", "Summarize multiple bigWig files"),
        )),
        ("Matrix tools", (
            ("computeMatrix", "Calculate scores per genome regions"),
            ("computeMatrixOperations", "Modify computeMatrix output"),
        )),
        ("Visualization", (
            ("plotCorrelation", "Plot correlation heatmap"),
            ("plotCoverage", "Plot coverage"),
            ("plotEnrichment", "Plot enrichment"),
            ("plotFingerprint", "Plot fingerprint"),
            ("plotHeatmap", "Plot heatmap from computeMatrix output"),
            ("plotPCA", "Plot PCA"),
            ("plotProfile", "Plot profile from computeMatrix output"),
        )),
        ("GC bias", (
            ("computeGCBias", "Compute GC bias"),
            ("correctGCBias", "Correct GC bias"),
        )),
    )

    tools = {category: dict(entries) for category, entries in catalogue}
    return {"success": True, "result": tools, "error": None}
399
 
 
 
 
 
400
 
401
@mcp.tool(name="get_chromosome_sizes", description="Get chromosome sizes from a BAM or bigWig file")
def get_chromosome_sizes(file_path: str) -> Dict[str, Any]:
    """
    Get chromosome sizes from a BAM or bigWig file.

    The file type is chosen from the extension, compared case-insensitively:
    ``.bam`` for BAM, ``.bw`` / ``.bigwig`` for bigWig.

    Args:
        file_path: Path to BAM or bigWig file

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``chromosome_sizes`` (name -> length), ``total_size``
        and ``num_chromosomes``. Unrecognized extensions yield
        ``"Unsupported file format"``.
    """
    try:
        # Lower-casing covers '.BAM', '.BW', '.bigWig', '.BigWig', ... in one check.
        lowered = file_path.lower()

        if lowered.endswith('.bam'):
            from deeptools.bamHandler import openBam
            bam = openBam(file_path)
            try:
                sizes = dict(zip(bam.references, bam.lengths))
            finally:
                bam.close()
        elif lowered.endswith(('.bw', '.bigwig')):
            import pyBigWig
            bw = pyBigWig.open(file_path)
            try:
                sizes = dict(bw.chroms())
            finally:
                bw.close()
        else:
            return {"success": False, "result": None, "error": "Unsupported file format"}

        return {
            "success": True,
            "result": {
                "chromosome_sizes": sizes,
                "total_size": sum(sizes.values()),
                "num_chromosomes": len(sizes)
            },
            "error": None
        }
    except SystemExit as e:
        # NOTE(review): openBam appears to call sys.exit() on unreadable or
        # unindexed files — confirm. SystemExit bypasses `except Exception`.
        return {"success": False, "result": None, "error": f"deepTools exited: {e}"}
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
437
 
 
438
 
439
@mcp.tool(name="bin_coverage", description="Calculate binned coverage statistics")
def bin_coverage(
    values: List[float],
    bin_size: int = 100
) -> Dict[str, Any]:
    """
    Calculate binned coverage statistics from signal values.

    The values are split into ``len(values) // bin_size`` near-equal
    consecutive parts (at least one), so when the length is not a multiple of
    ``bin_size`` the parts are slightly larger than ``bin_size``. NaN and None
    entries are ignored in every statistic.

    Args:
        values: List of signal values
        bin_size: Size of each bin; must be a positive integer

    Returns:
        Dictionary with keys ``success``, ``result`` and ``error``. On success
        ``result`` holds ``overall_mean``, ``overall_std``, ``overall_min``,
        ``overall_max``, ``bin_means``, ``bin_stds``, ``n_bins`` and
        ``bin_size``. Any statistic with no valid values behind it is None.
    """
    if bin_size <= 0:
        return {"success": False, "result": None, "error": "bin_size must be a positive integer"}

    try:
        # dtype=float maps None to NaN, so missing values encoded as None are accepted.
        arr = np.asarray(values, dtype=float)

        # Remove NaN values for statistics
        valid = arr[~np.isnan(arr)]

        # Calculate bins
        n_bins = max(len(arr) // bin_size, 1)
        binned = np.array_split(arr, n_bins)

        def _summarize(chunk, reducer):
            # Reduce only the non-NaN part; an empty part gives None rather
            # than NaN (invalid in strict JSON) and avoids numpy's
            # empty-slice RuntimeWarning.
            kept = chunk[~np.isnan(chunk)]
            return float(reducer(kept)) if kept.size else None

        bin_means = [_summarize(b, np.mean) for b in binned]
        bin_stds = [_summarize(b, np.std) for b in binned]

        return {
            "success": True,
            "result": {
                "overall_mean": _summarize(valid, np.mean),
                "overall_std": _summarize(valid, np.std),
                "overall_min": _summarize(valid, np.min),
                "overall_max": _summarize(valid, np.max),
                "bin_means": bin_means,
                "bin_stds": bin_stds,
                "n_bins": n_bins,
                "bin_size": bin_size
            },
            "error": None
        }
    except Exception as e:
        return {"success": False, "result": None, "error": str(e)}
485
+
486
+
487
def create_app() -> FastMCP:
    """
    Return the module-level FastMCP application.

    Returns:
        The shared ``mcp`` instance on which this module's tools are
        registered; no new instance is created.
    """
    return mcp
490
+
491
+
492
if __name__ == "__main__":
    # Script entry point: serve the tools over HTTP on port 8000.
    # Host 0.0.0.0 binds every network interface, not just localhost.
    server_options = {"transport": "http", "host": "0.0.0.0", "port": 8000}
    mcp.run(**server_options)