SEUyishu committed
Commit 5aaf4b5 · verified · Parent: e2e8735

Update mcp_output/mcp_plugin/mcp_service.py

Files changed (1): mcp_output/mcp_plugin/mcp_service.py (+457 −8)
mcp_output/mcp_plugin/mcp_service.py CHANGED

@@ -13,6 +13,10 @@ import base64
 import hashlib
 import shutil
 import uuid
+import zipfile
+import tarfile
+import io
+import tempfile  # used by upload_dataset_archive for its temporary extraction dir
 from datetime import datetime
 from typing import Optional, List, Dict, Any
 from pathlib import Path

@@ -74,6 +78,158 @@ def _generate_model_id(model_name: str) -> str:
     return f"model_{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
 
 
+def _safe_join(base: str, *paths: str) -> str:
+    """Join paths and ensure the result stays inside the base directory."""
+    base_path = Path(base).resolve()
+    target_path = base_path.joinpath(*paths).resolve()
+    if base_path != target_path and base_path not in target_path.parents:
+        raise ValueError("Attempted to write outside of the allowed directory")
+    return str(target_path)
+
+
+def _normalize_filename(filename: str) -> str:
+    """Return a sanitized filename without directory components."""
+    clean_name = os.path.basename(filename)
+    if not clean_name:
+        raise ValueError("Filename must not be empty")
+    return clean_name
+
+
+def _clear_directory_contents(directory: str) -> None:
+    """Remove all files and folders inside the provided directory."""
+    if not os.path.isdir(directory):
+        return
+    for entry in os.listdir(directory):
+        entry_path = os.path.join(directory, entry)
+        if os.path.isdir(entry_path):
+            shutil.rmtree(entry_path)
+        else:
+            os.remove(entry_path)
+
+
+def _copy_tree(src: str, dst: str, overwrite: bool = False) -> Dict[str, List[str]]:
+    """Copy a directory tree with overwrite and traversal protection."""
+    results = {"created": [], "overwritten": [], "skipped": []}
+    src_path = Path(src)
+    for root, _, files in os.walk(src_path):
+        rel_root = os.path.relpath(root, src_path)
+        rel_root = "" if rel_root == "." else rel_root
+        for file_name in files:
+            # Skip macOS archive metadata (__MACOSX folders, ._ resource forks)
+            if file_name.startswith("._") or "__MACOSX" in Path(root).parts:
+                continue
+            rel_path = os.path.normpath(os.path.join(rel_root, file_name))
+            dest_path = _safe_join(dst, rel_path)
+            os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+            src_file = os.path.join(root, file_name)
+            if os.path.exists(dest_path):
+                if overwrite:
+                    shutil.copy2(src_file, dest_path)
+                    results["overwritten"].append(rel_path.replace("\\", "/"))
+                else:
+                    results["skipped"].append(rel_path.replace("\\", "/"))
+            else:
+                shutil.copy2(src_file, dest_path)
+                results["created"].append(rel_path.replace("\\", "/"))
+    return results
+
+
+def _resolve_dataset_root(extracted_dir: str) -> str:
+    """Select the most probable dataset root inside an extracted archive."""
+    entries = [p for p in Path(extracted_dir).iterdir() if not p.name.startswith("__MACOSX")]
+    if len(entries) == 1 and entries[0].is_dir():
+        return str(entries[0])
+    return extracted_dir
+
+
+def _update_session_uploaded_files(session_path: str) -> None:
+    """Rescan the session data directory and persist the uploaded-file list."""
+    info_file = os.path.join(session_path, "session_info.json")
+    if not os.path.exists(info_file):
+        return
+    data_dir = os.path.join(session_path, "data")
+    uploaded = []
+    if os.path.exists(data_dir):
+        for root, _, files in os.walk(data_dir):
+            for name in files:
+                rel_path = os.path.relpath(os.path.join(root, name), data_dir)
+                uploaded.append(rel_path.replace("\\", "/"))
+    with open(info_file, 'r', encoding='utf-8') as f:
+        session_info = json.load(f)
+    session_info["uploaded_files"] = sorted(uploaded)
+    with open(info_file, 'w', encoding='utf-8') as f:
+        json.dump(session_info, f, indent=2)
+
+
+def _record_session_data_source(session_path: str, source_info: Dict[str, Any]) -> None:
+    """Append dataset source metadata to the session record."""
+    info_file = os.path.join(session_path, "session_info.json")
+    if not os.path.exists(info_file):
+        return
+    with open(info_file, 'r', encoding='utf-8') as f:
+        session_info = json.load(f)
+    data_sources = session_info.setdefault("data_sources", [])
+    data_sources.append(source_info)
+    session_info["data_sources"] = data_sources[-10:]  # keep only the ten most recent entries
+    with open(info_file, 'w', encoding='utf-8') as f:
+        json.dump(session_info, f, indent=2)
+
+
+def _summarize_dataset_directory(data_path: str) -> Dict[str, Any]:
+    """Collect lightweight statistics about files inside a dataset directory."""
+    summary = {
+        "total_files": 0,
+        "targets_csv": False,
+        "structure_extensions": {}
+    }
+    if not os.path.exists(data_path):
+        return summary
+    for root, _, files in os.walk(data_path):
+        for name in files:
+            summary["total_files"] += 1
+            if name.lower() == "targets.csv":
+                summary["targets_csv"] = True
+            else:
+                ext = os.path.splitext(name)[1].lower() or "<no_ext>"
+                summary["structure_extensions"][ext] = summary["structure_extensions"].get(ext, 0) + 1
+    return summary
+
+
+def _safe_extract_zip(archive: zipfile.ZipFile, destination: str) -> None:
+    """Extract zip members while preventing path traversal."""
+    for member in archive.infolist():
+        name = member.filename
+        if not name:
+            continue
+        target_path = _safe_join(destination, name)
+        if member.is_dir() or name.endswith('/'):
+            os.makedirs(target_path, exist_ok=True)
+            continue
+        os.makedirs(os.path.dirname(target_path), exist_ok=True)
+        with archive.open(member, 'r') as src, open(target_path, 'wb') as dst:
+            shutil.copyfileobj(src, dst)
+
+
+def _safe_extract_tar(archive: tarfile.TarFile, destination: str) -> None:
+    """Extract tar members while preventing unsafe writes (links are skipped)."""
+    for member in archive.getmembers():
+        name = member.name
+        if not name:
+            continue
+        if member.islnk() or member.issym():
+            continue
+        target_path = _safe_join(destination, name)
+        if member.isdir():
+            os.makedirs(target_path, exist_ok=True)
+            continue
+        if member.isfile():
+            extracted = archive.extractfile(member)
+            if extracted is None:
+                continue
+            os.makedirs(os.path.dirname(target_path), exist_ok=True)
+            with extracted as src, open(target_path, 'wb') as dst:
+                shutil.copyfileobj(src, dst)
+
+
 # ============================================================================
 # Session management tools
 # ============================================================================

@@ -99,6 +255,14 @@ def create_session(session_name: Optional[str] = None) -> dict:
     os.makedirs(os.path.join(session_path, "data"), exist_ok=True)
     os.makedirs(os.path.join(session_path, "models"), exist_ok=True)
     os.makedirs(os.path.join(session_path, "outputs"), exist_ok=True)
+    # Initialize an id index file under data/ to avoid missing-file errors later
+    id_index_path = os.path.join(session_path, "data", "id.json")
+    if not os.path.exists(id_index_path):
+        try:
+            with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                json.dump({}, _idf)
+        except Exception:
+            pass
 
     session_info = {
         "session_id": session_id,

@@ -109,6 +273,7 @@ def create_session(session_name: Optional[str] = None) -> dict:
         "outputs_path": os.path.join(session_path, "outputs"),
         "uploaded_files": [],
        "trained_models": [],
+        "data_sources": [],
         "status": "active"
     }
 

@@ -117,6 +282,7 @@ def create_session(session_name: Optional[str] = None) -> dict:
     # Save session info to disk
     with open(os.path.join(session_path, "session_info.json"), 'w') as f:
         json.dump(session_info, f, indent=2)
+
 
     return {
         "success": True,

@@ -162,6 +328,7 @@ def get_session_info(session_id: str) -> dict:
         session_info["current_files"] = files
         session_info["file_count"] = len(files)
         session_info["has_targets"] = "targets.csv" in files
+        session_info["dataset_summary"] = _summarize_dataset_directory(data_path)
 
         return {"success": True, **session_info}
     except Exception as e:

@@ -272,8 +439,10 @@ def upload_structure_file(
             return {"success": False, "error": f"Session not found: {session_id}"}
 
         data_path = os.path.join(session_path, "data")
-        file_path = os.path.join(data_path, filename)
-
+        os.makedirs(data_path, exist_ok=True)
+        filename = _normalize_filename(filename)
+        file_path = _safe_join(data_path, filename)
+
         with open(file_path, 'w', encoding='utf-8') as f:
             f.write(file_content)
 

@@ -296,12 +465,41 @@ def upload_structure_file(
         except Exception as e:
             validation = {"valid": False, "warning": str(e)}
 
+        # Update the id.json index
+        try:
+            id_index_path = os.path.join(data_path, "id.json")
+            if not os.path.exists(id_index_path):
+                with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                    json.dump({}, _idf)
+            with open(id_index_path, 'r', encoding='utf-8') as _idf:
+                id_index = json.load(_idf)
+        except Exception:
+            id_index = {}
+
+        file_id = uuid.uuid4().hex
+        id_index[file_id] = {
+            "filename": filename,
+            "uploaded_at": datetime.now().isoformat(),
+            "size": len(file_content)
+        }
+        try:
+            with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                json.dump(id_index, _idf, indent=2)
+        except Exception:
+            pass
+
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "filename": filename,
             "file_size": len(file_content),
             "saved_to": file_path,
-            "validation": validation
+            "validation": validation,
+            "file_id": file_id
         }
     except Exception as e:
         return {"success": False, "error": str(e)}

@@ -338,18 +536,40 @@ def upload_structure_files_batch(
             return {"success": False, "error": f"Session not found: {session_id}"}
 
         data_path = os.path.join(session_path, "data")
+        os.makedirs(data_path, exist_ok=True)
         results = []
         success_count = 0
 
         for filename, content in files.items():
             try:
-                file_path = os.path.join(data_path, filename)
+                clean_name = _normalize_filename(filename)
+                file_path = _safe_join(data_path, clean_name)
                 with open(file_path, 'w', encoding='utf-8') as f:
                     f.write(content)
+
+                # Update the id index
+                try:
+                    id_index_path = os.path.join(data_path, "id.json")
+                    if not os.path.exists(id_index_path):
+                        with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                            json.dump({}, _idf)
+                    with open(id_index_path, 'r', encoding='utf-8') as _idf:
+                        id_index = json.load(_idf)
+                except Exception:
+                    id_index = {}
+                file_id = uuid.uuid4().hex
+                id_index[file_id] = {"filename": clean_name, "uploaded_at": datetime.now().isoformat(), "size": len(content)}
+                try:
+                    with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                        json.dump(id_index, _idf, indent=2)
+                except Exception:
+                    pass
+
                 results.append({
-                    "filename": filename,
+                    "filename": clean_name,
                     "success": True,
-                    "size": len(content)
+                    "size": len(content),
+                    "file_id": file_id
                 })
                 success_count += 1
             except Exception as e:

@@ -359,6 +579,11 @@ def upload_structure_files_batch(
                     "error": str(e)
                 })
 
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "total_files": len(files),

@@ -446,6 +671,11 @@ def upload_targets(
             "existing_structure_files": len(structure_files)
         }
 
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "saved_to": targets_path,

@@ -490,14 +720,21 @@ def upload_binary_file(
             dest_path = os.path.join(session_path, "models")
         else:
             dest_path = os.path.join(session_path, "data")
-
-        file_path = os.path.join(dest_path, filename)
+
+        filename = _normalize_filename(filename)
+        file_path = _safe_join(dest_path, filename)
 
         # Decode and write binary content
         binary_content = base64.b64decode(base64_content)
         with open(file_path, 'wb') as f:
             f.write(binary_content)
 
+        try:
+            if destination != "models":
+                _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "filename": filename,

@@ -508,6 +745,219 @@ def upload_binary_file(
         return {"success": False, "error": str(e)}
 
 
+@mcp.tool(
+    name="upload_dataset_archive",
+    description="Upload a compressed dataset archive (zip/tar) into a session's data directory."
+)
+def upload_dataset_archive(
+    session_id: str,
+    filename: str,
+    base64_content: str,
+    overwrite_existing: bool = False,
+    clear_existing: bool = False
+) -> dict:
+    """Decode and extract a dataset archive directly into the session data folder."""
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        os.makedirs(data_path, exist_ok=True)
+
+        filename = _normalize_filename(filename)
+        archive_bytes = base64.b64decode(base64_content)
+
+        temp_dir = tempfile.mkdtemp(prefix="mcp_dataset_")
+        try:
+            archive_lower = filename.lower()
+            if archive_lower.endswith(".zip"):
+                with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
+                    _safe_extract_zip(archive, temp_dir)
+            elif archive_lower.endswith((".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz")):
+                with tarfile.open(fileobj=io.BytesIO(archive_bytes), mode="r:*") as archive:
+                    _safe_extract_tar(archive, temp_dir)
+            else:
+                return {
+                    "success": False,
+                    "error": "Unsupported archive format. Use .zip, .tar, .tar.gz, .tgz, .tar.bz2"
+                }
+
+            dataset_root = _resolve_dataset_root(temp_dir)
+            if not os.listdir(dataset_root):
+                return {"success": False, "error": "Archive appears to be empty after extraction."}
+
+            if clear_existing:
+                _clear_directory_contents(data_path)
+
+            copy_report = _copy_tree(dataset_root, data_path, overwrite=overwrite_existing)
+        finally:
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
+        summary = _summarize_dataset_directory(data_path)
+
+        try:
+            _record_session_data_source(session_path, {
+                "type": "archive_upload",
+                "filename": filename,
+                "timestamp": datetime.now().isoformat(),
+                "stats": {
+                    "created": len(copy_report["created"]),
+                    "overwritten": len(copy_report["overwritten"]),
+                    "skipped": len(copy_report["skipped"])
+                }
+            })
+        except Exception:
+            pass
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "archive_name": filename,
+            "created_files": copy_report["created"],
+            "overwritten_files": copy_report["overwritten"],
+            "skipped_files": copy_report["skipped"],
+            "dataset_summary": summary,
+            "next_steps": [
+                "Use process_session_data to generate graphs",
+                "Confirm targets.csv is present before training"
+            ]
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(
+    name="register_local_dataset",
+    description="Copy an existing local dataset directory into a session's data workspace."
+)
+def register_local_dataset(
+    session_id: str,
+    dataset_path: str,
+    overwrite_existing: bool = False,
+    clear_existing: bool = False
+) -> dict:
+    """Copy a dataset from disk into the managed session directory."""
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        dataset_abs = os.path.abspath(dataset_path)
+        if not os.path.exists(dataset_abs):
+            return {"success": False, "error": f"Dataset path not found: {dataset_path}"}
+        if not os.path.isdir(dataset_abs):
+            return {"success": False, "error": "dataset_path must be a directory"}
+
+        data_path = os.path.join(session_path, "data")
+        os.makedirs(data_path, exist_ok=True)
+
+        if Path(dataset_abs).resolve() == Path(data_path).resolve():
+            summary = _summarize_dataset_directory(data_path)
+            return {
+                "success": True,
+                "session_id": session_id,
+                "source_path": dataset_abs,
+                "created_files": [],
+                "overwritten_files": [],
+                "skipped_files": [],
+                "dataset_summary": summary,
+                "message": "dataset_path already points to the session data directory; nothing to copy."
+            }
+
+        if clear_existing:
+            _clear_directory_contents(data_path)
+
+        copy_report = _copy_tree(dataset_abs, data_path, overwrite=overwrite_existing)
+
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
+        summary = _summarize_dataset_directory(data_path)
+
+        try:
+            _record_session_data_source(session_path, {
+                "type": "local_import",
+                "source_path": dataset_abs,
+                "timestamp": datetime.now().isoformat(),
+                "stats": {
+                    "created": len(copy_report["created"]),
+                    "overwritten": len(copy_report["overwritten"]),
+                    "skipped": len(copy_report["skipped"])
+                }
+            })
+        except Exception:
+            pass
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "source_path": dataset_abs,
+            "created_files": copy_report["created"],
+            "overwritten_files": copy_report["overwritten"],
+            "skipped_files": copy_report["skipped"],
+            "dataset_summary": summary,
+            "next_steps": [
+                "Verify targets.csv is present in session data",
+                "Run process_session_data to generate processed graphs"
+            ]
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(
+    name="list_session_data_files",
+    description="List files currently available in a session's data directory."
+)
+def list_session_data_files(
+    session_id: str,
+    include_sizes: bool = False,
+    max_items: int = 200
+) -> dict:
+    """Enumerate dataset files stored for a session."""
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        if not os.path.exists(data_path):
+            return {"success": True, "files": [], "total_files": 0, "dataset_summary": {}}
+
+        files_info = []
+        for root, _, files in os.walk(data_path):
+            for name in files:
+                rel_path = os.path.relpath(os.path.join(root, name), data_path).replace("\\", "/")
+                file_entry: Dict[str, Any] = {"path": rel_path}
+                file_abs_path = os.path.join(root, name)
+                if include_sizes:
+                    file_entry["size_bytes"] = os.path.getsize(file_abs_path)
+                files_info.append(file_entry)
+
+        files_info.sort(key=lambda item: item["path"].lower())
+        total_files = len(files_info)
+        truncated = files_info[:max(0, max_items)]
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "files": truncated,
+            "total_files": total_files,
+            "truncated": total_files > len(truncated),
+            "dataset_summary": _summarize_dataset_directory(data_path)
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
 @mcp.tool(name="check_environment", description="Check if MatDeepLearn environment is properly configured and GPU is available.")
 def check_environment() -> dict:
     """