SEUyishu committed
Commit 5aaf4b5 · verified · Parent: e2e8735

Update mcp_output/mcp_plugin/mcp_service.py

Files changed (1): mcp_output/mcp_plugin/mcp_service.py (+457 −8)
mcp_output/mcp_plugin/mcp_service.py CHANGED

@@ -13,6 +13,10 @@ import base64
 import hashlib
 import shutil
 import uuid
+import zipfile
+import tarfile
+import io
+import tempfile  # used by upload_dataset_archive for its temporary extraction dir
 from datetime import datetime
 from typing import Optional, List, Dict, Any
 from pathlib import Path

@@ -74,6 +78,158 @@ def _generate_model_id(model_name: str) -> str:
     return f"model_{model_name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
 
 
+def _safe_join(base: str, *paths: str) -> str:
+    """Join paths and ensure the result stays inside the base directory."""
+    base_path = Path(base).resolve()
+    target_path = base_path.joinpath(*paths).resolve()
+    if base_path != target_path and base_path not in target_path.parents:
+        raise ValueError("Attempted to write outside of the allowed directory")
+    return str(target_path)
+
+
+def _normalize_filename(filename: str) -> str:
+    """Return a sanitized filename without directory components."""
+    clean_name = os.path.basename(filename)
+    if not clean_name:
+        raise ValueError("Filename must not be empty")
+    return clean_name
+
+
+def _clear_directory_contents(directory: str) -> None:
+    """Remove all files and folders inside the provided directory."""
+    if not os.path.isdir(directory):
+        return
+    for entry in os.listdir(directory):
+        entry_path = os.path.join(directory, entry)
+        if os.path.isdir(entry_path):
+            shutil.rmtree(entry_path)
+        else:
+            os.remove(entry_path)
+
+
+def _copy_tree(src: str, dst: str, overwrite: bool = False) -> Dict[str, List[str]]:
+    """Copy a directory tree with overwrite and traversal protection."""
+    results = {"created": [], "overwritten": [], "skipped": []}
+    src_path = Path(src)
+    for root, _, files in os.walk(src_path):
+        rel_root = os.path.relpath(root, src_path)
+        rel_root = "" if rel_root == "." else rel_root
+        for file_name in files:
+            # Skip macOS archive metadata (__MACOSX folders, ._ resource forks)
+            if file_name.startswith("._") or "__MACOSX" in Path(root).parts:
+                continue
+            rel_path = os.path.normpath(os.path.join(rel_root, file_name))
+            dest_path = _safe_join(dst, rel_path)
+            os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+            src_file = os.path.join(root, file_name)
+            if os.path.exists(dest_path):
+                if overwrite:
+                    shutil.copy2(src_file, dest_path)
+                    results["overwritten"].append(rel_path.replace("\\", "/"))
+                else:
+                    results["skipped"].append(rel_path.replace("\\", "/"))
+            else:
+                shutil.copy2(src_file, dest_path)
+                results["created"].append(rel_path.replace("\\", "/"))
+    return results
+
+
+def _resolve_dataset_root(extracted_dir: str) -> str:
+    """Select the most probable dataset root inside an extracted archive."""
+    entries = [p for p in Path(extracted_dir).iterdir() if not p.name.startswith("__MACOSX")]
+    if len(entries) == 1 and entries[0].is_dir():
+        return str(entries[0])
+    return extracted_dir
+
+
+def _update_session_uploaded_files(session_path: str) -> None:
+    """Rescan the session data directory and persist the uploaded-file list."""
+    info_file = os.path.join(session_path, "session_info.json")
+    if not os.path.exists(info_file):
+        return
+    data_dir = os.path.join(session_path, "data")
+    uploaded = []
+    if os.path.exists(data_dir):
+        for root, _, files in os.walk(data_dir):
+            for name in files:
+                rel_path = os.path.relpath(os.path.join(root, name), data_dir)
+                uploaded.append(rel_path.replace("\\", "/"))
+    with open(info_file, 'r', encoding='utf-8') as f:
+        session_info = json.load(f)
+    session_info["uploaded_files"] = sorted(uploaded)
+    with open(info_file, 'w', encoding='utf-8') as f:
+        json.dump(session_info, f, indent=2)
+
+
+def _record_session_data_source(session_path: str, source_info: Dict[str, Any]) -> None:
+    """Append dataset source metadata to the session record."""
+    info_file = os.path.join(session_path, "session_info.json")
+    if not os.path.exists(info_file):
+        return
+    with open(info_file, 'r', encoding='utf-8') as f:
+        session_info = json.load(f)
+    data_sources = session_info.setdefault("data_sources", [])
+    data_sources.append(source_info)
+    session_info["data_sources"] = data_sources[-10:]  # keep only the ten most recent entries
+    with open(info_file, 'w', encoding='utf-8') as f:
+        json.dump(session_info, f, indent=2)
+
+
+def _summarize_dataset_directory(data_path: str) -> Dict[str, Any]:
+    """Collect lightweight statistics about files inside a dataset directory."""
+    summary = {
+        "total_files": 0,
+        "targets_csv": False,
+        "structure_extensions": {}
+    }
+    if not os.path.exists(data_path):
+        return summary
+    for root, _, files in os.walk(data_path):
+        for name in files:
+            summary["total_files"] += 1
+            if name.lower() == "targets.csv":
+                summary["targets_csv"] = True
+            else:
+                ext = os.path.splitext(name)[1].lower() or "<no_ext>"
+                summary["structure_extensions"][ext] = summary["structure_extensions"].get(ext, 0) + 1
+    return summary
+
+
+def _safe_extract_zip(archive: zipfile.ZipFile, destination: str) -> None:
+    """Extract zip members while preventing path traversal."""
+    for member in archive.infolist():
+        name = member.filename
+        if not name:
+            continue
+        target_path = _safe_join(destination, name)
+        if member.is_dir() or name.endswith('/'):
+            os.makedirs(target_path, exist_ok=True)
+            continue
+        os.makedirs(os.path.dirname(target_path), exist_ok=True)
+        with archive.open(member, 'r') as src, open(target_path, 'wb') as dst:
+            shutil.copyfileobj(src, dst)
+
+
+def _safe_extract_tar(archive: tarfile.TarFile, destination: str) -> None:
+    """Extract tar members while preventing unsafe writes (links are skipped)."""
+    for member in archive.getmembers():
+        name = member.name
+        if not name:
+            continue
+        if member.islnk() or member.issym():
+            continue
+        target_path = _safe_join(destination, name)
+        if member.isdir():
+            os.makedirs(target_path, exist_ok=True)
+            continue
+        if member.isfile():
+            extracted = archive.extractfile(member)
+            if extracted is None:
+                continue
+            os.makedirs(os.path.dirname(target_path), exist_ok=True)
+            with extracted as src, open(target_path, 'wb') as dst:
+                shutil.copyfileobj(src, dst)
+
+
 # ============================================================================
 # Session management tools
 # ============================================================================

@@ -99,6 +255,14 @@ def create_session(session_name: Optional[str] = None) -> dict:
     os.makedirs(os.path.join(session_path, "data"), exist_ok=True)
     os.makedirs(os.path.join(session_path, "models"), exist_ok=True)
     os.makedirs(os.path.join(session_path, "outputs"), exist_ok=True)
+    # Initialize an id index file under data/ to avoid missing-file errors later
+    id_index_path = os.path.join(session_path, "data", "id.json")
+    if not os.path.exists(id_index_path):
+        try:
+            with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                json.dump({}, _idf)
+        except Exception:
+            pass
 
     session_info = {
         "session_id": session_id,

@@ -109,6 +273,7 @@ def create_session(session_name: Optional[str] = None) -> dict:
         "outputs_path": os.path.join(session_path, "outputs"),
         "uploaded_files": [],
        "trained_models": [],
+        "data_sources": [],
         "status": "active"
     }
 

@@ -117,6 +282,7 @@ def create_session(session_name: Optional[str] = None) -> dict:
     # Save session info to disk
     with open(os.path.join(session_path, "session_info.json"), 'w') as f:
         json.dump(session_info, f, indent=2)
+
 
     return {
         "success": True,

@@ -162,6 +328,7 @@ def get_session_info(session_id: str) -> dict:
         session_info["current_files"] = files
         session_info["file_count"] = len(files)
         session_info["has_targets"] = "targets.csv" in files
+        session_info["dataset_summary"] = _summarize_dataset_directory(data_path)
 
         return {"success": True, **session_info}
     except Exception as e:

@@ -272,8 +439,10 @@ def upload_structure_file(
             return {"success": False, "error": f"Session not found: {session_id}"}
 
         data_path = os.path.join(session_path, "data")
-        file_path = os.path.join(data_path, filename)
-
+        os.makedirs(data_path, exist_ok=True)
+        filename = _normalize_filename(filename)
+        file_path = _safe_join(data_path, filename)
+
         with open(file_path, 'w', encoding='utf-8') as f:
             f.write(file_content)
 

@@ -296,12 +465,41 @@ def upload_structure_file(
         except Exception as e:
             validation = {"valid": False, "warning": str(e)}
 
+        # Update the id.json index
+        try:
+            id_index_path = os.path.join(data_path, "id.json")
+            if not os.path.exists(id_index_path):
+                with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                    json.dump({}, _idf)
+            with open(id_index_path, 'r', encoding='utf-8') as _idf:
+                id_index = json.load(_idf)
+        except Exception:
+            id_index = {}
+
+        file_id = uuid.uuid4().hex
+        id_index[file_id] = {
+            "filename": filename,
+            "uploaded_at": datetime.now().isoformat(),
+            "size": len(file_content)
+        }
+        try:
+            with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                json.dump(id_index, _idf, indent=2)
+        except Exception:
+            pass
+
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "filename": filename,
             "file_size": len(file_content),
             "saved_to": file_path,
-            "validation": validation
+            "validation": validation,
+            "file_id": file_id
         }
     except Exception as e:
         return {"success": False, "error": str(e)}

@@ -338,18 +536,40 @@ def upload_structure_files_batch(
             return {"success": False, "error": f"Session not found: {session_id}"}
 
         data_path = os.path.join(session_path, "data")
+        os.makedirs(data_path, exist_ok=True)
         results = []
         success_count = 0
 
         for filename, content in files.items():
             try:
-                file_path = os.path.join(data_path, filename)
+                clean_name = _normalize_filename(filename)
+                file_path = _safe_join(data_path, clean_name)
                 with open(file_path, 'w', encoding='utf-8') as f:
                     f.write(content)
+
+                # Update the id index
+                try:
+                    id_index_path = os.path.join(data_path, "id.json")
+                    if not os.path.exists(id_index_path):
+                        with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                            json.dump({}, _idf)
+                    with open(id_index_path, 'r', encoding='utf-8') as _idf:
+                        id_index = json.load(_idf)
+                except Exception:
+                    id_index = {}
+                file_id = uuid.uuid4().hex
+                id_index[file_id] = {"filename": clean_name, "uploaded_at": datetime.now().isoformat(), "size": len(content)}
+                try:
+                    with open(id_index_path, 'w', encoding='utf-8') as _idf:
+                        json.dump(id_index, _idf, indent=2)
+                except Exception:
+                    pass
+
                 results.append({
-                    "filename": filename,
+                    "filename": clean_name,
                     "success": True,
-                    "size": len(content)
+                    "size": len(content),
+                    "file_id": file_id
                 })
                 success_count += 1
             except Exception as e:

@@ -359,6 +579,11 @@ def upload_structure_files_batch(
                     "error": str(e)
                 })
 
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "total_files": len(files),

@@ -446,6 +671,11 @@ def upload_targets(
             "existing_structure_files": len(structure_files)
         }
 
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "saved_to": targets_path,

@@ -490,14 +720,21 @@ def upload_binary_file(
             dest_path = os.path.join(session_path, "models")
         else:
             dest_path = os.path.join(session_path, "data")
-
-        file_path = os.path.join(dest_path, filename)
+
+        filename = _normalize_filename(filename)
+        file_path = _safe_join(dest_path, filename)
 
         # Decode and write binary content
         binary_content = base64.b64decode(base64_content)
         with open(file_path, 'wb') as f:
             f.write(binary_content)
 
+        try:
+            if destination != "models":
+                _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
         return {
             "success": True,
             "filename": filename,

@@ -508,6 +745,219 @@ def upload_binary_file(
         return {"success": False, "error": str(e)}
 
 
+@mcp.tool(
+    name="upload_dataset_archive",
+    description="Upload a compressed dataset archive (zip/tar) into a session's data directory."
+)
+def upload_dataset_archive(
+    session_id: str,
+    filename: str,
+    base64_content: str,
+    overwrite_existing: bool = False,
+    clear_existing: bool = False
+) -> dict:
+    """Decode and extract a dataset archive directly into the session data folder."""
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        os.makedirs(data_path, exist_ok=True)
+
+        filename = _normalize_filename(filename)
+        archive_bytes = base64.b64decode(base64_content)
+
+        temp_dir = tempfile.mkdtemp(prefix="mcp_dataset_")
+        try:
+            archive_lower = filename.lower()
+            if archive_lower.endswith(".zip"):
+                with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
+                    _safe_extract_zip(archive, temp_dir)
+            elif archive_lower.endswith((".tar", ".tar.gz", ".tgz", ".tar.bz2", ".tbz")):
+                with tarfile.open(fileobj=io.BytesIO(archive_bytes), mode="r:*") as archive:
+                    _safe_extract_tar(archive, temp_dir)
+            else:
+                return {
+                    "success": False,
+                    "error": "Unsupported archive format. Use .zip, .tar, .tar.gz, .tgz, .tar.bz2"
+                }
+
+            dataset_root = _resolve_dataset_root(temp_dir)
+            if not os.listdir(dataset_root):
+                return {"success": False, "error": "Archive appears to be empty after extraction."}
+
+            if clear_existing:
+                _clear_directory_contents(data_path)
+
+            copy_report = _copy_tree(dataset_root, data_path, overwrite=overwrite_existing)
+        finally:
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
+        summary = _summarize_dataset_directory(data_path)
+
+        try:
+            _record_session_data_source(session_path, {
+                "type": "archive_upload",
+                "filename": filename,
+                "timestamp": datetime.now().isoformat(),
+                "stats": {
+                    "created": len(copy_report["created"]),
+                    "overwritten": len(copy_report["overwritten"]),
+                    "skipped": len(copy_report["skipped"])
+                }
+            })
+        except Exception:
+            pass
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "archive_name": filename,
+            "created_files": copy_report["created"],
+            "overwritten_files": copy_report["overwritten"],
+            "skipped_files": copy_report["skipped"],
+            "dataset_summary": summary,
+            "next_steps": [
+                "Use process_session_data to generate graphs",
+                "Confirm targets.csv is present before training"
+            ]
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(
+    name="register_local_dataset",
+    description="Copy an existing local dataset directory into a session's data workspace."
+)
+def register_local_dataset(
+    session_id: str,
+    dataset_path: str,
+    overwrite_existing: bool = False,
+    clear_existing: bool = False
+) -> dict:
+    """Copy a dataset from disk into the managed session directory."""
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        dataset_abs = os.path.abspath(dataset_path)
+        if not os.path.exists(dataset_abs):
+            return {"success": False, "error": f"Dataset path not found: {dataset_path}"}
+        if not os.path.isdir(dataset_abs):
+            return {"success": False, "error": "dataset_path must be a directory"}
+
+        data_path = os.path.join(session_path, "data")
+        os.makedirs(data_path, exist_ok=True)
+
+        if Path(dataset_abs).resolve() == Path(data_path).resolve():
+            summary = _summarize_dataset_directory(data_path)
+            return {
+                "success": True,
+                "session_id": session_id,
+                "source_path": dataset_abs,
+                "created_files": [],
+                "overwritten_files": [],
+                "skipped_files": [],
+                "dataset_summary": summary,
+                "message": "dataset_path already points to the session data directory; nothing to copy."
+            }
+
+        if clear_existing:
+            _clear_directory_contents(data_path)
+
+        copy_report = _copy_tree(dataset_abs, data_path, overwrite=overwrite_existing)
+
+        try:
+            _update_session_uploaded_files(session_path)
+        except Exception:
+            pass
+
+        summary = _summarize_dataset_directory(data_path)
+
+        try:
+            _record_session_data_source(session_path, {
+                "type": "local_import",
+                "source_path": dataset_abs,
+                "timestamp": datetime.now().isoformat(),
+                "stats": {
+                    "created": len(copy_report["created"]),
+                    "overwritten": len(copy_report["overwritten"]),
+                    "skipped": len(copy_report["skipped"])
+                }
+            })
+        except Exception:
+            pass
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "source_path": dataset_abs,
+            "created_files": copy_report["created"],
+            "overwritten_files": copy_report["overwritten"],
+            "skipped_files": copy_report["skipped"],
+            "dataset_summary": summary,
+            "next_steps": [
+                "Verify targets.csv is present in session data",
+                "Run process_session_data to generate processed graphs"
+            ]
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
+@mcp.tool(
+    name="list_session_data_files",
+    description="List files currently available in a session's data directory."
+)
+def list_session_data_files(
+    session_id: str,
+    include_sizes: bool = False,
+    max_items: int = 200
+) -> dict:
+    """Enumerate dataset files stored for a session."""
+    try:
+        session_path = _get_session_path(session_id)
+        if not os.path.exists(session_path):
+            return {"success": False, "error": f"Session not found: {session_id}"}
+
+        data_path = os.path.join(session_path, "data")
+        if not os.path.exists(data_path):
+            return {"success": True, "files": [], "total_files": 0, "dataset_summary": {}}
+
+        files_info = []
+        for root, _, files in os.walk(data_path):
+            for name in files:
+                rel_path = os.path.relpath(os.path.join(root, name), data_path).replace("\\", "/")
+                file_entry: Dict[str, Any] = {"path": rel_path}
+                file_abs_path = os.path.join(root, name)
+                if include_sizes:
+                    file_entry["size_bytes"] = os.path.getsize(file_abs_path)
+                files_info.append(file_entry)
+
+        files_info.sort(key=lambda item: item["path"].lower())
+        total_files = len(files_info)
+        truncated = files_info[:max(0, max_items)]
+
+        return {
+            "success": True,
+            "session_id": session_id,
+            "files": truncated,
+            "total_files": total_files,
+            "truncated": total_files > len(truncated),
+            "dataset_summary": _summarize_dataset_directory(data_path)
+        }
+    except Exception as e:
+        return {"success": False, "error": str(e)}
+
+
 @mcp.tool(name="check_environment", description="Check if MatDeepLearn environment is properly configured and GPU is available.")
 def check_environment() -> dict:
     """