SEUyishu commited on
Commit
1f6b6df
·
verified ·
1 Parent(s): 16e0e02

Update mcp_service.py

Browse files
Files changed (1) hide show
  1. mcp_service.py +262 -0
mcp_service.py CHANGED
@@ -1701,6 +1701,268 @@ def batch_extract_tables(
1701
  return results
1702
 
1703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1704
  @mcp.tool()
1705
  def get_environment_requirements() -> Dict:
1706
  """
 
1701
  return results
1702
 
1703
 
1704
@mcp.tool()
def format_extraction_as_table(
    extraction: Dict,
    output_format: str = "markdown",
    save_path: str = ""
) -> Dict:
    """
    Format extraction results as a readable table and optionally save to file.

    Converts the nested extraction JSON into a flat table format that's easy
    to read and can be saved as CSV, Markdown, or JSON.

    Args:
        extraction: The extraction result from any extract_catalyst_data_* tool
        output_format: Output format - "markdown", "csv", "json", or "html"
        save_path: Optional file path to save the table (e.g., "results.csv")

    Returns:
        Dictionary containing formatted table and save status
    """
    # Top-level keys that carry tool metadata, not catalyst data.
    meta_keys = {"error", "raw_response", "success", "model_type"}

    def _flatten_catalyst(catalyst_name, performances):
        """Flatten one catalyst's {performance: properties} mapping into rows.

        Non-dict `performances` values are silently skipped (matches the
        original behavior of both branches).
        """
        flat = []
        if not isinstance(performances, dict):
            return flat
        for perf_name, properties in performances.items():
            row = {
                "Catalyst": catalyst_name,
                "Performance": perf_name
            }
            if isinstance(properties, dict):
                for prop_key, prop_val in properties.items():
                    if isinstance(prop_val, list):
                        row[prop_key.capitalize()] = "; ".join(str(v) for v in prop_val)
                    else:
                        # NOTE: falsy scalars (0, False, "") render as "" —
                        # preserved from the original implementation.
                        row[prop_key.capitalize()] = str(prop_val) if prop_val else ""
            else:
                row["Value"] = str(properties)
            flat.append(row)
        return flat

    try:
        # Handle different extraction structures: either the dict itself maps
        # catalysts, or the data is wrapped under a "catalysts" key.
        catalysts_data = extraction
        if isinstance(extraction, dict) and "catalysts" in extraction:
            catalysts_data = extraction["catalysts"]

        rows = []
        # If it's a list of catalyst dicts
        if isinstance(catalysts_data, list):
            for item in catalysts_data:
                if isinstance(item, dict):
                    for catalyst_name, performances in item.items():
                        rows.extend(_flatten_catalyst(catalyst_name, performances))
        # If it's a single dict of catalysts
        elif isinstance(catalysts_data, dict):
            for catalyst_name, performances in catalysts_data.items():
                # Metadata keys are only filtered in the dict form, as before.
                if catalyst_name in meta_keys:
                    continue
                rows.extend(_flatten_catalyst(catalyst_name, performances))

        if not rows:
            return {
                "success": False,
                "error": "No catalyst data found in extraction",
                "raw_extraction": extraction
            }

        # Create DataFrame
        df = pd.DataFrame(rows)

        # Format output
        if output_format == "markdown":
            headers = df.columns.tolist()
            md_lines = [
                "| " + " | ".join(headers) + " |",
                "| " + " | ".join(["---"] * len(headers)) + " |",
            ]
            for _, row in df.iterrows():
                md_lines.append("| " + " | ".join(str(v) for v in row.values) + " |")
            formatted_table = "\n".join(md_lines)
        elif output_format == "csv":
            formatted_table = df.to_csv(index=False)
        elif output_format == "json":
            formatted_table = df.to_json(orient="records", indent=2)
        elif output_format == "html":
            formatted_table = df.to_html(index=False, classes="catalyst-table")
        else:
            formatted_table = df.to_string(index=False)

        result = {
            "success": True,
            "format": output_format,
            "row_count": len(rows),
            "columns": df.columns.tolist(),
            "table": formatted_table
        }

        # Save to file if path provided; a save failure is reported in the
        # result rather than failing the whole call.
        if save_path:
            try:
                # Determine save format from extension
                ext = os.path.splitext(save_path)[1].lower()
                if ext == ".csv":
                    df.to_csv(save_path, index=False)
                elif ext == ".json":
                    df.to_json(save_path, orient="records", indent=2)
                elif ext == ".html":
                    df.to_html(save_path, index=False)
                elif ext == ".xlsx":
                    df.to_excel(save_path, index=False)
                elif ext == ".md":
                    with open(save_path, "w", encoding="utf-8") as f:
                        f.write(formatted_table if output_format == "markdown" else df.to_markdown(index=False))
                else:
                    # Default to CSV
                    df.to_csv(save_path, index=False)

                result["saved_to"] = save_path
                result["save_success"] = True
            except Exception as e:
                result["save_success"] = False
                result["save_error"] = str(e)

        return result

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "raw_extraction": extraction
        }
1852
+
1853
+
1854
@mcp.tool()
def export_session_results(
    session_id: str,
    output_format: str = "csv",
    save_dir: str = ""
) -> Dict:
    """
    Export all extraction results from a session as formatted tables.

    Combines all extractions from a session into organized output files.

    Args:
        session_id: The session ID to export
        output_format: Output format - "csv", "json", "markdown", or "excel"
        save_dir: Directory to save files (optional, uses temp dir if not provided)

    Returns:
        Dictionary containing export status and file paths
    """
    import io  # hoisted: was imported inside the per-extraction loop

    try:
        session = session_manager.get_session(session_id)
        if not session:
            return {"success": False, "error": f"Session not found: {session_id}"}

        if not session.extractions:
            return {"success": False, "error": "No extractions in this session"}

        # Use temp dir if no save_dir provided
        if not save_dir:
            save_dir = tempfile.mkdtemp(prefix="matablgpt_export_")
        os.makedirs(save_dir, exist_ok=True)

        all_rows = []
        exported_files = []

        for extraction in session.extractions:
            # Normalize every extraction through CSV so frames can be combined.
            # NOTE(review): if @mcp.tool() wraps functions in non-callable tool
            # objects, this direct call may need `.fn` — confirm against the
            # MCP library in use.
            format_result = format_extraction_as_table(
                extraction.result,
                output_format="csv"  # Always use CSV internally for combining
            )

            if format_result.get("success") and "table" in format_result:
                # Parse the CSV back and annotate rows with provenance metadata.
                df = pd.read_csv(io.StringIO(format_result["table"]))
                df["Table_Name"] = extraction.table_name
                df["Model_Type"] = extraction.model_type
                df["Timestamp"] = extraction.timestamp
                df["Follow_Up"] = extraction.follow_up_applied
                all_rows.append(df)

        if not all_rows:
            return {"success": False, "error": "No valid extractions to export"}

        # Combine all extractions
        combined_df = pd.concat(all_rows, ignore_index=True)

        # Save based on format
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"extraction_{session_id}_{timestamp}"

        if output_format == "csv":
            file_path = os.path.join(save_dir, f"{base_name}.csv")
            combined_df.to_csv(file_path, index=False)
        elif output_format == "json":
            file_path = os.path.join(save_dir, f"{base_name}.json")
            combined_df.to_json(file_path, orient="records", indent=2)
        elif output_format == "excel":
            file_path = os.path.join(save_dir, f"{base_name}.xlsx")
            combined_df.to_excel(file_path, index=False)
        elif output_format == "markdown":
            file_path = os.path.join(save_dir, f"{base_name}.md")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write("# Extraction Results\n\n")
                f.write(f"Session: {session_id}\n\n")
                f.write(f"Exported: {timestamp}\n\n")
                f.write(combined_df.to_markdown(index=False))
        else:
            # Unknown format: fall back to CSV.
            file_path = os.path.join(save_dir, f"{base_name}.csv")
            combined_df.to_csv(file_path, index=False)

        exported_files.append(file_path)

        # Also create a summary
        summary = {
            "session_id": session_id,
            "total_extractions": len(session.extractions),
            "total_rows": len(combined_df),
            "catalysts": combined_df["Catalyst"].unique().tolist() if "Catalyst" in combined_df.columns else [],
            "performances": combined_df["Performance"].unique().tolist() if "Performance" in combined_df.columns else []
        }

        summary_path = os.path.join(save_dir, f"{base_name}_summary.json")
        with open(summary_path, "w", encoding="utf-8") as f:
            json.dump(summary, f, indent=2)
        exported_files.append(summary_path)

        return {
            "success": True,
            "session_id": session_id,
            "export_dir": save_dir,
            "files": exported_files,
            "summary": summary,
            "preview": combined_df.head(10).to_dict(orient="records")
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
1964
+
1965
+
1966
  @mcp.tool()
1967
  def get_environment_requirements() -> Dict:
1968
  """