SEUyishu commited on
Commit
1f6b6df
·
verified ·
1 Parent(s): 16e0e02

Update mcp_service.py

Browse files
Files changed (1) hide show
  1. mcp_service.py +262 -0
mcp_service.py CHANGED
@@ -1701,6 +1701,268 @@ def batch_extract_tables(
1701
  return results
1702
 
1703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1704
  @mcp.tool()
1705
  def get_environment_requirements() -> Dict:
1706
  """
 
1701
  return results
1702
 
1703
 
1704
@mcp.tool()
def format_extraction_as_table(
    extraction: Dict,
    output_format: str = "markdown",
    save_path: str = ""
) -> Dict:
    """
    Format extraction results as a readable table and optionally save to file.

    Converts the nested extraction JSON into a flat table format that's easy
    to read and can be saved as CSV, Markdown, or JSON.

    Args:
        extraction: The extraction result from any extract_catalyst_data_* tool
        output_format: Output format - "markdown", "csv", "json", or "html"
        save_path: Optional file path to save the table (e.g., "results.csv")

    Returns:
        Dictionary containing formatted table and save status
    """
    # Top-level keys that carry tool metadata, not catalyst data.
    meta_keys = {"error", "raw_response", "success", "model_type"}

    def _flatten_catalyst(catalyst_name, performances):
        """Flatten one catalyst's {performance: properties} mapping into rows.

        Non-dict `performances` values are silently skipped (matches the
        original behavior of both branches).
        """
        flat = []
        if not isinstance(performances, dict):
            return flat
        for perf_name, properties in performances.items():
            row = {
                "Catalyst": catalyst_name,
                "Performance": perf_name
            }
            if isinstance(properties, dict):
                for prop_key, prop_val in properties.items():
                    if isinstance(prop_val, list):
                        row[prop_key.capitalize()] = "; ".join(str(v) for v in prop_val)
                    else:
                        # NOTE: falsy scalars (0, False, "") render as "" —
                        # preserved from the original implementation.
                        row[prop_key.capitalize()] = str(prop_val) if prop_val else ""
            else:
                row["Value"] = str(properties)
            flat.append(row)
        return flat

    try:
        # Handle different extraction structures: either the dict itself maps
        # catalysts, or the data is wrapped under a "catalysts" key.
        catalysts_data = extraction
        if isinstance(extraction, dict) and "catalysts" in extraction:
            catalysts_data = extraction["catalysts"]

        rows = []
        # If it's a list of catalyst dicts
        if isinstance(catalysts_data, list):
            for item in catalysts_data:
                if isinstance(item, dict):
                    for catalyst_name, performances in item.items():
                        rows.extend(_flatten_catalyst(catalyst_name, performances))
        # If it's a single dict of catalysts
        elif isinstance(catalysts_data, dict):
            for catalyst_name, performances in catalysts_data.items():
                # Metadata keys are only filtered in the dict form, as before.
                if catalyst_name in meta_keys:
                    continue
                rows.extend(_flatten_catalyst(catalyst_name, performances))

        if not rows:
            return {
                "success": False,
                "error": "No catalyst data found in extraction",
                "raw_extraction": extraction
            }

        # Create DataFrame
        df = pd.DataFrame(rows)

        # Format output
        if output_format == "markdown":
            headers = df.columns.tolist()
            md_lines = [
                "| " + " | ".join(headers) + " |",
                "| " + " | ".join(["---"] * len(headers)) + " |",
            ]
            for _, row in df.iterrows():
                md_lines.append("| " + " | ".join(str(v) for v in row.values) + " |")
            formatted_table = "\n".join(md_lines)
        elif output_format == "csv":
            formatted_table = df.to_csv(index=False)
        elif output_format == "json":
            formatted_table = df.to_json(orient="records", indent=2)
        elif output_format == "html":
            formatted_table = df.to_html(index=False, classes="catalyst-table")
        else:
            formatted_table = df.to_string(index=False)

        result = {
            "success": True,
            "format": output_format,
            "row_count": len(rows),
            "columns": df.columns.tolist(),
            "table": formatted_table
        }

        # Save to file if path provided; a save failure is reported in the
        # result rather than failing the whole call.
        if save_path:
            try:
                # Determine save format from extension
                ext = os.path.splitext(save_path)[1].lower()
                if ext == ".csv":
                    df.to_csv(save_path, index=False)
                elif ext == ".json":
                    df.to_json(save_path, orient="records", indent=2)
                elif ext == ".html":
                    df.to_html(save_path, index=False)
                elif ext == ".xlsx":
                    df.to_excel(save_path, index=False)
                elif ext == ".md":
                    with open(save_path, "w", encoding="utf-8") as f:
                        f.write(formatted_table if output_format == "markdown" else df.to_markdown(index=False))
                else:
                    # Default to CSV
                    df.to_csv(save_path, index=False)

                result["saved_to"] = save_path
                result["save_success"] = True
            except Exception as e:
                result["save_success"] = False
                result["save_error"] = str(e)

        return result

    except Exception as e:
        return {
            "success": False,
            "error": str(e),
            "raw_extraction": extraction
        }
1852
+
1853
+
1854
@mcp.tool()
def export_session_results(
    session_id: str,
    output_format: str = "csv",
    save_dir: str = ""
) -> Dict:
    """
    Export all extraction results from a session as formatted tables.

    Combines all extractions from a session into organized output files.

    Args:
        session_id: The session ID to export
        output_format: Output format - "csv", "json", "markdown", or "excel"
        save_dir: Directory to save files (optional, uses temp dir if not provided)

    Returns:
        Dictionary containing export status and file paths
    """
    import io  # hoisted: was imported inside the per-extraction loop

    try:
        session = session_manager.get_session(session_id)
        if not session:
            return {"success": False, "error": f"Session not found: {session_id}"}

        if not session.extractions:
            return {"success": False, "error": "No extractions in this session"}

        # Use temp dir if no save_dir provided
        if not save_dir:
            save_dir = tempfile.mkdtemp(prefix="matablgpt_export_")
        os.makedirs(save_dir, exist_ok=True)

        all_rows = []
        exported_files = []

        for extraction in session.extractions:
            # Normalize every extraction through CSV so frames can be combined.
            # NOTE(review): if @mcp.tool() wraps functions in non-callable tool
            # objects, this direct call may need `.fn` — confirm against the
            # MCP library in use.
            format_result = format_extraction_as_table(
                extraction.result,
                output_format="csv"  # Always use CSV internally for combining
            )

            if format_result.get("success") and "table" in format_result:
                # Parse the CSV back and annotate rows with provenance metadata.
                df = pd.read_csv(io.StringIO(format_result["table"]))
                df["Table_Name"] = extraction.table_name
                df["Model_Type"] = extraction.model_type
                df["Timestamp"] = extraction.timestamp
                df["Follow_Up"] = extraction.follow_up_applied
                all_rows.append(df)

        if not all_rows:
            return {"success": False, "error": "No valid extractions to export"}

        # Combine all extractions
        combined_df = pd.concat(all_rows, ignore_index=True)

        # Save based on format
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        base_name = f"extraction_{session_id}_{timestamp}"

        if output_format == "csv":
            file_path = os.path.join(save_dir, f"{base_name}.csv")
            combined_df.to_csv(file_path, index=False)
        elif output_format == "json":
            file_path = os.path.join(save_dir, f"{base_name}.json")
            combined_df.to_json(file_path, orient="records", indent=2)
        elif output_format == "excel":
            file_path = os.path.join(save_dir, f"{base_name}.xlsx")
            combined_df.to_excel(file_path, index=False)
        elif output_format == "markdown":
            file_path = os.path.join(save_dir, f"{base_name}.md")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write("# Extraction Results\n\n")
                f.write(f"Session: {session_id}\n\n")
                f.write(f"Exported: {timestamp}\n\n")
                f.write(combined_df.to_markdown(index=False))
        else:
            # Unknown format: fall back to CSV.
            file_path = os.path.join(save_dir, f"{base_name}.csv")
            combined_df.to_csv(file_path, index=False)

        exported_files.append(file_path)

        # Also create a summary
        summary = {
            "session_id": session_id,
            "total_extractions": len(session.extractions),
            "total_rows": len(combined_df),
            "catalysts": combined_df["Catalyst"].unique().tolist() if "Catalyst" in combined_df.columns else [],
            "performances": combined_df["Performance"].unique().tolist() if "Performance" in combined_df.columns else []
        }

        summary_path = os.path.join(save_dir, f"{base_name}_summary.json")
        with open(summary_path, "w", encoding="utf-8") as f:
            json.dump(summary, f, indent=2)
        exported_files.append(summary_path)

        return {
            "success": True,
            "session_id": session_id,
            "export_dir": save_dir,
            "files": exported_files,
            "summary": summary,
            "preview": combined_df.head(10).to_dict(orient="records")
        }

    except Exception as e:
        return {"success": False, "error": str(e)}
1964
+
1965
+
1966
  @mcp.tool()
1967
  def get_environment_requirements() -> Dict:
1968
  """