Spaces:

SEUyishu
/

MatTableGPT

Sleeping

App Files Files Community

SEUyishu committed on Dec 4, 2025

Commit

8f46cf0

verified ·

1 Parent(s): 84a8f07

Update app.py

Browse files

Files changed (1) hide show

app.py +626 -627

app.py CHANGED Viewed

@@ -1,627 +1,626 @@
-#!/usr/bin/env python3
-"""
-MaTableGPT Gradio Web Interface
-================================
-A web interface for the MaTableGPT MCP service.
-Provides an interactive UI for table data extraction from materials science literature.
-For HuggingFace Spaces deployment.
-"""
-import os
-import json
-import logging
-import gradio as gr
-from typing import Optional, Tuple, Dict, Any
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("matablgpt-app")
-# Import MCP service components
-try:
-    from mcp_service import (
-        table_representer,
-        table_to_json,
-        table_splitter,
-        session_manager,
-        get_extractor,
-        GPTExtractor
-    )
-    MCP_AVAILABLE = True
-except ImportError as e:
-    logger.warning(f"MCP service not available: {e}")
-    MCP_AVAILABLE = False
-# =============================================================================
-# Helper Functions
-# =============================================================================
-def format_json_output(data: Any) -> str:
-    """Format data as pretty JSON string."""
-    try:
-        return json.dumps(data, indent=2, ensure_ascii=False)
-    except:
-        return str(data)
-def check_openai_config() -> Tuple[bool, str]:
-    """Check if API configuration is complete (supports third-party services)."""
-    # Check multiple env var names
-    key = (
-        os.environ.get('LLM_API_KEY', '') or
-        os.environ.get('OPENAI_API_KEY', '')
-    )
-    base_url = (
-        os.environ.get('LLM_API_BASE', '') or
-        os.environ.get('OPENAI_API_BASE', '') or
-        os.environ.get('OPENAI_BASE_URL', '')
-    )
-    model = (
-        os.environ.get('LLM_MODEL', '') or
-        os.environ.get('OPENAI_MODEL', '') or
-        'gpt-4-turbo-preview'
-    )
-    status_parts = []
-    if key:
-        status_parts.append(f"✅ API Key: ***{key[-4:]}")
-    else:
-        return False, "⚠️ API Key not configured (set LLM_API_KEY or OPENAI_API_KEY). GPT extraction will not work."
-    if base_url:
-        # Show shortened URL
-        display_url = base_url if len(base_url) <= 35 else base_url[:32] + "..."
-        status_parts.append(f"✅ API URL: {display_url}")
-    else:
-        return False, "⚠️ API Base URL not configured (set LLM_API_BASE or OPENAI_API_BASE). Required for third-party API services."
-    status_parts.append(f"✅ Model: {model}")
-    return True, " | ".join(status_parts)
-def check_openai_key() -> Tuple[bool, str]:
-    """Legacy function - redirects to check_openai_config."""
-    return check_openai_config()
-# =============================================================================
-# Gradio Interface Functions
-# =============================================================================
-def convert_html_to_tsv(html_input: str, title: str, caption: str) -> str:
-    """Convert HTML table to TSV representation."""
-    if not MCP_AVAILABLE:
-        return "Error: MCP service not available"
-    if not html_input.strip():
-        return "Error: Please provide HTML table input"
-    try:
-        result = table_representer.html_to_tsv(html_input, title, caption)
-        return result
-    except Exception as e:
-        return f"Error: {str(e)}"
-def convert_html_to_json(html_input: str, title: str, caption: str) -> str:
-    """Convert HTML table to JSON representation."""
-    if not MCP_AVAILABLE:
-        return "Error: MCP service not available"
-    if not html_input.strip():
-        return "Error: Please provide HTML table input"
-    try:
-        result = table_to_json.html_to_json(html_input, title, caption)
-        return format_json_output(result)
-    except Exception as e:
-        return f"Error: {str(e)}"
-def analyze_table(html_input: str) -> str:
-    """Analyze HTML table structure."""
-    if not MCP_AVAILABLE:
-        return "Error: MCP service not available"
-    if not html_input.strip():
-        return "Error: Please provide HTML table input"
-    try:
-        result = table_splitter.analyze_table_structure(html_input)
-        return format_json_output(result)
-    except Exception as e:
-        return f"Error: {str(e)}"
-def split_table(html_input: str, title: str, caption: str) -> str:
-    """Split complex table into simpler components."""
-    if not MCP_AVAILABLE:
-        return "Error: MCP service not available"
-    if not html_input.strip():
-        return "Error: Please provide HTML table input"
-    try:
-        result = table_splitter.split_table(html_input, title, caption)
-        return format_json_output({
-            "table_count": len(result),
-            "tables": result
-        })
-    except Exception as e:
-        return f"Error: {str(e)}"
-def extract_zero_shot(table_repr: str) -> str:
-    """Extract catalyst data using zero-shot approach."""
-    if not MCP_AVAILABLE:
-        return "Error: MCP service not available"
-    if not table_repr.strip():
-        return "Error: Please provide table representation"
-    has_key, key_status = check_openai_key()
-    if not has_key:
-        return f"Error: {key_status}"
-    try:
-        extractor = get_extractor()
-        result = extractor.extract_zero_shot(table_repr)
-        return format_json_output(result)
-    except Exception as e:
-        return f"Error: {str(e)}"
-def extract_few_shot(table_repr: str, examples_json: str) -> str:
-    """Extract catalyst data using few-shot approach."""
-    if not MCP_AVAILABLE:
-        return "Error: MCP service not available"
-    if not table_repr.strip():
-        return "Error: Please provide table representation"
-    has_key, key_status = check_openai_key()
-    if not has_key:
-        return f"Error: {key_status}"
-    try:
-        examples = json.loads(examples_json) if examples_json.strip() else []
-        extractor = get_extractor()
-        result = extractor.extract_few_shot(table_repr, examples)
-        return format_json_output(result)
-    except json.JSONDecodeError:
-        return "Error: Invalid examples JSON format"
-    except Exception as e:
-        return f"Error: {str(e)}"
-def validate_extraction(extraction_json: str) -> str:
-    """Validate extraction result."""
-    if not extraction_json.strip():
-        return "Error: Please provide extraction JSON"
-    try:
-        extraction = json.loads(extraction_json)
-    except json.JSONDecodeError:
-        return "Error: Invalid JSON format"
-    issues = []
-    warnings = []
-    if not isinstance(extraction, dict):
-        return format_json_output({"valid": False, "issues": ["Extraction must be a dictionary"]})
-    if "error" in extraction:
-        issues.append(f"Extraction contains error: {extraction['error']}")
-    valid_performance_types = set(GPTExtractor.PERFORMANCE_LIST)
-    for catalyst_name, performances in extraction.items():
-        if catalyst_name in ["error", "raw_response", "catalysts"]:
-            continue
-        if not isinstance(performances, dict):
-            warnings.append(f"Catalyst '{catalyst_name}' should have dict of performances")
-            continue
-        for perf_name, properties in performances.items():
-            if perf_name not in valid_performance_types:
-                warnings.append(f"Unknown performance type: {perf_name}")
-            if isinstance(properties, dict):
-                for prop_key in properties.keys():
-                    if prop_key not in GPTExtractor.PROPERTY_TEMPLATE:
-                        warnings.append(f"Unknown property key: {prop_key}")
-    return format_json_output({
-        "valid": len(issues) == 0,
-        "issues": issues,
-        "warnings": warnings
-    })
-def get_performance_types() -> str:
-    """Get list of supported performance types."""
-    return format_json_output({
-        "performance_types": GPTExtractor.PERFORMANCE_LIST,
-        "property_template": GPTExtractor.PROPERTY_TEMPLATE
-    })
-def get_code_template(repr_format: str, model_type: str) -> str:
-    """Generate code template for local extraction."""
-    code = f'''"""
-MaTableGPT Local Extraction Template
-Model Type: {model_type}
-Representation Format: {repr_format}
-"""
-from openai import OpenAI
-import json
-# Initialize client
-client = OpenAI(api_key="YOUR_API_KEY")
-# Performance types to extract
-PERFORMANCE_LIST = [
-    'overpotential', 'tafel_slope', 'Rct', 'stability', 'Cdl',
-    'onset_potential', 'current_density', 'potential', 'TOF', 'ECSA',
-    'water_splitting_potential', 'mass_activity', 'exchange_current_density',
-    'Rs', 'specific_activity', 'onset_overpotential', 'BET', 'surface_area',
-    'loading', 'apparent_activation_energy'
-]
-# Your table representation
-table_representation = """
-# Paste your {repr_format.upper()} representation here
-"""
-# System prompt
-system_prompt = """I will extract catalyst performance information from the table and create JSON format.
-Performance types: """ + str(PERFORMANCE_LIST) + """
-The JSON format will have performance within the catalyst, with elements:
-reaction type, value, electrolyte, condition, current density, versus, substrate.
-Output must contain only JSON dictionary."""
-# Extract
-response = client.chat.completions.create(
-    model="gpt-4-turbo-preview",
-    messages=[
-        {{"role": "system", "content": system_prompt}},
-        {{"role": "user", "content": table_representation}}
-    ],
-    temperature=0
-)
-result = response.choices[0].message.content.strip()
-print(json.dumps(json.loads(result), indent=2))
-'''
-    return code
-# =============================================================================
-# Gradio UI
-# =============================================================================
-# Sample HTML table for demo
-SAMPLE_HTML = '''<table>
-  <thead>
-    <tr>
-      <th>Catalyst</th>
-      <th>Overpotential (mV)</th>
-      <th>Tafel Slope (mV/dec)</th>
-      <th>Electrolyte</th>
-    </tr>
-  </thead>
-  <tbody>
-    <tr>
-      <td>Pt/C</td>
-      <td>280</td>
-      <td>65</td>
-      <td>1M KOH</td>
-    </tr>
-    <tr>
-      <td>NiFe-LDH</td>
-      <td>230</td>
-      <td>45</td>
-      <td>1M KOH</td>
-    </tr>
-    <tr>
-      <td>Co3O4</td>
-      <td>350</td>
-      <td>78</td>
-      <td>1M KOH</td>
-    </tr>
-  </tbody>
-</table>'''
-def create_ui():
-    """Create Gradio interface."""
-    # Check status
-    has_key, key_status = check_openai_key()
-    status_color = "green" if has_key else "orange"
-    with gr.Blocks(
-        title="MaTableGPT - Table Data Extractor",
-        theme=gr.themes.Soft()
-    ) as app:
-        gr.Markdown("""
-        # 🔬 MaTableGPT - Table Data Extractor
-        **Extract structured catalyst performance data from HTML tables in materials science literature**
-        This tool uses GPT models to convert complex HTML tables into structured JSON data with
-        catalyst names, performance metrics (overpotential, Tafel slope, etc.), and associated properties.
-        """)
-        gr.Markdown(f"**Status:** <span style='color:{status_color}'>{key_status}</span>")
-        with gr.Tabs():
-            # Tab 1: Table Representation
-            with gr.TabItem("📋 Table Representation"):
-                gr.Markdown("### Convert HTML tables to TSV or JSON format")
-                with gr.Row():
-                    with gr.Column():
-                        html_input = gr.Textbox(
-                            label="HTML Table Input",
-                            placeholder="Paste your HTML table here...",
-                            lines=15,
-                            value=SAMPLE_HTML
-                        )
-                        title_input = gr.Textbox(
-                            label="Table Title (optional)",
-                            placeholder="e.g., Table 1: OER Catalyst Performance"
-                        )
-                        caption_input = gr.Textbox(
-                            label="Table Caption (optional)",
-                            placeholder="e.g., Performance measured at 10 mA/cm²"
-                        )
-                        with gr.Row():
-                            tsv_btn = gr.Button("Convert to TSV", variant="primary")
-                            json_btn = gr.Button("Convert to JSON", variant="primary")
-                    with gr.Column():
-                        repr_output = gr.Textbox(
-                            label="Representation Output",
-                            lines=20,
-                            show_copy_button=True
-                        )
-                tsv_btn.click(
-                    convert_html_to_tsv,
-                    inputs=[html_input, title_input, caption_input],
-                    outputs=repr_output
-                )
-                json_btn.click(
-                    convert_html_to_json,
-                    inputs=[html_input, title_input, caption_input],
-                    outputs=repr_output
-                )
-            # Tab 2: Table Analysis & Splitting
-            with gr.TabItem("🔍 Table Analysis"):
-                gr.Markdown("### Analyze and split complex tables")
-                with gr.Row():
-                    with gr.Column():
-                        html_analyze = gr.Textbox(
-                            label="HTML Table Input",
-                            placeholder="Paste your HTML table here...",
-                            lines=10,
-                            value=SAMPLE_HTML
-                        )
-                        with gr.Row():
-                            analyze_btn = gr.Button("Analyze Structure", variant="secondary")
-                            split_btn = gr.Button("Split Table", variant="secondary")
-                    with gr.Column():
-                        analysis_output = gr.Textbox(
-                            label="Analysis Result",
-                            lines=15,
-                            show_copy_button=True
-                        )
-                analyze_btn.click(
-                    analyze_table,
-                    inputs=html_analyze,
-                    outputs=analysis_output
-                )
-                split_btn.click(
-                    split_table,
-                    inputs=[html_analyze, title_input, caption_input],
-                    outputs=analysis_output
-                )
-            # Tab 3: GPT Extraction
-            with gr.TabItem("🤖 GPT Extraction"):
-                gr.Markdown("### Extract catalyst data using GPT models")
-                if not has_key:
-                    gr.Markdown("""
-                    ⚠️ **OpenAI API Key Required**
-                    Set the `OPENAI_API_KEY` environment variable to enable GPT extraction.
-                    """)
-                with gr.Row():
-                    with gr.Column():
-                        table_repr_input = gr.Textbox(
-                            label="Table Representation (TSV or JSON)",
-                            placeholder="Paste your table representation here...",
-                            lines=10
-                        )
-                        extraction_method = gr.Radio(
-                            ["Zero-shot", "Few-shot"],
-                            label="Extraction Method",
-                            value="Zero-shot"
-                        )
-                        examples_input = gr.Textbox(
-                            label="Examples (for Few-shot, JSON format)",
-                            placeholder='[{"input": "...", "output": "..."}]',
-                            lines=5,
-                            visible=False
-                        )
-                        extract_btn = gr.Button("Extract Catalyst Data", variant="primary")
-                    with gr.Column():
-                        extraction_output = gr.Textbox(
-                            label="Extraction Result",
-                            lines=20,
-                            show_copy_button=True
-                        )
-                def update_examples_visibility(method):
-                    return gr.update(visible=(method == "Few-shot"))
-                extraction_method.change(
-                    update_examples_visibility,
-                    inputs=extraction_method,
-                    outputs=examples_input
-                )
-                def extract_data(table_repr, method, examples):
-                    if method == "Zero-shot":
-                        return extract_zero_shot(table_repr)
-                    else:
-                        return extract_few_shot(table_repr, examples)
-                extract_btn.click(
-                    extract_data,
-                    inputs=[table_repr_input, extraction_method, examples_input],
-                    outputs=extraction_output
-                )
-            # Tab 4: Validation
-            with gr.TabItem("✅ Validation"):
-                gr.Markdown("### Validate extraction results")
-                with gr.Row():
-                    with gr.Column():
-                        validation_input = gr.Textbox(
-                            label="Extraction JSON to Validate",
-                            placeholder="Paste extraction JSON here...",
-                            lines=15
-                        )
-                        validate_btn = gr.Button("Validate", variant="secondary")
-                    with gr.Column():
-                        validation_output = gr.Textbox(
-                            label="Validation Result",
-                            lines=10
-                        )
-                        gr.Markdown("### Supported Performance Types")
-                        perf_types = gr.Textbox(
-                            label="",
-                            value=get_performance_types(),
-                            lines=10,
-                            interactive=False
-                        )
-                validate_btn.click(
-                    validate_extraction,
-                    inputs=validation_input,
-                    outputs=validation_output
-                )
-            # Tab 5: Code Template
-            with gr.TabItem("💻 Code Template"):
-                gr.Markdown("### Generate Python code for local extraction")
-                with gr.Row():
-                    repr_format = gr.Dropdown(
-                        ["tsv", "json"],
-                        label="Representation Format",
-                        value="tsv"
-                    )
-                    model_type = gr.Dropdown(
-                        ["zero-shot", "few-shot", "fine-tuning"],
-                        label="Model Type",
-                        value="zero-shot"
-                    )
-                generate_btn = gr.Button("Generate Code", variant="secondary")
-                code_output = gr.Code(
-                    label="Python Code Template",
-                    language="python",
-                    lines=30
-                )
-                generate_btn.click(
-                    get_code_template,
-                    inputs=[repr_format, model_type],
-                    outputs=code_output
-                )
-            # Tab 6: About
-            with gr.TabItem("ℹ️ About"):
-                gr.Markdown("""
-                ## About MaTableGPT
-                MaTableGPT is a GPT-based table data extractor specifically designed for
-                materials science literature. It converts complex HTML tables containing
-                catalyst performance data into structured JSON format.
-                ### Workflow
-                1. **Table Representation**: Convert HTML tables to TSV or JSON format
-                2. **Table Splitting** (optional): Break down complex tables with multiple headers
-                3. **GPT Extraction**: Use zero-shot, few-shot, or fine-tuned models to extract data
-                4. **Validation**: Verify extraction results against expected schema
-                ### Supported Performance Types
-                - Overpotential, Tafel slope, Rct, Stability, Cdl
-                - Onset potential, Current density, Potential, TOF, ECSA
-                - Water splitting potential, Mass activity, Exchange current density
-                - Rs, Specific activity, Onset overpotential, BET, Surface area
-                - Loading, Apparent activation energy
-                ### MCP Integration
-                This service is also available as an MCP (Model Context Protocol) server,
-                allowing integration with AI assistants like Claude.
-                ### Credits
-                Based on [MaTableGPT](https://github.com/your-repo/MaTableGPT) research.
-                """)
-        gr.Markdown("---\n*MaTableGPT MCP Service - Materials Science Table Data Extraction*")
-    return app
-# =============================================================================
-# Main Entry Point
-# =============================================================================
-def main():
-    """Run the Gradio app."""
-    app = create_ui()
-    # Get port from environment or default
-    port = int(os.environ.get('GRADIO_SERVER_PORT', 7860))
-    app.launch(
-        server_name="0.0.0.0",
-        server_port=port,
-        share=False
-    )
-if __name__ == "__main__":
-    main()

+#!/usr/bin/env python3
+"""
+MaTableGPT Gradio Web Interface
+================================
+A web interface for the MaTableGPT MCP service.
+Provides an interactive UI for table data extraction from materials science literature.
+For HuggingFace Spaces deployment.
+"""
+import os
+import json
+import logging
+import gradio as gr
+from typing import Optional, Tuple, Dict, Any
+# Configure logging: root logger at INFO level, plus this module's named logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("matablgpt-app")
+# Import MCP service components. The import is optional: on ImportError the
+# failure is logged as a warning and MCP_AVAILABLE is set to False, which the
+# handler functions below check before touching any of these names.
+try:
+    from mcp_service import (
+        table_representer,
+        table_to_json,
+        table_splitter,
+        session_manager,
+        get_extractor,
+        GPTExtractor
+    )
+    MCP_AVAILABLE = True
+except ImportError as e:
+    logger.warning(f"MCP service not available: {e}")
+    MCP_AVAILABLE = False
+# =============================================================================
+# Helper Functions
+# =============================================================================
+def format_json_output(data: Any) -> str:
+    """Format data as a pretty-printed JSON string.
+
+    Args:
+        data: Any value. Serializable values are rendered with a two-space
+            indent and non-ASCII characters left unescaped.
+
+    Returns:
+        The JSON text, or ``str(data)`` when ``data`` cannot be serialized.
+    """
+    try:
+        return json.dumps(data, indent=2, ensure_ascii=False)
+    except (TypeError, ValueError, RecursionError):
+        # TypeError: unsupported type; ValueError: circular reference;
+        # RecursionError: nesting too deep. Anything else (for example
+        # KeyboardInterrupt) propagates instead of being silently swallowed.
+        return str(data)
+def check_openai_config() -> Tuple[bool, str]:
+    """Report whether the LLM API settings are present in the environment.
+
+    The key, base URL and model are each read from several alternative
+    environment variable names; the first non-empty value wins. The model
+    falls back to 'gpt-4-turbo-preview'.
+
+    Returns:
+        ``(True, summary)`` when both a key and a base URL are set, where the
+        summary shows the last four key characters, the base URL (shortened
+        when longer than 35 characters) and the model. ``(False, message)``
+        when the key or the base URL is missing.
+    """
+    def first_set(*names: str, default: str = '') -> str:
+        # Return the first non-empty value among the named variables.
+        for name in names:
+            value = os.environ.get(name, '')
+            if value:
+                return value
+        return default
+    api_key = first_set('LLM_API_KEY', 'OPENAI_API_KEY')
+    if not api_key:
+        return False, "⚠️ API Key not configured (set LLM_API_KEY or OPENAI_API_KEY). GPT extraction will not work."
+    endpoint = first_set('LLM_API_BASE', 'OPENAI_API_BASE', 'OPENAI_BASE_URL')
+    if not endpoint:
+        return False, "⚠️ API Base URL not configured (set LLM_API_BASE or OPENAI_API_BASE). Required for third-party API services."
+    model_name = first_set('LLM_MODEL', 'OPENAI_MODEL', default='gpt-4-turbo-preview')
+    # Keep the status line compact: long URLs are cut to 32 characters plus "...".
+    shown_url = endpoint if len(endpoint) <= 35 else endpoint[:32] + "..."
+    summary = (
+        f"✅ API Key: ***{api_key[-4:]}",
+        f"✅ API URL: {shown_url}",
+        f"✅ Model: {model_name}",
+    )
+    return True, " | ".join(summary)
+def check_openai_key() -> Tuple[bool, str]:
+    """Legacy entry point kept for existing callers; returns check_openai_config()'s result unchanged."""
+    config_status = check_openai_config()
+    return config_status
+# =============================================================================
+# Gradio Interface Functions
+# =============================================================================
+def convert_html_to_tsv(html_input: str, title: str, caption: str) -> str:
+    """Render an HTML table as TSV via ``table_representer.html_to_tsv``.
+
+    Args:
+        html_input: HTML markup of the table; must not be blank.
+        title: Table title, passed through to the converter.
+        caption: Table caption, passed through to the converter.
+
+    Returns:
+        The converter's result, or an "Error: ..." string when the MCP
+        service is unavailable, the input is blank, or the converter raises.
+    """
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    if html_input.strip() == "":
+        return "Error: Please provide HTML table input"
+    try:
+        return table_representer.html_to_tsv(html_input, title, caption)
+    except Exception as exc:
+        # Surface the failure text in the output box instead of raising.
+        return "Error: " + str(exc)
+def convert_html_to_json(html_input: str, title: str, caption: str) -> str:
+    """Render an HTML table as pretty-printed JSON via ``table_to_json.html_to_json``.
+
+    Args:
+        html_input: HTML markup of the table; must not be blank.
+        title: Table title, passed through to the converter.
+        caption: Table caption, passed through to the converter.
+
+    Returns:
+        The converter's result formatted by ``format_json_output``, or an
+        "Error: ..." string when the MCP service is unavailable, the input
+        is blank, or the conversion raises.
+    """
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    if html_input.strip() == "":
+        return "Error: Please provide HTML table input"
+    try:
+        as_json = table_to_json.html_to_json(html_input, title, caption)
+        return format_json_output(as_json)
+    except Exception as exc:
+        # Surface the failure text in the output box instead of raising.
+        return "Error: " + str(exc)
+def analyze_table(html_input: str) -> str:
+    """Describe an HTML table's structure via ``table_splitter.analyze_table_structure``.
+
+    Args:
+        html_input: HTML markup of the table; must not be blank.
+
+    Returns:
+        The analysis formatted by ``format_json_output``, or an "Error: ..."
+        string when the MCP service is unavailable, the input is blank, or
+        the analysis raises.
+    """
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    if html_input.strip() == "":
+        return "Error: Please provide HTML table input"
+    try:
+        structure = table_splitter.analyze_table_structure(html_input)
+        return format_json_output(structure)
+    except Exception as exc:
+        # Surface the failure text in the output box instead of raising.
+        return "Error: " + str(exc)
+def split_table(html_input: str, title: str, caption: str) -> str:
+    """Split an HTML table into parts via ``table_splitter.split_table``.
+
+    Args:
+        html_input: HTML markup of the table; must not be blank.
+        title: Table title, passed through to the splitter.
+        caption: Table caption, passed through to the splitter.
+
+    Returns:
+        JSON text with "table_count" (length of the splitter's result) and
+        "tables" (the result itself), or an "Error: ..." string when the MCP
+        service is unavailable, the input is blank, or the split raises.
+    """
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    if html_input.strip() == "":
+        return "Error: Please provide HTML table input"
+    try:
+        parts = table_splitter.split_table(html_input, title, caption)
+        report = {"table_count": len(parts), "tables": parts}
+        return format_json_output(report)
+    except Exception as exc:
+        # Surface the failure text in the output box instead of raising.
+        return "Error: " + str(exc)
+def extract_zero_shot(table_repr: str) -> str:
+    """Run zero-shot catalyst extraction on a table representation.
+
+    Args:
+        table_repr: Table representation text; must not be blank.
+
+    Returns:
+        The extractor's result formatted by ``format_json_output``, or an
+        "Error: ..." string when the MCP service is unavailable, the input
+        is blank, the API configuration check fails, or extraction raises.
+    """
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    if table_repr.strip() == "":
+        return "Error: Please provide table representation"
+    configured, status_message = check_openai_key()
+    if not configured:
+        return f"Error: {status_message}"
+    try:
+        extracted = get_extractor().extract_zero_shot(table_repr)
+        return format_json_output(extracted)
+    except Exception as exc:
+        # Surface the failure text in the output box instead of raising.
+        return "Error: " + str(exc)
+def extract_few_shot(table_repr: str, examples_json: str) -> str:
+    """Extract catalyst data using the few-shot approach.
+
+    Args:
+        table_repr: Table representation text; must not be blank.
+        examples_json: JSON text holding the few-shot examples. Blank (or
+            None) means no examples, in which case an empty list is used.
+
+    Returns:
+        The extractor's result formatted by ``format_json_output``, or an
+        "Error: ..." string when the MCP service is unavailable, the input
+        is blank, the API configuration check fails, the examples are not
+        valid JSON, or extraction raises.
+    """
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    if not table_repr.strip():
+        return "Error: Please provide table representation"
+    has_key, key_status = check_openai_key()
+    if not has_key:
+        return f"Error: {key_status}"
+    # Parse the examples in their own try block so that a JSONDecodeError
+    # raised later, inside the extractor, is not misreported as a problem
+    # with the user's examples.
+    examples_text = (examples_json or "").strip()
+    try:
+        examples = json.loads(examples_text) if examples_text else []
+    except json.JSONDecodeError:
+        return "Error: Invalid examples JSON format"
+    try:
+        extractor = get_extractor()
+        result = extractor.extract_few_shot(table_repr, examples)
+        return format_json_output(result)
+    except Exception as e:
+        return f"Error: {str(e)}"
+def validate_extraction(extraction_json: str) -> str:
+    """Validate an extraction result against the extractor's known names.
+
+    Args:
+        extraction_json: JSON text expected to decode to a dict mapping
+            catalyst names to dicts of performance entries.
+
+    Returns:
+        An "Error: ..." string for blank or unparsable input, or when the
+        MCP service is unavailable. Otherwise JSON text with "valid",
+        "issues" and "warnings" keys; non-dict input yields only "valid"
+        and "issues". Only issues make the result invalid, warnings do not.
+    """
+    if not extraction_json.strip():
+        return "Error: Please provide extraction JSON"
+    try:
+        extraction = json.loads(extraction_json)
+    except json.JSONDecodeError:
+        return "Error: Invalid JSON format"
+    if not isinstance(extraction, dict):
+        return format_json_output({"valid": False, "issues": ["Extraction must be a dictionary"]})
+    # GPTExtractor is only bound when the mcp_service import succeeded, so
+    # return the same message the other handlers use rather than raising
+    # NameError below.
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    issues = []
+    warnings = []
+    if "error" in extraction:
+        issues.append(f"Extraction contains error: {extraction['error']}")
+    valid_performance_types = set(GPTExtractor.PERFORMANCE_LIST)
+    known_properties = GPTExtractor.PROPERTY_TEMPLATE
+    for catalyst_name, performances in extraction.items():
+        # These top-level keys are bookkeeping entries, not catalyst names.
+        if catalyst_name in ["error", "raw_response", "catalysts"]:
+            continue
+        if not isinstance(performances, dict):
+            warnings.append(f"Catalyst '{catalyst_name}' should have dict of performances")
+            continue
+        for perf_name, properties in performances.items():
+            if perf_name not in valid_performance_types:
+                warnings.append(f"Unknown performance type: {perf_name}")
+            if isinstance(properties, dict):
+                for prop_key in properties:
+                    if prop_key not in known_properties:
+                        warnings.append(f"Unknown property key: {prop_key}")
+    return format_json_output({
+        "valid": len(issues) == 0,
+        "issues": issues,
+        "warnings": warnings
+    })
+def get_performance_types() -> str:
+    """Get the supported performance types and property template as JSON text.
+
+    Returns:
+        JSON text with "performance_types" and "property_template" taken
+        from ``GPTExtractor``, or "Error: MCP service not available" when
+        the optional mcp_service import failed.
+    """
+    # GPTExtractor is only bound when the mcp_service import succeeded;
+    # without this guard the call raises NameError instead of degrading
+    # like the other handlers.
+    if not MCP_AVAILABLE:
+        return "Error: MCP service not available"
+    return format_json_output({
+        "performance_types": GPTExtractor.PERFORMANCE_LIST,
+        "property_template": GPTExtractor.PROPERTY_TEMPLATE
+    })
+def get_code_template(repr_format: str, model_type: str) -> str:
+    """Build a standalone Python script template for local extraction.
+
+    Args:
+        repr_format: Representation name; shown in the script header as
+            given and upper-cased in the paste-here placeholder.
+        model_type: Model type label; shown in the script header only.
+
+    Returns:
+        The script source as a single string.
+    """
+    # Plain (non-f) template: the doubled braces become literal braces when
+    # str.format() fills in the three named fields.
+    template = '''"""
+MaTableGPT Local Extraction Template
+Model Type: {model_type}
+Representation Format: {repr_format}
+"""
+from openai import OpenAI
+import json
+# Initialize client
+client = OpenAI(api_key="YOUR_API_KEY")
+# Performance types to extract
+PERFORMANCE_LIST = [
+    'overpotential', 'tafel_slope', 'Rct', 'stability', 'Cdl',
+    'onset_potential', 'current_density', 'potential', 'TOF', 'ECSA',
+    'water_splitting_potential', 'mass_activity', 'exchange_current_density',
+    'Rs', 'specific_activity', 'onset_overpotential', 'BET', 'surface_area',
+    'loading', 'apparent_activation_energy'
+]
+# Your table representation
+table_representation = """
+# Paste your {repr_format_upper} representation here
+"""
+# System prompt
+system_prompt = """I will extract catalyst performance information from the table and create JSON format.
+Performance types: """ + str(PERFORMANCE_LIST) + """
+The JSON format will have performance within the catalyst, with elements:
+reaction type, value, electrolyte, condition, current density, versus, substrate.
+Output must contain only JSON dictionary."""
+# Extract
+response = client.chat.completions.create(
+    model="gpt-4-turbo-preview",
+    messages=[
+        {{"role": "system", "content": system_prompt}},
+        {{"role": "user", "content": table_representation}}
+    ],
+    temperature=0
+)
+result = response.choices[0].message.content.strip()
+print(json.dumps(json.loads(result), indent=2))
+'''
+    return template.format(
+        model_type=model_type,
+        repr_format=repr_format,
+        repr_format_upper=repr_format.upper(),
+    )
+# =============================================================================
+# Gradio UI
+# =============================================================================
+# Sample HTML table for demo: one header row (catalyst, overpotential, Tafel
+# slope, electrolyte) and three data rows. Used as the pre-filled value of the
+# HTML input textbox.
+SAMPLE_HTML = '''<table>
+  <thead>
+    <tr>
+      <th>Catalyst</th>
+      <th>Overpotential (mV)</th>
+      <th>Tafel Slope (mV/dec)</th>
+      <th>Electrolyte</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>Pt/C</td>
+      <td>280</td>
+      <td>65</td>
+      <td>1M KOH</td>
+    </tr>
+    <tr>
+      <td>NiFe-LDH</td>
+      <td>230</td>
+      <td>45</td>
+      <td>1M KOH</td>
+    </tr>
+    <tr>
+      <td>Co3O4</td>
+      <td>350</td>
+      <td>78</td>
+      <td>1M KOH</td>
+    </tr>
+  </tbody>
+</table>'''
+def create_ui():
+    """Create Gradio interface."""
+    # Check status: check_openai_key() yields (has_key, message); the message is rendered green when a key is present, orange otherwise
+    has_key, key_status = check_openai_key()
+    status_color = "green" if has_key else "orange"
+    with gr.Blocks(
+        title="MaTableGPT - Table Data Extractor"
+    ) as app:
+        gr.Markdown("""
+        # 🔬 MaTableGPT - Table Data Extractor
+        **Extract structured catalyst performance data from HTML tables in materials science literature**
+        This tool uses GPT models to convert complex HTML tables into structured JSON data with
+        catalyst names, performance metrics (overpotential, Tafel slope, etc.), and associated properties.
+        """)
+        gr.Markdown(f"**Status:** <span style='color:{status_color}'>{key_status}</span>")  # key_status is interpolated into inline HTML
+        with gr.Tabs():
+            # Tab 1: Table Representation - HTML table plus optional title/caption, converted to TSV or JSON into one shared output box
+            with gr.TabItem("📋 Table Representation"):
+                gr.Markdown("### Convert HTML tables to TSV or JSON format")
+                with gr.Row():
+                    with gr.Column():
+                        html_input = gr.Textbox(
+                            label="HTML Table Input",
+                            placeholder="Paste your HTML table here...",
+                            lines=15,
+                            value=SAMPLE_HTML  # pre-filled with the demo table
+                        )
+                        title_input = gr.Textbox(
+                            label="Table Title (optional)",
+                            placeholder="e.g., Table 1: OER Catalyst Performance"
+                        )
+                        caption_input = gr.Textbox(
+                            label="Table Caption (optional)",
+                            placeholder="e.g., Performance measured at 10 mA/cm²"
+                        )
+                        with gr.Row():
+                            tsv_btn = gr.Button("Convert to TSV", variant="primary")
+                            json_btn = gr.Button("Convert to JSON", variant="primary")
+                    with gr.Column():
+                        repr_output = gr.Textbox(
+                            label="Representation Output",
+                            lines=20,
+                            show_copy_button=True
+                        )
+                tsv_btn.click(
+                    convert_html_to_tsv,
+                    inputs=[html_input, title_input, caption_input],
+                    outputs=repr_output
+                )
+                json_btn.click(
+                    convert_html_to_json,
+                    inputs=[html_input, title_input, caption_input],
+                    outputs=repr_output  # same box as the TSV button, so each conversion overwrites the previous result
+                )
+            # Tab 2: Table Analysis & Splitting - both buttons write to the same analysis_output box
+            with gr.TabItem("🔍 Table Analysis"):
+                gr.Markdown("### Analyze and split complex tables")
+                with gr.Row():
+                    with gr.Column():
+                        html_analyze = gr.Textbox(
+                            label="HTML Table Input",
+                            placeholder="Paste your HTML table here...",
+                            lines=10,
+                            value=SAMPLE_HTML  # pre-filled with the demo table
+                        )
+                        with gr.Row():
+                            analyze_btn = gr.Button("Analyze Structure", variant="secondary")
+                            split_btn = gr.Button("Split Table", variant="secondary")
+                    with gr.Column():
+                        analysis_output = gr.Textbox(
+                            label="Analysis Result",
+                            lines=15,
+                            show_copy_button=True
+                        )
+                analyze_btn.click(
+                    analyze_table,
+                    inputs=html_analyze,
+                    outputs=analysis_output
+                )
+                split_btn.click(
+                    split_table,
+                    inputs=[html_analyze, title_input, caption_input],  # NOTE(review): title/caption are the Tab 1 textboxes; this tab defines none of its own - confirm intended
+                    outputs=analysis_output
+                )
+            # Tab 3: GPT Extraction - table representation in, extractor output out; method chosen via a radio button
+            with gr.TabItem("🤖 GPT Extraction"):
+                gr.Markdown("### Extract catalyst data using GPT models")
+                if not has_key:  # evaluated once while the UI is being built, not on each request
+                    gr.Markdown("""
+                    ⚠️ **OpenAI API Key Required**
+                    Set the `OPENAI_API_KEY` environment variable to enable GPT extraction.
+                    """)
+                with gr.Row():
+                    with gr.Column():
+                        table_repr_input = gr.Textbox(
+                            label="Table Representation (TSV or JSON)",
+                            placeholder="Paste your table representation here...",
+                            lines=10
+                        )
+                        extraction_method = gr.Radio(
+                            ["Zero-shot", "Few-shot"],
+                            label="Extraction Method",
+                            value="Zero-shot"
+                        )
+                        examples_input = gr.Textbox(
+                            label="Examples (for Few-shot, JSON format)",
+                            placeholder='[{"input": "...", "output": "..."}]',
+                            lines=5,
+                            visible=False  # hidden until "Few-shot" is selected (see update_examples_visibility)
+                        )
+                        extract_btn = gr.Button("Extract Catalyst Data", variant="primary")
+                    with gr.Column():
+                        extraction_output = gr.Textbox(
+                            label="Extraction Result",
+                            lines=20,
+                            show_copy_button=True
+                        )
+                def update_examples_visibility(method):  # shows the examples box only for the "Few-shot" radio value
+                    return gr.update(visible=(method == "Few-shot"))
+                extraction_method.change(
+                    update_examples_visibility,
+                    inputs=extraction_method,
+                    outputs=examples_input
+                )
+                def extract_data(table_repr, method, examples):  # dispatches to the matching extractor; examples are ignored for "Zero-shot"
+                    if method == "Zero-shot":
+                        return extract_zero_shot(table_repr)
+                    else:
+                        return extract_few_shot(table_repr, examples)
+                extract_btn.click(
+                    extract_data,
+                    inputs=[table_repr_input, extraction_method, examples_input],
+                    outputs=extraction_output
+                )
+            # Tab 4: Validation - pasted extraction JSON is passed to validate_extraction; a read-only list of performance types sits beside it
+            with gr.TabItem("✅ Validation"):
+                gr.Markdown("### Validate extraction results")
+                with gr.Row():
+                    with gr.Column():
+                        validation_input = gr.Textbox(
+                            label="Extraction JSON to Validate",
+                            placeholder="Paste extraction JSON here...",
+                            lines=15
+                        )
+                        validate_btn = gr.Button("Validate", variant="secondary")
+                    with gr.Column():
+                        validation_output = gr.Textbox(
+                            label="Validation Result",
+                            lines=10
+                        )
+                        gr.Markdown("### Supported Performance Types")
+                        perf_types = gr.Textbox(  # display-only; the perf_types name is not referenced again in this function
+                            label="",
+                            value=get_performance_types(),  # called once, at UI build time
+                            lines=10,
+                            interactive=False
+                        )
+                validate_btn.click(
+                    validate_extraction,
+                    inputs=validation_input,
+                    outputs=validation_output
+                )
+            # Tab 5: Code Template - passes the two dropdown values to get_code_template and shows the returned code
+            with gr.TabItem("💻 Code Template"):
+                gr.Markdown("### Generate Python code for local extraction")
+                with gr.Row():
+                    repr_format = gr.Dropdown(
+                        ["tsv", "json"],
+                        label="Representation Format",
+                        value="tsv"
+                    )
+                    model_type = gr.Dropdown(
+                        ["zero-shot", "few-shot", "fine-tuning"],
+                        label="Model Type",
+                        value="zero-shot"
+                    )
+                generate_btn = gr.Button("Generate Code", variant="secondary")
+                code_output = gr.Code(
+                    label="Python Code Template",
+                    language="python",
+                    lines=30
+                )
+                generate_btn.click(
+                    get_code_template,
+                    inputs=[repr_format, model_type],
+                    outputs=code_output
+                )
+            # Tab 6: About - static Markdown only, no event handlers
+            with gr.TabItem("ℹ️ About"):
+                gr.Markdown("""
+                ## About MaTableGPT
+                MaTableGPT is a GPT-based table data extractor specifically designed for
+                materials science literature. It converts complex HTML tables containing
+                catalyst performance data into structured JSON format.
+                ### Workflow
+                1. **Table Representation**: Convert HTML tables to TSV or JSON format
+                2. **Table Splitting** (optional): Break down complex tables with multiple headers
+                3. **GPT Extraction**: Use zero-shot, few-shot, or fine-tuned models to extract data
+                4. **Validation**: Verify extraction results against expected schema
+                ### Supported Performance Types
+                - Overpotential, Tafel slope, Rct, Stability, Cdl
+                - Onset potential, Current density, Potential, TOF, ECSA
+                - Water splitting potential, Mass activity, Exchange current density
+                - Rs, Specific activity, Onset overpotential, BET, Surface area
+                - Loading, Apparent activation energy
+                ### MCP Integration
+                This service is also available as an MCP (Model Context Protocol) server,
+                allowing integration with AI assistants like Claude.
+                ### Credits
+                Based on [MaTableGPT](https://github.com/your-repo/MaTableGPT) research.
+                """)  # NOTE(review): the Credits link above uses a "your-repo" placeholder URL - replace with the real repository
+        gr.Markdown("---\n*MaTableGPT MCP Service - Materials Science Table Data Extraction*")
+    return app  # the Blocks object is returned un-launched; the caller decides how to serve it
+# =============================================================================
+# Main Entry Point
+# =============================================================================
+def main():
+    """Run the Gradio app."""
+    app = create_ui()
+    # Get port from environment or default
+    port = int(os.environ.get('GRADIO_SERVER_PORT', 7860))
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=port,
+        share=False
+    )
+if __name__ == "__main__":
+    main()