Spaces:
Sleeping
Sleeping
| """ | |
| PhD Research OS — Taxonomy Manager GUI | |
| ======================================== | |
| A Gradio companion application for managing domain-specific study type | |
| taxonomies that integrate with the Research OS confidence scoring engine. | |
| Features: | |
| - View and edit the 8-tier Quantum-Bio base taxonomy | |
| - Create, edit, delete domain-specific taxonomies | |
| - Add custom study types per domain with calibrated weights | |
| - Live confidence scoring calculator | |
| - Migration status and rollback controls | |
| - Full audit trail | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import gradio as gr | |
| # Add parent to path for imports | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from phd_research_os.taxonomy import ( | |
| TaxonomyManager, STUDY_TYPE_WEIGHTS, STUDY_TYPE_DESCRIPTIONS, | |
| ALLOWED_STUDY_TYPES, TAXONOMY_VERSION, PIPELINE_VERSION, | |
| DEFAULT_DOMAINS, LEGACY_TO_V2_MAP | |
| ) | |
| from phd_research_os.db import init_db, from_fixed | |
# Initialize
# The database location can be overridden through the RESEARCH_OS_DB
# environment variable. The default "data" folder is still created, and in
# addition the directory that will actually hold the database file is created
# so that an overridden path does not point into a missing directory.
DB_PATH = os.environ.get("RESEARCH_OS_DB", "data/research_os.db")
os.makedirs("data", exist_ok=True)
_db_dir = os.path.dirname(DB_PATH)
if _db_dir:  # a bare filename has no parent directory component to create
    os.makedirs(_db_dir, exist_ok=True)
tm = TaxonomyManager(db_path=DB_PATH)
| # ============================================================ | |
| # Helper Functions | |
| # ============================================================ | |
def get_base_taxonomy_table():
    """Build the display rows for the base study-type taxonomy.

    Returns:
        One ``[study_type, weight, description]`` list per entry of
        ``ALLOWED_STUDY_TYPES``, in that order. The weight is the
        ``STUDY_TYPE_WEIGHTS`` entry passed through ``from_fixed``; the
        description falls back to an empty string when none is registered.
    """
    return [
        [
            study_type,
            from_fixed(STUDY_TYPE_WEIGHTS[study_type]),
            STUDY_TYPE_DESCRIPTIONS.get(study_type, ""),
        ]
        for study_type in ALLOWED_STUDY_TYPES
    ]
def get_domain_list():
    """Return the dropdown labels for every domain known to the manager.

    Each label has the form ``"<domain_id> β <name>"``; the other UI helpers
    recover the domain id by splitting on that same separator.
    """
    labels = []
    for domain in tm.list_domains():
        labels.append(f"{domain['domain_id']} β {domain['name']}")
    return labels
def get_domain_table(domain_selection):
    """Return the study-type rows for the domain chosen in the dropdown.

    Args:
        domain_selection: Dropdown label of the form
            ``"<domain_id> β <name>"``; a falsy value yields an empty table.

    Returns:
        A list of ``[study_type, weight_float, description, source]`` rows,
        ordered by descending ``weight``.
    """
    if not domain_selection:
        return []

    domain_id = domain_selection.split(" β ")[0]
    study_types = tm.get_all_study_types(domain_id)
    # Negating the weight sorts heaviest first while keeping ties stable.
    ranked = sorted(study_types.items(), key=lambda item: -item[1]["weight"])
    return [
        [type_name, info["weight_float"], info["description"], info["source"]]
        for type_name, info in ranked
    ]
def score_confidence_ui(evidence_strength, study_type, journal_tier,
                        is_complete, domain_selection):
    """Score a claim from the calculator inputs and render the result.

    Args:
        evidence_strength: Value of the evidence-strength slider.
        study_type: Study type chosen in the dropdown.
        journal_tier: One of the tier labels; unknown labels map to tier 2.
        is_complete: Whether the claim is flagged as complete.
        domain_selection: Dropdown label ``"<domain_id> β <name>"``; when
            empty the ``quantum_bio`` domain is used.

    Returns:
        A Markdown string with the score, its factors, the formula and the
        scoring metadata.
    """
    domain_id = domain_selection.split(" β ")[0] if domain_selection else "quantum_bio"

    tier_by_label = {"Tier 1": 1, "Tier 2": 2, "Tier 3": 3, "Preprint": 0}
    scored = tm.score_confidence(
        evidence_strength=evidence_strength,
        study_type=study_type,
        journal_tier=tier_by_label.get(journal_tier, 2),
        is_complete=is_complete,
        domain_id=domain_id,
    )

    return f"""## Confidence Score: **{scored['confidence']:.3f}**
### Breakdown
| Factor | Value |
|--------|-------|
| Evidence Strength | {scored['evidence_strength']:.3f} |
| Study Quality Weight | {scored['study_quality_weight']:.3f} |
| Journal Tier Weight | {scored['journal_tier_weight']:.3f} |
| Completeness Penalty | {scored['completeness_penalty']:.3f} |
### Formula
`{scored['evidence_strength']:.3f} Γ {scored['study_quality_weight']:.3f} Γ {scored['journal_tier_weight']:.3f} Γ {scored['completeness_penalty']:.3f} = {scored['confidence']:.3f}`
### Metadata
- **Study Type (normalized):** `{scored['study_type_normalized']}`
- **Taxonomy Version:** `{scored['taxonomy_version']}`
- **Domain:** `{scored['domain_id']}`
"""
def create_domain_ui(domain_id, name, description):
    """Create a new domain taxonomy and refresh the domain dropdown.

    Args:
        domain_id: Raw identifier typed by the user. It is trimmed,
            lower-cased, and spaces/hyphens are turned into underscores.
        name: Human-readable domain name (trimmed).
        description: Free-text description (trimmed; may be empty).

    Returns:
        A ``(status_markdown, dropdown_update)`` pair. The second element is
        a ``gr.update(choices=...)`` so the dropdown's *choices* are
        refreshed; returning a bare list would be interpreted by Gradio as
        the dropdown's selected value instead.
    """
    # Normalise before validating so whitespace-only input is rejected too.
    domain_id = (domain_id or "").strip().lower().replace(" ", "_").replace("-", "_")
    name = (name or "").strip()
    if not domain_id or not name:
        return "β Domain ID and Name are required", gr.update(choices=get_domain_list())

    try:
        tm.create_domain(domain_id, name, (description or "").strip())
    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing.
        status = f"β Error: {str(e)}"
    else:
        status = f"β Domain '{name}' created successfully!"
    return status, gr.update(choices=get_domain_list())
def add_study_type_ui(domain_selection, type_name, weight, description):
    """Register a custom study type on the selected domain.

    Args:
        domain_selection: Dropdown label ``"<domain_id> β <name>"``.
        type_name: Raw type name; trimmed, lower-cased, with spaces and
            hyphens replaced by underscores before it is stored.
        weight: Weight taken from the slider.
        description: Free-text description (trimmed).

    Returns:
        A Markdown status line describing success or the error raised.
    """
    if not (domain_selection and type_name):
        return "β Select a domain and provide a type name"

    domain_id = domain_selection.split(" β ")[0]
    normalized = type_name.strip().lower()
    for separator in (" ", "-"):
        normalized = normalized.replace(separator, "_")

    try:
        tm.add_study_type(domain_id, normalized, weight, description.strip())
    except Exception as exc:
        return f"β Error: {exc}"
    return f"β Study type '{normalized}' added to {domain_id} (weight={weight})"
def remove_study_type_ui(domain_selection, type_name):
    """Remove a custom study type from the selected domain.

    The type name is normalised exactly like ``add_study_type_ui`` does
    (trim, lower-case, spaces *and* hyphens to underscores), so a name typed
    the same way it was added resolves to the same stored identifier.

    Args:
        domain_selection: Dropdown label ``"<domain_id> β <name>"``.
        type_name: Raw name of the study type to remove.

    Returns:
        A Markdown status line: success, not-found, or the error raised by
        the taxonomy manager.
    """
    if not domain_selection or not type_name:
        return "β Select a domain and provide a type name"
    domain_id = domain_selection.split(" β ")[0]
    type_name = type_name.strip().lower().replace(" ", "_").replace("-", "_")
    try:
        removed = tm.remove_study_type(domain_id, type_name)
    except Exception as e:
        # Same UI-boundary handling as the add/create handlers.
        return f"β Error: {str(e)}"
    if removed:
        return f"β Removed '{type_name}' from {domain_id}"
    return f"β Type '{type_name}' not found in domain {domain_id}"
def delete_domain_ui(domain_selection):
    """Deactivate the selected domain and refresh the domain dropdown.

    Args:
        domain_selection: Dropdown label ``"<domain_id> β <name>"``.

    Returns:
        A ``(status_markdown, dropdown_update)`` pair. The update carries the
        current domain list as the dropdown's *choices* (a bare list would be
        treated by Gradio as the selected value). After a successful
        deactivation the selection is also cleared.
    """
    if not domain_selection:
        return "β Select a domain", gr.update(choices=get_domain_list())
    domain_id = domain_selection.split(" β ")[0]
    if tm.delete_domain(domain_id):
        # Clear the selection so the deactivated domain does not stay chosen.
        return (
            f"β Domain '{domain_id}' deactivated",
            gr.update(choices=get_domain_list(), value=None),
        )
    # NOTE(review): any falsy result is reported as the base-taxonomy case;
    # confirm that is the only reason delete_domain can return False.
    return "β Cannot delete base taxonomy 'quantum_bio'", gr.update(choices=get_domain_list())
def run_migration_ui():
    """Trigger the V2 migration and summarise its outcome as a status line.

    Returns:
        A message for the already-migrated case, the reported errors, or the
        number of backfilled rows, in that order of precedence.
    """
    outcome = tm.migrate_to_v2()

    if outcome.get("already_migrated"):
        return "βΉοΈ Database already migrated to V2"

    errors = outcome.get("errors")
    if errors:
        return f"β Migration errors: {errors}"

    backfilled = outcome["rows_backfilled"]
    return f"β Migration complete! {backfilled} claims backfilled to V2"
def run_rollback_ui():
    """Trigger the V1 rollback and summarise its outcome as a status line.

    Returns:
        The reported errors if there are any, otherwise the number of
        reverted rows.
    """
    outcome = tm.rollback_to_v1()

    errors = outcome.get("errors")
    if errors:
        return f"β Rollback errors: {errors}"

    reverted = outcome["rows_reverted"]
    return f"β Rollback complete! {reverted} claims reverted to V1"
def get_audit_log_ui():
    """Render the 20 most recent taxonomy audit entries as a Markdown table.

    Field values are coerced to text before slicing, so ``None`` or
    non-string values (for example structured details) do not raise, and
    pipe / newline characters are neutralised so a single entry cannot break
    the table layout.

    Returns:
        A Markdown table, or a placeholder sentence when the log is empty.
    """
    entries = tm.get_audit_log(limit=20)
    if not entries:
        return "No audit entries yet"

    def _cell(value, default="", limit=None):
        """Turn one field into text that is safe inside a Markdown table cell."""
        text = default if value is None else str(value)
        if limit is not None:
            text = text[:limit]
        return text.replace("|", "\\|").replace("\r", " ").replace("\n", " ")

    lines = ["| Timestamp | Action | Domain | Details |", "|-----------|--------|--------|---------|"]
    for e in entries:
        ts = _cell(e.get("timestamp"), limit=19)  # keep "YYYY-MM-DDTHH:MM:SS"-sized prefix
        action = _cell(e.get("action"))
        domain = _cell(e.get("domain_id"), default="-")
        details = _cell(e.get("details"), limit=60)
        lines.append(f"| {ts} | {action} | {domain} | {details} |")
    return "\n".join(lines)
def normalize_type_ui(raw_type):
    """Show how a raw study type would be normalized.

    This handler runs on every change of the input textbox, including when
    the box is cleared, so blank input produces an empty result instead of
    being sent to the taxonomy manager.

    Args:
        raw_type: Text currently in the "Raw Study Type" box.

    Returns:
        A Markdown line with the raw value, its normalized study type and
        that type's weight, or an empty string for blank input.
    """
    if not raw_type or not raw_type.strip():
        return ""
    normalized = tm.normalize_study_type(raw_type)
    weight = tm.get_weight_float(normalized)
    return f"**`{raw_type}`** β **`{normalized}`** (weight: {weight:.3f})"
| # ============================================================ | |
| # Gradio UI | |
| # ============================================================ | |
# Colour-token overrides applied on top of the base theme; the same value is
# used for the light and dark variant of each fill.
_THEME_OVERRIDES = {
    "body_background_fill": "*neutral_950",
    "body_background_fill_dark": "*neutral_950",
    "block_background_fill": "*neutral_900",
    "block_background_fill_dark": "*neutral_900",
    "block_border_color": "*neutral_700",
    "input_background_fill": "*neutral_800",
    "input_background_fill_dark": "*neutral_800",
}

_BASE_THEME = gr.themes.Base(
    primary_hue="blue",
    secondary_hue="slate",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
)
THEME = _BASE_THEME.set(**_THEME_OVERRIDES)

# Extra stylesheet handed to gr.Blocks: caps the page width and sets the
# background for the ".dark" class.
CSS = (
    "\n"
    ".gradio-container { max-width: 1200px !important; }\n"
    ".dark { background: #0a0a0a; }\n"
)
# Layout nesting (which widgets sit inside each Row / Accordion) is laid out
# from the statement order of the handlers and widgets below.
with gr.Blocks(theme=THEME, css=CSS, title="PhD Research OS β Taxonomy Manager") as app:
    # Query the domain list once at build time; it seeds every domain
    # dropdown below instead of hitting the manager once per use.
    initial_domains = get_domain_list()

    # NOTE(review): the version strings in this banner are hard-coded, while
    # the Migration tab reads TAXONOMY_VERSION / PIPELINE_VERSION — confirm
    # whether the banner should be derived from the same constants.
    gr.Markdown("""
# 𧬠PhD Research OS β Taxonomy Manager
**Quantum-Bio Taxonomy V2** | Manage domain-specific study type taxonomies for confidence scoring
`taxonomy_version: quantum_bio_v1` | `pipeline_version: 2.1.0`
""")

    with gr.Tabs():
        # -- Tab 1: Base Taxonomy -------------------------------------
        with gr.Tab("π Base Taxonomy (8-Tier)"):
            gr.Markdown("### Quantum-Bio 8-Tier Study Type Weights\nThese are the core weights used across all domains. Fixed-point math (Γ1000).")
            base_table = gr.Dataframe(
                headers=["Study Type", "Weight", "Description"],
                value=get_base_taxonomy_table(),
                interactive=False,
                wrap=True,
            )
            gr.Markdown("### Study Type Normalizer\nTest how legacy or alias study types map to V2:")
            with gr.Row():
                raw_input = gr.Textbox(label="Raw Study Type", placeholder="e.g., PrimaryExperimental, clinical_trial, simulation")
                norm_output = gr.Markdown()
            # Re-run the normalizer on every keystroke / edit of the box.
            raw_input.change(normalize_type_ui, inputs=raw_input, outputs=norm_output)

        # -- Tab 2: Domain Taxonomies ---------------------------------
        with gr.Tab("π Domain Taxonomies"):
            gr.Markdown("### Manage Domain-Specific Taxonomies\nEach domain extends the base 8-tier system with custom study types.")
            domain_dropdown = gr.Dropdown(
                label="Select Domain",
                choices=initial_domains,
                interactive=True,
            )
            domain_table = gr.Dataframe(
                headers=["Study Type", "Weight", "Description", "Source"],
                value=[],
                interactive=False,
                wrap=True,
            )
            domain_dropdown.change(get_domain_table, inputs=domain_dropdown, outputs=domain_table)

            gr.Markdown("---\n### Create New Domain")
            with gr.Row():
                new_domain_id = gr.Textbox(label="Domain ID", placeholder="e.g., organic_chemistry")
                new_domain_name = gr.Textbox(label="Domain Name", placeholder="e.g., Organic Chemistry")
                new_domain_desc = gr.Textbox(label="Description", placeholder="Taxonomy for organic synthesis and reaction mechanisms")
            create_btn = gr.Button("Create Domain", variant="primary")
            create_status = gr.Markdown()
            create_btn.click(
                create_domain_ui,
                inputs=[new_domain_id, new_domain_name, new_domain_desc],
                outputs=[create_status, domain_dropdown],
            )

            gr.Markdown("---\n### Add Custom Study Type")
            with gr.Row():
                add_type_name = gr.Textbox(label="Type Name", placeholder="e.g., single_crystal_xrd")
                add_type_weight = gr.Slider(0.0, 1.0, value=0.8, step=0.05, label="Weight")
                add_type_desc = gr.Textbox(label="Description", placeholder="e.g., Single-crystal X-ray diffraction structure determination")
            add_type_btn = gr.Button("Add Study Type", variant="primary")
            add_type_status = gr.Markdown()
            add_type_btn.click(
                add_study_type_ui,
                inputs=[domain_dropdown, add_type_name, add_type_weight, add_type_desc],
                outputs=add_type_status,
            )

            gr.Markdown("---\n### Remove Study Type / Delete Domain")
            with gr.Row():
                rm_type_name = gr.Textbox(label="Type to Remove", placeholder="e.g., single_crystal_xrd")
                rm_btn = gr.Button("Remove Type", variant="secondary")
            rm_status = gr.Markdown()
            rm_btn.click(remove_study_type_ui, inputs=[domain_dropdown, rm_type_name], outputs=rm_status)
            del_btn = gr.Button("ποΈ Deactivate Selected Domain", variant="stop")
            del_status = gr.Markdown()
            del_btn.click(delete_domain_ui, inputs=domain_dropdown, outputs=[del_status, domain_dropdown])

        # -- Tab 3: Confidence Calculator -----------------------------
        with gr.Tab("π¬ Confidence Calculator"):
            gr.Markdown("### Live Confidence Scoring\nCalculate confidence using the V2 formula with domain-aware weights.")
            with gr.Row():
                calc_evidence = gr.Slider(0.0, 1.0, value=0.85, step=0.01, label="Evidence Strength")
                calc_type = gr.Dropdown(
                    label="Study Type",
                    # Base types plus a few legacy/alias names for testing.
                    choices=ALLOWED_STUDY_TYPES + ["simulation", "primary_experimental", "clinical_trial"],
                    value="direct_physical_measurement",
                )
            with gr.Row():
                calc_tier = gr.Dropdown(
                    label="Journal Tier",
                    choices=["Tier 1", "Tier 2", "Tier 3", "Preprint"],
                    value="Tier 1",
                )
                calc_complete = gr.Checkbox(label="Claim is Complete (all fields present)", value=True)
                calc_domain = gr.Dropdown(
                    label="Domain",
                    choices=initial_domains,
                    # Pre-select the first domain when at least one exists.
                    value=initial_domains[0] if initial_domains else None,
                )
            calc_btn = gr.Button("Calculate Confidence", variant="primary")
            calc_result = gr.Markdown()
            calc_btn.click(
                score_confidence_ui,
                inputs=[calc_evidence, calc_type, calc_tier, calc_complete, calc_domain],
                outputs=calc_result,
            )

        # -- Tab 4: Migration & Rollback ------------------------------
        with gr.Tab("βοΈ Migration & Rollback"):
            gr.Markdown(f"""### Database Migration Controls
**Current Taxonomy Version:** `{TAXONOMY_VERSION}`
**Current Pipeline Version:** `{PIPELINE_VERSION}`
β οΈ **Migration** converts legacy 4-tier study types to V2 8-tier. **Rollback** reverts to V1.
Both operations are idempotent and logged.
""")
            with gr.Row():
                migrate_btn = gr.Button("π Migrate to V2", variant="primary")
                rollback_btn = gr.Button("βͺ Rollback to V1", variant="stop")
            # Both buttons report into the same status area.
            migration_status = gr.Markdown()
            migrate_btn.click(run_migration_ui, outputs=migration_status)
            rollback_btn.click(run_rollback_ui, outputs=migration_status)

            gr.Markdown("---\n### Legacy β V2 Mapping Reference")
            mapping_rows = [[k, v] for k, v in sorted(LEGACY_TO_V2_MAP.items())]
            gr.Dataframe(
                headers=["Legacy/Alias", "Maps To (V2)"],
                value=mapping_rows,
                interactive=False,
            )

        # -- Tab 5: AI Model Council ----------------------------------
        with gr.Tab("ποΈ AI Model Council"):
            gr.Markdown("""### AI Model Council β Multi-Agent Claim Extraction
The Council is a 4-member deliberation pipeline that produces higher-quality claim extraction:
| Member | Role | What It Does |
|--------|------|-------------|
| **Query Planner** | Decomposer | Breaks complex questions into 2-4 search queries |
| **Extractor** | Claim Miner | Extracts atomic claims with epistemic tags |
| **Critic** | Challenger | Reviews claims against source, flags errors |
| **Chairman** | Synthesizer | Produces final claims with 0.7 completeness penalty |
""")
            with gr.Row():
                council_text = gr.Textbox(
                    label="Scientific Text",
                    placeholder="Paste a paragraph from a scientific paper...",
                    lines=8,
                )
                council_query = gr.Textbox(
                    label="Research Question (optional)",
                    placeholder="e.g., What is the detection limit for GFET cardiac sensors?",
                )
            council_btn = gr.Button("ποΈ Convene Council", variant="primary")
            with gr.Row():
                council_claims = gr.JSON(label="Final Claims (Chairman Output)")
            with gr.Accordion("Council Deliberation Details", open=False):
                council_queries = gr.JSON(label="Query Plan (Planner)")
                council_raw = gr.JSON(label="Raw Extraction (Extractor)")
                council_critique = gr.JSON(label="Critique (Critic)")
                council_meta = gr.JSON(label="Round Metadata")

            def run_council_ui(text, query):
                """Run one council deliberation and fan its result out to the five JSON panes."""
                if not text or len(text.strip()) < 50:
                    return (
                        [{"error": "Please provide at least 50 characters of scientific text"}],
                        [], [], {}, {}
                    )
                # Imported lazily so the council module is only loaded when used.
                from phd_research_os.council import ModelCouncil
                council = ModelCouncil(brain=None, db_path=DB_PATH)
                result = council.deliberate(text, query=query if query else None)
                return (
                    result.final_claims,
                    result.query_plan,
                    result.raw_extraction,
                    result.critique,
                    result.metadata,
                )

            council_btn.click(
                run_council_ui,
                inputs=[council_text, council_query],
                outputs=[council_claims, council_queries, council_raw, council_critique, council_meta],
            )

            gr.Markdown("---\n### Council History")
            history_btn = gr.Button("π Load History")
            history_display = gr.Dataframe(
                headers=["Round ID", "Claims", "Tokens", "Started"],
                value=[],
            )

            def load_council_history():
                """Return the last 10 council rounds as rows for the history table."""
                from phd_research_os.council import ModelCouncil
                council = ModelCouncil(brain=None, db_path=DB_PATH)
                history = council.get_council_history(limit=10)
                return [
                    [
                        h["round_id"],
                        h.get("final_claim_count", 0),
                        h.get("total_tokens", 0),
                        # A stored None (e.g. a round with no start time)
                        # must not break the slice.
                        str(h.get("started_at") or "")[:19],
                    ]
                    for h in history
                ]

            history_btn.click(load_council_history, outputs=history_display)

        # -- Tab 6: Audit Log -----------------------------------------
        with gr.Tab("π Audit Log"):
            gr.Markdown("### Taxonomy Audit Trail\nEvery domain change, migration, and rollback is logged.")
            refresh_btn = gr.Button("π Refresh Log")
            audit_display = gr.Markdown(value=get_audit_log_ui())
            refresh_btn.click(get_audit_log_ui, outputs=audit_display)
| # ============================================================ | |
| # Launch | |
| # ============================================================ | |
if __name__ == "__main__":
    # Listen on every interface at port 7860 and do not request a public
    # share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    app.launch(**launch_options)