"""Gradio app: compare two revisions of a Wikipedia article and classify the differences."""

import os
from contextlib import nullcontext

import gradio as gr
import logfire
from dotenv import load_dotenv

from feedback import save_feedback_agree, save_feedback_disagree
from wiki_data_fetcher import get_random_wikipedia_title

# Load API keys
load_dotenv()
# Setup logging with Logfire
logfire.configure()

# This goes after logfire.configure() to avoid
# LogfireNotConfiguredWarning: Instrumentation will have no effect
from app_functions import (
    _fetch_current_revision,
    _fetch_previous_revision,
    _run_heuristic_classifier,
    _run_fewshot_classifier,
    _run_judge,
    find_interesting_example,
)


def _attach(context):
    """
    Return a context manager for running code under `context`.

    A truthy `context` is attached with logfire.attach_context(); otherwise
    (None or empty) a no-op nullcontext() is returned.
    """
    return logfire.attach_context(context) if context else nullcontext()


def start_parent_span(page_title: str, number_behind: int, units_behind: str):
    """
    Start a parent span and return the context for propagation to children.
    See https://logfire.pydantic.dev/docs/how-to-guides/distributed-tracing/#manual-context-propagation

    The span is named "<page_title> - <number_behind> <units_behind>" and is
    closed again when this function returns; only its context is handed back.
    """
    span_name = f"{page_title} - {number_behind} {units_behind}"
    # The `with` statement enters the span itself, so no explicit
    # span.__enter__() call is needed.
    with logfire.span(span_name):
        return logfire.get_context()


def fetch_current_revision(title: str, context=None):
    """
    Wrapper to run _fetch_current_revision in provided Logfire context.
    We use a wrapper to minimize indentation in the called function.
    """
    with _attach(context):
        return _fetch_current_revision(title)


def fetch_previous_revision(
    title: str, number: int, units: str, new_revision: str, context=None
):
    """
    Wrapper to run _fetch_previous_revision in provided Logfire context.
    All arguments except `context` are passed through unchanged.
    """
    with _attach(context):
        return _fetch_previous_revision(title, number, units, new_revision)


def run_heuristic_classifier(old_revision: str, new_revision: str, context=None):
    """
    Wrapper to run _run_heuristic_classifier in provided Logfire context.
    """
    with _attach(context):
        return _run_heuristic_classifier(old_revision, new_revision)


def run_fewshot_classifier(old_revision: str, new_revision: str, context=None):
    """
    Wrapper to run _run_fewshot_classifier in provided Logfire context.
    """
    with _attach(context):
        return _run_fewshot_classifier(old_revision, new_revision)


def run_judge(
    old_revision: str,
    new_revision: str,
    heuristic_noteworthy: bool,
    fewshot_noteworthy: bool,
    heuristic_rationale: str,
    fewshot_rationale: str,
    context=None,
):
    """
    Wrapper to run _run_judge in provided Logfire context.
    All arguments except `context` are passed through unchanged, in order.
    """
    with _attach(context):
        return _run_judge(
            old_revision,
            new_revision,
            heuristic_noteworthy,
            fewshot_noteworthy,
            heuristic_rationale,
            fewshot_rationale,
        )


# Create Gradio interface
with gr.Blocks(title="Noteworthy Differences") as demo:

    with gr.Row():
        gr.Markdown(
            """
Compare current and old revisions of a Wikipedia article
📅 You choose the number of revisions or days behind
◇ ∴ ⚖ Two classifier models and a judge predict the noteworthiness of the differences The GitHub repository describes how the judge was aligned with human preferences
👥 The feedback dataset holds all user feedback collected to date
""",
            elem_id="intro-table",
        )

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Accordion("Query Instructions", open=False) as accordion:
                gr.Markdown(
                    """
                    - Page title is case sensitive; use underscores or spaces
                    - Specify any number of days or up to 499 revisions behind
                    - The closest available revision is retrieved
                    - Only article introductions are downloaded
                    """
                )
            with gr.Accordion("Confidence Scores", open=False) as accordion:
                gr.Markdown(
                    """
                    - **High:** heuristic = few-shot, judge agrees
                    - **Moderate:** heuristic ≠ few-shot, judge decides
                    - **Questionable:** heuristic = few-shot, judge vetoes
                    """
                )
        with gr.Column(scale=3):
            with gr.Row():
                page_title = gr.Textbox(
                    label="Wikipedia Page Title",
                    placeholder="e.g., Albert Einstein",
                    value="",
                )
                number_behind = gr.Number(
                    label="Number Behind", value=50, minimum=0, precision=0
                )
                units_behind = gr.Dropdown(
                    choices=["revisions", "days"],
                    value="revisions",
                    label="Units Behind",
                )
        with gr.Column(scale=1):
            random_btn = gr.Button("Get Random Page Title", size="md")
            submit_btn = gr.Button(
                "Fetch Revisions and Run Model", size="md", variant="primary"
            )
            rerun_btn = gr.Button("Rerun Model", size="md")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Old Revision")
            old_timestamp = gr.Markdown("")
            old_revision = gr.Textbox(label="", lines=15, max_lines=30, container=False)
        with gr.Column(scale=1):
            gr.Markdown("### Current Revision")
            new_timestamp = gr.Markdown("")
            new_revision = gr.Textbox(label="", lines=15, max_lines=30, container=False)
        with gr.Column(scale=2):
            gr.Markdown("### Model Output")
            with gr.Row():
                with gr.Column():
                    heuristic_rationale = gr.Textbox(
                        label="◇ Heuristic Model's Rationale",
                        lines=2,
                        max_lines=7,
                    )
                    fewshot_rationale = gr.Textbox(
                        label="∴ Few-shot Model's Rationale",
                        lines=2,
                        max_lines=7,
                    )
                    judge_reasoning = gr.Textbox(
                        label="⚖ Judge's Reasoning",
                        lines=2,
                        max_lines=7,
                    )
                with gr.Column():
                    with gr.Row(variant="default"):
                        noteworthy_text = gr.Textbox(
                            label="Noteworthy Differences",
                            lines=1,
                            interactive=False,
                        )
                        confidence_score = gr.Textbox(
                            label="Confidence Score",
                            lines=1,
                            interactive=False,
                        )
                    gr.Markdown("### 👥 Your feedback")
                    with gr.Row():
                        thumbs_up_btn = gr.Button(
                            "👍 Agree", size="md", variant="primary"
                        )
                        thumbs_down_btn = gr.Button(
                            "👎 Disagree", size="md", variant="primary"
                        )
                    with gr.Accordion("Find Interesting Example", open=True):
                        special_random_btn = gr.Button(
                            "🎲 Special Random", size="md", variant="secondary"
                        )
                        gr.Markdown(
                            """
                            *Click to find an interesting example by running the model on random pages until the confidence score is not High, up to 20 tries*"""
                        )

    # States to store boolean values
    heuristic_noteworthy = gr.State()
    fewshot_noteworthy = gr.State()
    judge_noteworthy = gr.State()
    # State to store Logfire context
    context = gr.State()

    # The three model steps are identical in the submit and rerun pipelines,
    # so their event arguments are defined once and reused below.
    heuristic_step = {
        "fn": run_heuristic_classifier,
        "inputs": [old_revision, new_revision, context],
        "outputs": [heuristic_noteworthy, heuristic_rationale],
        "api_name": False,
    }
    fewshot_step = {
        "fn": run_fewshot_classifier,
        "inputs": [old_revision, new_revision, context],
        "outputs": [fewshot_noteworthy, fewshot_rationale],
        "api_name": False,
    }
    judge_step = {
        "fn": run_judge,
        "inputs": [
            old_revision,
            new_revision,
            heuristic_noteworthy,
            fewshot_noteworthy,
            heuristic_rationale,
            fewshot_rationale,
            context,
        ],
        "outputs": [
            judge_noteworthy,
            noteworthy_text,
            judge_reasoning,
            confidence_score,
        ],
        "api_name": False,
    }

    # Both feedback buttons send the same snapshot of the UI.
    feedback_inputs = [
        page_title,
        number_behind,
        units_behind,
        old_timestamp,
        new_timestamp,
        old_revision,
        new_revision,
        heuristic_rationale,
        fewshot_rationale,
        judge_reasoning,
        heuristic_noteworthy,
        fewshot_noteworthy,
        judge_noteworthy,
        confidence_score,
    ]

    random_btn.click(
        fn=get_random_wikipedia_title,
        inputs=None,
        outputs=[page_title],
    )

    gr.on(
        # Press Enter in textbox or use button to submit
        triggers=[page_title.submit, submit_btn.click],
        # Clear the new_revision and new_timestamp values before proceeding.
        # The empty values will propagate to the other components (through function return values) if there is an error.
        fn=lambda: (gr.update(value=""), gr.update(value="")),
        inputs=None,
        outputs=[new_revision, new_timestamp],
        api_name=False,
    ).then(
        # Initialize Logfire context
        fn=start_parent_span,
        inputs=[page_title, number_behind, units_behind],
        outputs=context,
    ).then(
        fn=fetch_current_revision,
        inputs=[page_title, context],
        outputs=[new_revision, new_timestamp],
        api_name=False,
    ).then(
        fn=fetch_previous_revision,
        inputs=[page_title, number_behind, units_behind, new_revision, context],
        outputs=[old_revision, old_timestamp],
        api_name=False,
    ).then(
        **heuristic_step
    ).then(
        **fewshot_step
    ).then(
        **judge_step
    )

    # Rerun model when rerun button is clicked
    gr.on(
        triggers=[rerun_btn.click],
        **heuristic_step,
    ).then(
        **fewshot_step
    ).then(
        **judge_step
    )

    # Feedback button handlers
    thumbs_up_btn.click(
        fn=save_feedback_agree,
        inputs=feedback_inputs,
        api_name=False,
    )
    thumbs_down_btn.click(
        fn=save_feedback_disagree,
        inputs=feedback_inputs,
        api_name=False,
    )

    # Special random button handler
    special_random_btn.click(
        fn=find_interesting_example,
        inputs=[number_behind, units_behind],
        outputs=[
            page_title,
            new_revision,
            new_timestamp,
            old_revision,
            old_timestamp,
            heuristic_noteworthy,
            fewshot_noteworthy,
            judge_noteworthy,
            heuristic_rationale,
            fewshot_rationale,
            judge_reasoning,
            noteworthy_text,
            confidence_score,
        ],
        api_name=False,
    )

if __name__ == "__main__":

    # Setup theme without background image
    theme = gr.Theme.from_hub("NoCrypt/miku")
    theme.set(body_background_fill="#FFFFFF", body_background_fill_dark="#000000")

    # Define the HTML for Font Awesome
    # NOTE(review): this string is empty here, so no extra <head> markup is
    # injected -- confirm whether a Font Awesome tag is supposed to be present.
    head = ''

    # Use CSS to style table
    css = """
    #intro-table {background-color: #eff6ff}
    table, tr, td {
        border: none; /* Removes all borders */
        border-collapse: collapse; /* Ensures no gaps between cells */
    }
    """

    demo.launch(theme=theme, head=head, css=css)