Spaces:

ecaplan
/

splits

Running

App Files Community

Eylon Caplan committed 11 days ago

Commit

58eaa88

1 Parent(s): 160827a

Add demo code and track PNGs with LFS

Browse files

Files changed (3) hide show

.gitattributes +1 -0
app.py +88 -52
cat_splits.png +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -91,6 +91,27 @@ TITLE_ICON_HTML = (
     if CAT_ICON_SRC else ""
 )
 def get_topics_for_demos(target, contrast):
     if not os.path.exists(INDEX_DIR):
         return [("No indices found", "")]
@@ -143,16 +164,17 @@ def generate_verdict_banner(lift, pval, triviality):
     is_trivial = triviality >= TRIVIALITY_THRESHOLD
     is_sig = lift > 1.0 and pval < 0.05
-    base_style = "padding:20px; border-radius:12px; text-align:center; box-shadow: 0 4px 6px rgba(0,0,0,0.1);"
     if is_trivial and is_sig:
-        return f"<div style='{base_style} background-color:#fff3cd; border: 1px solid #ffe69c;'><h2 style='margin:0; color:#856404;'>🟡 Supported, but Trivial</h2><p style='margin-top:10px; color:#856404; font-size:16px;'>This lexicon successfully isolates the target demographic, but it is likely <strong style='color: inherit;'>too obvious/definitional</strong> to be of interest.</p></div>"
     elif is_trivial and not is_sig:
-        return f"<div style='{base_style} background-color:#f8d7da; border: 1px solid #f5c6cb;'><h2 style='margin:0; color:#721c24;'>🔴 Trivial & Unsupported</h2><p style='margin-top:10px; color:#721c24; font-size:16px;'>This lexicon is <strong style='color: inherit;'>definitional</strong> to the target demographic, and also failed to provide significant lift for the target demographic.</p></div>"
     elif not is_trivial and is_sig:
-        return f"<div style='{base_style} background-color:#d4edda; border: 1px solid #c3e6cb;'><h2 style='margin:0; color:#155724;'>🟢 Promising PSLP!</h2><p style='margin-top:10px; color:#155724; font-size:16px;'>This hypothesis is <strong style='color: inherit;'>supported by the data</strong> (high lift) AND is <strong style='color: inherit;'>unexpected</strong> (low triviality). Worthy of further study.</p></div>"
     else:
-        return f"<div style='{base_style} background-color:#e2e3e5; border: 1px solid #d6d8db;'><h2 style='margin:0; color:#383d41;'>⚪ Unsupported Hypothesis</h2><p style='margin-top:10px; color:#383d41; font-size:16px;'>This lexicon is non-trivial, but the data <strong style='color: inherit;'>does not support</strong> the hypothesis (it distinguishes the demographics no better than random).</p></div>"
 def format_demo(name):
     if not name:
@@ -176,13 +198,13 @@ def run_evaluation(index_name, target_demo, contrast_demo, generated_words_str):
         lift_0_5_percent = lift_at_k(df_results, target_demo, k=0.005)
         pval_0_5, _, _ = lift_ci(df_results, target_demo, k=0.005)
-        card_style = "background-color:#ffffff; padding:20px; border-radius:12px; border: 1px solid #e0e0e0; box-shadow: 0 2px 4px rgba(0,0,0,0.05); height: 100%;"
-        lift_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color:#2c3e50;'>Lift@0.5%: <span style='color:#007bff;'>{lift_0_5_percent:.2f}x</span></h3><p style='margin:8px 0; color:#6c757d;'><strong style='color: inherit;'>p-value: {pval_0_5:.4f}</strong></p><p style='margin:0; font-size:14px; color:#495057;'>This lexicon pulled <b style='color: inherit;'>{target_fmt}</b> posts to the top {lift_0_5_percent:.2f}x more than random compared to <b style='color: inherit;'>{contrast_fmt}</b>.</p></div>"
         # 2. Compute Triviality
         triviality = compute_triviality(generated_words, target_demo)
-        triv_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color:#2c3e50;'>Triviality Score: <span style='color:#6f42c1;'>{triviality:.3f}</span></h3><p style='margin:8px 0; color:#6c757d;'><strong style='color: inherit;'>Threshold: < {TRIVIALITY_THRESHOLD}</strong></p><p style='margin:0; font-size:14px; color:#495057;'>Measures semantic similarity to the seed words of <b style='color: inherit;'>{target_fmt}</b>. Lower scores indicate a likely more unexpected PSLP.</p></div>"
         verdict = generate_verdict_banner(lift_0_5_percent, pval_0_5, triviality)
@@ -271,70 +293,84 @@ except Exception as e:
 tab3_plot = load_tab3_data()
 # Utilize a clean, modern Gradio Theme
-with gr.Blocks(title="Splits! Sandbox", theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as demo:
     gr.Markdown(f"""
-        <h1 style='display:flex; align-items:center; gap:0.35em; margin:0 0 0.4em 0;'>
             {TITLE_ICON_HTML}
             <span>Splits! Language & Culture Sandbox</span>
         </h1>
-        📄 <a href="https://arxiv.org/abs/2504.04640" target="_blank" style="font-weight: bold;">Read the full paper: SPLITS! Flexible Sociocultural Linguistic Investigation at Scale</a>
-        Welcome to the companion demo for **Splits!**
         ### 🤔 What is this?
-        The way we speak is heavily influenced by our background and culture. This tool lets you explore how different groups of people (like teachers, construction workers, or people of different faiths) use entirely different vocabularies to discuss the *exact same topics*.
         ### 🛠️ What can it do?
-        Think of it as a search engine for testing cultural language trends. You can test your own guesses (hypotheses) about how people talk. For example, if both Jewish and Catholic people are talking about "Healthcare", do they focus on different things?
         You pick the groups, pick a topic, and type in some words. The tool will then crunch the data and tell you two things:
         1. 📊 **Is it true? (Lift):** Does your chosen group *actually* use these words more than the other group?
-        2. 💡 **Is it interesting? (Triviality):** Are these words an unexpected, deep cultural insight? Or are they just boring, obvious terms (like a Catholic person using the word "Catholic")?
-        ---
-        **How to use:**
         1. Select a **Target** group, a **Contrast** group, and a **Discussion Topic**.
         2. Provide a **Candidate Lexicon** (a list of words you guess the target group uses *more* than the contrast group for this topic).
         3. Click **Test Hypothesis** to see if the data supports your idea!
         """)
-    with gr.Group():
-        with gr.Row():
-            target_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="🎯 Target Demographic", value="black", scale=2)
-            swap_btn = gr.Button("🔄 Swap", scale=1, min_width=80)
-            contrast_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="⚖️ Contrast Demographic", value="teacher", scale=2)
-        with gr.Row():
-            choices = get_topics_for_demos("black", "teacher")
-            default_idx = choices[0][1] if choices and choices[0][1] != "" else None
-            index_dropdown = gr.Dropdown(choices=choices, label="💬 Discussion Topic (Context)", value=default_idx, scale=3)
-    with gr.Group():
-        lexicon_input = gr.Textbox(
-            label="Candidate Lexicon (PSLP)",
-            placeholder="e.g. word1, phrase two, word3...",
-            info="Enter a comma-separated list of words/phrases you hypothesize the Target uses more than the Contrast in the selected Topic."
-        )
-        with gr.Row():
-            quick_triv = gr.Button("Auto-fill Trivial Lexicon (Obvious)")
-            quick_creat = gr.Button("Auto-fill Creative Lexicon (LLM Generated)")
-            clear_btn = gr.Button("Clear", variant="secondary")
     test_btn = gr.Button("🚀 Test Hypothesis!", variant="primary", size="lg")
     gr.Markdown("---")
     verdict_out = gr.HTML()
     with gr.Row():
         lift_out = gr.HTML()
         triv_out = gr.HTML()
     plot_out = gr.Plot(value=tab3_plot)
     with gr.Accordion("🔍 View Top Retrieved Posts (Contextualize the Lexicon)", open=False):
         posts_out = gr.Dataframe(headers=["ID", "BM25 Score", "Demographic", "Content"], interactive=False)

     if CAT_ICON_SRC else ""
 )
+BUTTON_FILL_CSS = """
+.lexicon-action-row {
+    align-items: stretch !important;
+}
+#quick-triv-btn,
+#quick-creat-btn,
+#clear-btn {
+    display: flex;
+    align-self: stretch !important;
+}
+#quick-triv-btn button,
+#quick-creat-btn button,
+#clear-btn button {
+    width: 100%;
+    min-height: 44px;
+    height: 100% !important;
+}
+"""
 def get_topics_for_demos(target, contrast):
     if not os.path.exists(INDEX_DIR):
         return [("No indices found", "")]
     is_trivial = triviality >= TRIVIALITY_THRESHOLD
     is_sig = lift > 1.0 and pval < 0.05
+    # Use Gradio CSS vars and neutral styling so cards work in light and dark mode.
+    base_style = "padding:20px; border-radius:12px; text-align:center; box-shadow: 0 4px 6px rgba(0,0,0,0.1); margin-bottom: 15px;"
     if is_trivial and is_sig:
+        return f"<div style='{base_style} background-color: rgba(255, 193, 7, 0.2); border: 1px solid #ffe69c;'><h2 style='margin:0; color: #d39e00;'>🟡 Supported, but Trivial</h2><p style='margin-top:10px; font-size:16px;'>This lexicon successfully isolates the target demographic, but it is likely <strong>too obvious/definitional</strong> to be of interest.</p></div>"
     elif is_trivial and not is_sig:
+        return f"<div style='{base_style} background-color: rgba(220, 53, 69, 0.2); border: 1px solid #f5c6cb;'><h2 style='margin:0; color: #c82333;'>🔴 Trivial & Unsupported</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is <strong>definitional</strong> to the target demographic, and also failed to provide significant lift for the target demographic.</p></div>"
     elif not is_trivial and is_sig:
+        return f"<div style='{base_style} background-color: rgba(40, 167, 69, 0.2); border: 1px solid #c3e6cb;'><h2 style='margin:0; color: #218838;'>🟢 Promising PSLP!</h2><p style='margin-top:10px; font-size:16px;'>This hypothesis is <strong>supported by the data</strong> (high lift) AND is <strong>unexpected</strong> (low triviality). Worthy of further study.</p></div>"
     else:
+        return f"<div style='{base_style} background-color: rgba(108, 117, 125, 0.2); border: 1px solid #d6d8db;'><h2 style='margin:0; color: var(--body-text-color);'>⚪ Unsupported Hypothesis</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is non-trivial, but the data <strong>does not support</strong> the hypothesis (it distinguishes the demographics no better than random).</p></div>"
 def format_demo(name):
     if not name:
         lift_0_5_percent = lift_at_k(df_results, target_demo, k=0.005)
         pval_0_5, _, _ = lift_ci(df_results, target_demo, k=0.005)
+        card_style = "background-color: var(--block-background-fill); padding:20px; border-radius:12px; border: 1px solid var(--border-color-primary); box-shadow: 0 2px 4px rgba(0,0,0,0.05); height: 100%;"
+        lift_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Lift@0.5%: <span style='color:#007bff;'>{lift_0_5_percent:.2f}x</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>p-value: {pval_0_5:.4f}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>This lexicon pulled <b style='color: inherit;'>{target_fmt}</b> posts to the top {lift_0_5_percent:.2f}x more than random compared to <b style='color: inherit;'>{contrast_fmt}</b>.</p></div>"
         # 2. Compute Triviality
         triviality = compute_triviality(generated_words, target_demo)
+        triv_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Triviality Score: <span style='color:#6f42c1;'>{triviality:.3f}</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>Threshold: &lt; {TRIVIALITY_THRESHOLD}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>Measures semantic similarity to the seed words of <b style='color: inherit;'>{target_fmt}</b>. Lower scores indicate a likely more unexpected PSLP.</p></div>"
         verdict = generate_verdict_banner(lift_0_5_percent, pval_0_5, triviality)
 tab3_plot = load_tab3_data()
 # Utilize a clean, modern Gradio Theme
+with gr.Blocks(title="Splits! Sandbox", theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=BUTTON_FILL_CSS) as demo:
+    # --- HEADER ---
     gr.Markdown(f"""
+        <h1 style='display:flex; align-items:center; gap:0.35em; margin:0 0 0.2em 0;'>
             {TITLE_ICON_HTML}
             <span>Splits! Language & Culture Sandbox</span>
         </h1>
+        Welcome to the companion demo for **Splits!** Explore how different sociocultural groups use entirely different vocabularies to discuss the *exact same topics*.
+        <div style="margin-top: 16px; margin-bottom: 8px;">
+            <a href="https://arxiv.org/abs/2504.04640" target="_blank"
+               style="display: inline-flex; align-items: center; gap: 8px; background-color: var(--button-secondary-background-fill); border: 1px solid var(--border-color-primary); border-radius: 999px; padding: 8px 18px; text-decoration: none; color: var(--body-text-color); font-weight: 500; font-size: 0.95em; box-shadow: 0 1px 3px rgba(0,0,0,0.05);">
+                📄 <b>Read the paper:</b> Splits! Flexible Sociocultural Linguistic Investigation at Scale
+                <span style="color: var(--body-text-color-subdued); margin-left: 4px;">↗</span>
+            </a>
+        </div>
+    """)
+    with gr.Accordion("📖 What is this & How to use it?", open=False):
+        gr.Markdown("""
         ### 🤔 What is this?
+        The way we speak is heavily influenced by our background and culture. This tool lets you explore how different groups of people (like teachers, construction workers, or people of different faiths) use entirely different vocabularies to discuss the *same topics*.
         ### 🛠️ What can it do?
+        Think of it as a search tool for testing cultural language trends. You can test your own guesses (hypotheses) about how people talk. For example, if both Jewish and Catholic people are talking about "Healthcare", do they focus on different things?
         You pick the groups, pick a topic, and type in some words. The tool will then crunch the data and tell you two things:
         1. 📊 **Is it true? (Lift):** Does your chosen group *actually* use these words more than the other group?
+        2. 💡 **Is it interesting? (Triviality):** Are these words an unexpected, deep cultural insight? Or are they just boring, obvious terms (like a Catholic person using the word "church")?
+        ### ⚙️ How to use
         1. Select a **Target** group, a **Contrast** group, and a **Discussion Topic**.
         2. Provide a **Candidate Lexicon** (a list of words you guess the target group uses *more* than the contrast group for this topic).
         3. Click **Test Hypothesis** to see if the data supports your idea!
         """)
+    # --- INPUTS ---
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Step 1: Set the Context")
+            with gr.Group():
+                with gr.Row():
+                    target_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="🎯 Target Demo", value="black", scale=2)
+                    swap_btn = gr.Button("🔄 Swap", scale=1, min_width=60)
+                    contrast_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="⚖️ Contrast Demo", value="teacher", scale=2)
+                choices = get_topics_for_demos("black", "teacher")
+                default_idx = choices[0][1] if choices and choices[0][1] != "" else None
+                index_dropdown = gr.Dropdown(choices=choices, label="💬 Discussion Topic", value=default_idx)
+        with gr.Column(scale=1):
+            gr.Markdown("### 📝 Step 2: Define Lexicon")
+            with gr.Group():
+                lexicon_input = gr.Textbox(
+                    show_label=False,
+                    placeholder="e.g. word1, phrase two, word3...",
+                    info="Enter a comma-separated list of words/phrases you hypothesize the Target uses more than the Contrast in the selected Topic.",
+                    lines=3
+                )
+                with gr.Row(elem_classes=["lexicon-action-row"]):
+                    quick_triv = gr.Button("🟡 Auto-fill Trivial (Obvious)", elem_id="quick-triv-btn", scale=1)
+                    quick_creat = gr.Button("✨ Auto-fill Creative (LLM-Generated)", elem_id="quick-creat-btn", scale=1)
+                    clear_btn = gr.Button("🗑️ Clear", variant="secondary", elem_id="clear-btn", scale=1)
     test_btn = gr.Button("🚀 Test Hypothesis!", variant="primary", size="lg")
     gr.Markdown("---")
     verdict_out = gr.HTML()
     with gr.Row():
         lift_out = gr.HTML()
         triv_out = gr.HTML()
     plot_out = gr.Plot(value=tab3_plot)
     with gr.Accordion("🔍 View Top Retrieved Posts (Contextualize the Lexicon)", open=False):
         posts_out = gr.Dataframe(headers=["ID", "BM25 Score", "Demographic", "Content"], interactive=False)

cat_splits.png ADDED Viewed

Git LFS Details

SHA256: 93f5446223ee5d85e4369a8bac3b136154cea302c329efe859122cfc8bd692dd
Pointer size: 132 Bytes
Size of remote file: 2.44 MB