Eylon Caplan committed on
Commit ·
58eaa88
1
Parent(s): 160827a
Add demo code and track PNGs with LFS
Browse files
- .gitattributes +1 -0
- app.py +88 -52
- cat_splits.png +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -91,6 +91,27 @@ TITLE_ICON_HTML = (
|
|
| 91 |
if CAT_ICON_SRC else ""
|
| 92 |
)
|
| 93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
def get_topics_for_demos(target, contrast):
|
| 95 |
if not os.path.exists(INDEX_DIR):
|
| 96 |
return [("No indices found", "")]
|
|
@@ -143,16 +164,17 @@ def generate_verdict_banner(lift, pval, triviality):
|
|
| 143 |
is_trivial = triviality >= TRIVIALITY_THRESHOLD
|
| 144 |
is_sig = lift > 1.0 and pval < 0.05
|
| 145 |
|
| 146 |
-
|
|
|
|
| 147 |
|
| 148 |
if is_trivial and is_sig:
|
| 149 |
-
return f"<div style='{base_style} background-color:
|
| 150 |
elif is_trivial and not is_sig:
|
| 151 |
-
return f"<div style='{base_style} background-color:
|
| 152 |
elif not is_trivial and is_sig:
|
| 153 |
-
return f"<div style='{base_style} background-color:
|
| 154 |
else:
|
| 155 |
-
return f"<div style='{base_style} background-color:
|
| 156 |
|
| 157 |
def format_demo(name):
|
| 158 |
if not name:
|
|
@@ -176,13 +198,13 @@ def run_evaluation(index_name, target_demo, contrast_demo, generated_words_str):
|
|
| 176 |
lift_0_5_percent = lift_at_k(df_results, target_demo, k=0.005)
|
| 177 |
pval_0_5, _, _ = lift_ci(df_results, target_demo, k=0.005)
|
| 178 |
|
| 179 |
-
card_style = "background-color:
|
| 180 |
|
| 181 |
-
lift_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color:
|
| 182 |
|
| 183 |
# 2. Compute Triviality
|
| 184 |
triviality = compute_triviality(generated_words, target_demo)
|
| 185 |
-
triv_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color:
|
| 186 |
|
| 187 |
verdict = generate_verdict_banner(lift_0_5_percent, pval_0_5, triviality)
|
| 188 |
|
|
@@ -271,70 +293,84 @@ except Exception as e:
|
|
| 271 |
tab3_plot = load_tab3_data()
|
| 272 |
|
| 273 |
# Utilize a clean, modern Gradio Theme
|
| 274 |
-
with gr.Blocks(title="Splits! Sandbox", theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as demo:
|
| 275 |
-
|
| 276 |
gr.Markdown(f"""
|
| 277 |
-
<h1 style='display:flex; align-items:center; gap:0.35em; margin:0 0 0.
|
| 278 |
{TITLE_ICON_HTML}
|
| 279 |
<span>Splits! Language & Culture Sandbox</span>
|
| 280 |
</h1>
|
| 281 |
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
### 🤔 What is this?
|
| 287 |
-
The way we speak is heavily influenced by our background and culture. This tool lets you explore how different groups of people (like teachers, construction workers, or people of different faiths) use entirely different vocabularies to discuss the *
|
| 288 |
-
|
| 289 |
### 🛠️ What can it do?
|
| 290 |
-
Think of it as a search
|
| 291 |
-
|
| 292 |
You pick the groups, pick a topic, and type in some words. The tool will then crunch the data and tell you two things:
|
| 293 |
1. 📊 **Is it true? (Lift):** Does your chosen group *actually* use these words more than the other group?
|
| 294 |
-
2. 💡 **Is it interesting? (Triviality):** Are these words an unexpected, deep cultural insight? Or are they just boring, obvious terms (like a Catholic person using the word "
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
**How to use:**
|
| 299 |
1. Select a **Target** group, a **Contrast** group, and a **Discussion Topic**.
|
| 300 |
2. Provide a **Candidate Lexicon** (a list of words you guess the target group uses *more* than the contrast group for this topic).
|
| 301 |
3. Click **Test Hypothesis** to see if the data supports your idea!
|
|
|
|
| 302 |
""")
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
test_btn = gr.Button("🚀 Test Hypothesis!", variant="primary", size="lg")
|
| 327 |
-
|
| 328 |
gr.Markdown("---")
|
| 329 |
-
|
| 330 |
verdict_out = gr.HTML()
|
| 331 |
-
|
| 332 |
with gr.Row():
|
| 333 |
lift_out = gr.HTML()
|
| 334 |
triv_out = gr.HTML()
|
| 335 |
-
|
| 336 |
plot_out = gr.Plot(value=tab3_plot)
|
| 337 |
-
|
| 338 |
with gr.Accordion("🔍 View Top Retrieved Posts (Contextualize the Lexicon)", open=False):
|
| 339 |
posts_out = gr.Dataframe(headers=["ID", "BM25 Score", "Demographic", "Content"], interactive=False)
|
| 340 |
|
|
|
|
| 91 |
if CAT_ICON_SRC else ""
|
| 92 |
)
|
| 93 |
|
| 94 |
+
BUTTON_FILL_CSS = """
|
| 95 |
+
.lexicon-action-row {
|
| 96 |
+
align-items: stretch !important;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
#quick-triv-btn,
|
| 100 |
+
#quick-creat-btn,
|
| 101 |
+
#clear-btn {
|
| 102 |
+
display: flex;
|
| 103 |
+
align-self: stretch !important;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
#quick-triv-btn button,
|
| 107 |
+
#quick-creat-btn button,
|
| 108 |
+
#clear-btn button {
|
| 109 |
+
width: 100%;
|
| 110 |
+
min-height: 44px;
|
| 111 |
+
height: 100% !important;
|
| 112 |
+
}
|
| 113 |
+
"""
|
| 114 |
+
|
| 115 |
def get_topics_for_demos(target, contrast):
|
| 116 |
if not os.path.exists(INDEX_DIR):
|
| 117 |
return [("No indices found", "")]
|
|
|
|
| 164 |
is_trivial = triviality >= TRIVIALITY_THRESHOLD
|
| 165 |
is_sig = lift > 1.0 and pval < 0.05
|
| 166 |
|
| 167 |
+
# Use Gradio CSS vars and neutral styling so cards work in light and dark mode.
|
| 168 |
+
base_style = "padding:20px; border-radius:12px; text-align:center; box-shadow: 0 4px 6px rgba(0,0,0,0.1); margin-bottom: 15px;"
|
| 169 |
|
| 170 |
if is_trivial and is_sig:
|
| 171 |
+
return f"<div style='{base_style} background-color: rgba(255, 193, 7, 0.2); border: 1px solid #ffe69c;'><h2 style='margin:0; color: #d39e00;'>🟡 Supported, but Trivial</h2><p style='margin-top:10px; font-size:16px;'>This lexicon successfully isolates the target demographic, but it is likely <strong>too obvious/definitional</strong> to be of interest.</p></div>"
|
| 172 |
elif is_trivial and not is_sig:
|
| 173 |
+
return f"<div style='{base_style} background-color: rgba(220, 53, 69, 0.2); border: 1px solid #f5c6cb;'><h2 style='margin:0; color: #c82333;'>🔴 Trivial & Unsupported</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is <strong>definitional</strong> to the target demographic, and also failed to provide significant lift for the target demographic.</p></div>"
|
| 174 |
elif not is_trivial and is_sig:
|
| 175 |
+
return f"<div style='{base_style} background-color: rgba(40, 167, 69, 0.2); border: 1px solid #c3e6cb;'><h2 style='margin:0; color: #218838;'>🟢 Promising PSLP!</h2><p style='margin-top:10px; font-size:16px;'>This hypothesis is <strong>supported by the data</strong> (high lift) AND is <strong>unexpected</strong> (low triviality). Worthy of further study.</p></div>"
|
| 176 |
else:
|
| 177 |
+
return f"<div style='{base_style} background-color: rgba(108, 117, 125, 0.2); border: 1px solid #d6d8db;'><h2 style='margin:0; color: var(--body-text-color);'>⚪ Unsupported Hypothesis</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is non-trivial, but the data <strong>does not support</strong> the hypothesis (it distinguishes the demographics no better than random).</p></div>"
|
| 178 |
|
| 179 |
def format_demo(name):
|
| 180 |
if not name:
|
|
|
|
| 198 |
lift_0_5_percent = lift_at_k(df_results, target_demo, k=0.005)
|
| 199 |
pval_0_5, _, _ = lift_ci(df_results, target_demo, k=0.005)
|
| 200 |
|
| 201 |
+
card_style = "background-color: var(--block-background-fill); padding:20px; border-radius:12px; border: 1px solid var(--border-color-primary); box-shadow: 0 2px 4px rgba(0,0,0,0.05); height: 100%;"
|
| 202 |
|
| 203 |
+
lift_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Lift@0.5%: <span style='color:#007bff;'>{lift_0_5_percent:.2f}x</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>p-value: {pval_0_5:.4f}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>This lexicon pulled <b style='color: inherit;'>{target_fmt}</b> posts to the top {lift_0_5_percent:.2f}x more than random compared to <b style='color: inherit;'>{contrast_fmt}</b>.</p></div>"
|
| 204 |
|
| 205 |
# 2. Compute Triviality
|
| 206 |
triviality = compute_triviality(generated_words, target_demo)
|
| 207 |
+
triv_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Triviality Score: <span style='color:#6f42c1;'>{triviality:.3f}</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>Threshold: < {TRIVIALITY_THRESHOLD}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>Measures semantic similarity to the seed words of <b style='color: inherit;'>{target_fmt}</b>. Lower scores indicate a likely more unexpected PSLP.</p></div>"
|
| 208 |
|
| 209 |
verdict = generate_verdict_banner(lift_0_5_percent, pval_0_5, triviality)
|
| 210 |
|
|
|
|
| 293 |
tab3_plot = load_tab3_data()
|
| 294 |
|
| 295 |
# Utilize a clean, modern Gradio Theme
|
| 296 |
+
with gr.Blocks(title="Splits! Sandbox", theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=BUTTON_FILL_CSS) as demo:
|
| 297 |
+
# --- HEADER ---
|
| 298 |
gr.Markdown(f"""
|
| 299 |
+
<h1 style='display:flex; align-items:center; gap:0.35em; margin:0 0 0.2em 0;'>
|
| 300 |
{TITLE_ICON_HTML}
|
| 301 |
<span>Splits! Language & Culture Sandbox</span>
|
| 302 |
</h1>
|
| 303 |
|
| 304 |
+
Welcome to the companion demo for **Splits!** Explore how different sociocultural groups use entirely different vocabularies to discuss the *exact same topics*.
|
| 305 |
+
|
| 306 |
+
<div style="margin-top: 16px; margin-bottom: 8px;">
|
| 307 |
+
<a href="https://arxiv.org/abs/2504.04640" target="_blank"
|
| 308 |
+
style="display: inline-flex; align-items: center; gap: 8px; background-color: var(--button-secondary-background-fill); border: 1px solid var(--border-color-primary); border-radius: 999px; padding: 8px 18px; text-decoration: none; color: var(--body-text-color); font-weight: 500; font-size: 0.95em; box-shadow: 0 1px 3px rgba(0,0,0,0.05);">
|
| 309 |
+
📄 <b>Read the paper:</b> Splits! Flexible Sociocultural Linguistic Investigation at Scale
|
| 310 |
+
<span style="color: var(--body-text-color-subdued); margin-left: 4px;">↗</span>
|
| 311 |
+
</a>
|
| 312 |
+
</div>
|
| 313 |
+
""")
|
| 314 |
+
|
| 315 |
+
with gr.Accordion("📖 What is this & How to use it?", open=False):
|
| 316 |
+
gr.Markdown("""
|
| 317 |
### 🤔 What is this?
|
| 318 |
+
The way we speak is heavily influenced by our background and culture. This tool lets you explore how different groups of people (like teachers, construction workers, or people of different faiths) use entirely different vocabularies to discuss the *same topics*.
|
| 319 |
+
|
| 320 |
### 🛠️ What can it do?
|
| 321 |
+
Think of it as a search tool for testing cultural language trends. You can test your own guesses (hypotheses) about how people talk. For example, if both Jewish and Catholic people are talking about "Healthcare", do they focus on different things?
|
| 322 |
+
|
| 323 |
You pick the groups, pick a topic, and type in some words. The tool will then crunch the data and tell you two things:
|
| 324 |
1. 📊 **Is it true? (Lift):** Does your chosen group *actually* use these words more than the other group?
|
| 325 |
+
2. 💡 **Is it interesting? (Triviality):** Are these words an unexpected, deep cultural insight? Or are they just boring, obvious terms (like a Catholic person using the word "church")?
|
| 326 |
+
|
| 327 |
+
### ⚙️ How to use
|
|
|
|
|
|
|
| 328 |
1. Select a **Target** group, a **Contrast** group, and a **Discussion Topic**.
|
| 329 |
2. Provide a **Candidate Lexicon** (a list of words you guess the target group uses *more* than the contrast group for this topic).
|
| 330 |
3. Click **Test Hypothesis** to see if the data supports your idea!
|
| 331 |
+
|
| 332 |
""")
|
| 333 |
+
|
| 334 |
+
# --- INPUTS ---
|
| 335 |
+
with gr.Row():
|
| 336 |
+
with gr.Column(scale=1):
|
| 337 |
+
gr.Markdown("### ⚙️ Step 1: Set the Context")
|
| 338 |
+
with gr.Group():
|
| 339 |
+
with gr.Row():
|
| 340 |
+
target_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="🎯 Target Demo", value="black", scale=2)
|
| 341 |
+
swap_btn = gr.Button("🔄 Swap", scale=1, min_width=60)
|
| 342 |
+
contrast_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="⚖️ Contrast Demo", value="teacher", scale=2)
|
| 343 |
+
|
| 344 |
+
choices = get_topics_for_demos("black", "teacher")
|
| 345 |
+
default_idx = choices[0][1] if choices and choices[0][1] != "" else None
|
| 346 |
+
index_dropdown = gr.Dropdown(choices=choices, label="💬 Discussion Topic", value=default_idx)
|
| 347 |
+
|
| 348 |
+
with gr.Column(scale=1):
|
| 349 |
+
gr.Markdown("### 📝 Step 2: Define Lexicon")
|
| 350 |
+
with gr.Group():
|
| 351 |
+
lexicon_input = gr.Textbox(
|
| 352 |
+
show_label=False,
|
| 353 |
+
placeholder="e.g. word1, phrase two, word3...",
|
| 354 |
+
info="Enter a comma-separated list of words/phrases you hypothesize the Target uses more than the Contrast in the selected Topic.",
|
| 355 |
+
lines=3
|
| 356 |
+
)
|
| 357 |
+
with gr.Row(elem_classes=["lexicon-action-row"]):
|
| 358 |
+
quick_triv = gr.Button("🟡 Auto-fill Trivial (Obvious)", elem_id="quick-triv-btn", scale=1)
|
| 359 |
+
quick_creat = gr.Button("✨ Auto-fill Creative (LLM-Generated)", elem_id="quick-creat-btn", scale=1)
|
| 360 |
+
clear_btn = gr.Button("🗑️ Clear", variant="secondary", elem_id="clear-btn", scale=1)
|
| 361 |
+
|
| 362 |
test_btn = gr.Button("🚀 Test Hypothesis!", variant="primary", size="lg")
|
| 363 |
+
|
| 364 |
gr.Markdown("---")
|
| 365 |
+
|
| 366 |
verdict_out = gr.HTML()
|
| 367 |
+
|
| 368 |
with gr.Row():
|
| 369 |
lift_out = gr.HTML()
|
| 370 |
triv_out = gr.HTML()
|
| 371 |
+
|
| 372 |
plot_out = gr.Plot(value=tab3_plot)
|
| 373 |
+
|
| 374 |
with gr.Accordion("🔍 View Top Retrieved Posts (Contextualize the Lexicon)", open=False):
|
| 375 |
posts_out = gr.Dataframe(headers=["ID", "BM25 Score", "Demographic", "Content"], interactive=False)
|
| 376 |
|
cat_splits.png
ADDED
|
Git LFS Details
|