Eylon Caplan committed on
Commit
58eaa88
·
1 Parent(s): 160827a

Add demo code and track PNGs with LFS

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. app.py +88 -52
  3. cat_splits.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -91,6 +91,27 @@ TITLE_ICON_HTML = (
91
  if CAT_ICON_SRC else ""
92
  )
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def get_topics_for_demos(target, contrast):
95
  if not os.path.exists(INDEX_DIR):
96
  return [("No indices found", "")]
@@ -143,16 +164,17 @@ def generate_verdict_banner(lift, pval, triviality):
143
  is_trivial = triviality >= TRIVIALITY_THRESHOLD
144
  is_sig = lift > 1.0 and pval < 0.05
145
 
146
- base_style = "padding:20px; border-radius:12px; text-align:center; box-shadow: 0 4px 6px rgba(0,0,0,0.1);"
 
147
 
148
  if is_trivial and is_sig:
149
- return f"<div style='{base_style} background-color:#fff3cd; border: 1px solid #ffe69c;'><h2 style='margin:0; color:#856404;'>🟡 Supported, but Trivial</h2><p style='margin-top:10px; color:#856404; font-size:16px;'>This lexicon successfully isolates the target demographic, but it is likely <strong style='color: inherit;'>too obvious/definitional</strong> to be of interest.</p></div>"
150
  elif is_trivial and not is_sig:
151
- return f"<div style='{base_style} background-color:#f8d7da; border: 1px solid #f5c6cb;'><h2 style='margin:0; color:#721c24;'>🔴 Trivial & Unsupported</h2><p style='margin-top:10px; color:#721c24; font-size:16px;'>This lexicon is <strong style='color: inherit;'>definitional</strong> to the target demographic, and also failed to provide significant lift for the target demographic.</p></div>"
152
  elif not is_trivial and is_sig:
153
- return f"<div style='{base_style} background-color:#d4edda; border: 1px solid #c3e6cb;'><h2 style='margin:0; color:#155724;'>🟢 Promising PSLP!</h2><p style='margin-top:10px; color:#155724; font-size:16px;'>This hypothesis is <strong style='color: inherit;'>supported by the data</strong> (high lift) AND is <strong style='color: inherit;'>unexpected</strong> (low triviality). Worthy of further study.</p></div>"
154
  else:
155
- return f"<div style='{base_style} background-color:#e2e3e5; border: 1px solid #d6d8db;'><h2 style='margin:0; color:#383d41;'>⚪ Unsupported Hypothesis</h2><p style='margin-top:10px; color:#383d41; font-size:16px;'>This lexicon is non-trivial, but the data <strong style='color: inherit;'>does not support</strong> the hypothesis (it distinguishes the demographics no better than random).</p></div>"
156
 
157
  def format_demo(name):
158
  if not name:
@@ -176,13 +198,13 @@ def run_evaluation(index_name, target_demo, contrast_demo, generated_words_str):
176
  lift_0_5_percent = lift_at_k(df_results, target_demo, k=0.005)
177
  pval_0_5, _, _ = lift_ci(df_results, target_demo, k=0.005)
178
 
179
- card_style = "background-color:#ffffff; padding:20px; border-radius:12px; border: 1px solid #e0e0e0; box-shadow: 0 2px 4px rgba(0,0,0,0.05); height: 100%;"
180
 
181
- lift_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color:#2c3e50;'>Lift@0.5%: <span style='color:#007bff;'>{lift_0_5_percent:.2f}x</span></h3><p style='margin:8px 0; color:#6c757d;'><strong style='color: inherit;'>p-value: {pval_0_5:.4f}</strong></p><p style='margin:0; font-size:14px; color:#495057;'>This lexicon pulled <b style='color: inherit;'>{target_fmt}</b> posts to the top {lift_0_5_percent:.2f}x more than random compared to <b style='color: inherit;'>{contrast_fmt}</b>.</p></div>"
182
 
183
  # 2. Compute Triviality
184
  triviality = compute_triviality(generated_words, target_demo)
185
- triv_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color:#2c3e50;'>Triviality Score: <span style='color:#6f42c1;'>{triviality:.3f}</span></h3><p style='margin:8px 0; color:#6c757d;'><strong style='color: inherit;'>Threshold: < {TRIVIALITY_THRESHOLD}</strong></p><p style='margin:0; font-size:14px; color:#495057;'>Measures semantic similarity to the seed words of <b style='color: inherit;'>{target_fmt}</b>. Lower scores indicate a likely more unexpected PSLP.</p></div>"
186
 
187
  verdict = generate_verdict_banner(lift_0_5_percent, pval_0_5, triviality)
188
 
@@ -271,70 +293,84 @@ except Exception as e:
271
  tab3_plot = load_tab3_data()
272
 
273
  # Utilize a clean, modern Gradio Theme
274
- with gr.Blocks(title="Splits! Sandbox", theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue")) as demo:
275
-
276
  gr.Markdown(f"""
277
- <h1 style='display:flex; align-items:center; gap:0.35em; margin:0 0 0.4em 0;'>
278
  {TITLE_ICON_HTML}
279
  <span>Splits! Language & Culture Sandbox</span>
280
  </h1>
281
 
282
- 📄 <a href="https://arxiv.org/abs/2504.04640" target="_blank" style="font-weight: bold;">Read the full paper: SPLITS! Flexible Sociocultural Linguistic Investigation at Scale</a>
283
-
284
- Welcome to the companion demo for **Splits!**
285
-
 
 
 
 
 
 
 
 
 
286
  ### 🤔 What is this?
287
- The way we speak is heavily influenced by our background and culture. This tool lets you explore how different groups of people (like teachers, construction workers, or people of different faiths) use entirely different vocabularies to discuss the *exact same topics*.
288
-
289
  ### 🛠️ What can it do?
290
- Think of it as a search engine for testing cultural language trends. You can test your own guesses (hypotheses) about how people talk. For example, if both Jewish and Catholic people are talking about "Healthcare", do they focus on different things?
291
-
292
  You pick the groups, pick a topic, and type in some words. The tool will then crunch the data and tell you two things:
293
  1. 📊 **Is it true? (Lift):** Does your chosen group *actually* use these words more than the other group?
294
- 2. 💡 **Is it interesting? (Triviality):** Are these words an unexpected, deep cultural insight? Or are they just boring, obvious terms (like a Catholic person using the word "Catholic")?
295
-
296
- ---
297
-
298
- **How to use:**
299
  1. Select a **Target** group, a **Contrast** group, and a **Discussion Topic**.
300
  2. Provide a **Candidate Lexicon** (a list of words you guess the target group uses *more* than the contrast group for this topic).
301
  3. Click **Test Hypothesis** to see if the data supports your idea!
 
302
  """)
303
-
304
- with gr.Group():
305
- with gr.Row():
306
- target_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="🎯 Target Demographic", value="black", scale=2)
307
- swap_btn = gr.Button("🔄 Swap", scale=1, min_width=80)
308
- contrast_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="⚖️ Contrast Demographic", value="teacher", scale=2)
309
-
310
- with gr.Row():
311
- choices = get_topics_for_demos("black", "teacher")
312
- default_idx = choices[0][1] if choices and choices[0][1] != "" else None
313
- index_dropdown = gr.Dropdown(choices=choices, label="💬 Discussion Topic (Context)", value=default_idx, scale=3)
314
-
315
- with gr.Group():
316
- lexicon_input = gr.Textbox(
317
- label="Candidate Lexicon (PSLP)",
318
- placeholder="e.g. word1, phrase two, word3...",
319
- info="Enter a comma-separated list of words/phrases you hypothesize the Target uses more than the Contrast in the selected Topic."
320
- )
321
- with gr.Row():
322
- quick_triv = gr.Button("Auto-fill Trivial Lexicon (Obvious)")
323
- quick_creat = gr.Button("Auto-fill Creative Lexicon (LLM Generated)")
324
- clear_btn = gr.Button("Clear", variant="secondary")
325
-
 
 
 
 
 
 
326
  test_btn = gr.Button("🚀 Test Hypothesis!", variant="primary", size="lg")
327
-
328
  gr.Markdown("---")
329
-
330
  verdict_out = gr.HTML()
331
-
332
  with gr.Row():
333
  lift_out = gr.HTML()
334
  triv_out = gr.HTML()
335
-
336
  plot_out = gr.Plot(value=tab3_plot)
337
-
338
  with gr.Accordion("🔍 View Top Retrieved Posts (Contextualize the Lexicon)", open=False):
339
  posts_out = gr.Dataframe(headers=["ID", "BM25 Score", "Demographic", "Content"], interactive=False)
340
 
 
91
  if CAT_ICON_SRC else ""
92
  )
93
 
94
+ BUTTON_FILL_CSS = """
95
+ .lexicon-action-row {
96
+ align-items: stretch !important;
97
+ }
98
+
99
+ #quick-triv-btn,
100
+ #quick-creat-btn,
101
+ #clear-btn {
102
+ display: flex;
103
+ align-self: stretch !important;
104
+ }
105
+
106
+ #quick-triv-btn button,
107
+ #quick-creat-btn button,
108
+ #clear-btn button {
109
+ width: 100%;
110
+ min-height: 44px;
111
+ height: 100% !important;
112
+ }
113
+ """
114
+
115
  def get_topics_for_demos(target, contrast):
116
  if not os.path.exists(INDEX_DIR):
117
  return [("No indices found", "")]
 
164
  is_trivial = triviality >= TRIVIALITY_THRESHOLD
165
  is_sig = lift > 1.0 and pval < 0.05
166
 
167
+ # Use Gradio CSS vars and neutral styling so cards work in light and dark mode.
168
+ base_style = "padding:20px; border-radius:12px; text-align:center; box-shadow: 0 4px 6px rgba(0,0,0,0.1); margin-bottom: 15px;"
169
 
170
  if is_trivial and is_sig:
171
+ return f"<div style='{base_style} background-color: rgba(255, 193, 7, 0.2); border: 1px solid #ffe69c;'><h2 style='margin:0; color: #d39e00;'>🟡 Supported, but Trivial</h2><p style='margin-top:10px; font-size:16px;'>This lexicon successfully isolates the target demographic, but it is likely <strong>too obvious/definitional</strong> to be of interest.</p></div>"
172
  elif is_trivial and not is_sig:
173
+ return f"<div style='{base_style} background-color: rgba(220, 53, 69, 0.2); border: 1px solid #f5c6cb;'><h2 style='margin:0; color: #c82333;'>🔴 Trivial & Unsupported</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is <strong>definitional</strong> to the target demographic, and also failed to provide significant lift for the target demographic.</p></div>"
174
  elif not is_trivial and is_sig:
175
+ return f"<div style='{base_style} background-color: rgba(40, 167, 69, 0.2); border: 1px solid #c3e6cb;'><h2 style='margin:0; color: #218838;'>🟢 Promising PSLP!</h2><p style='margin-top:10px; font-size:16px;'>This hypothesis is <strong>supported by the data</strong> (high lift) AND is <strong>unexpected</strong> (low triviality). Worthy of further study.</p></div>"
176
  else:
177
+ return f"<div style='{base_style} background-color: rgba(108, 117, 125, 0.2); border: 1px solid #d6d8db;'><h2 style='margin:0; color: var(--body-text-color);'>⚪ Unsupported Hypothesis</h2><p style='margin-top:10px; font-size:16px;'>This lexicon is non-trivial, but the data <strong>does not support</strong> the hypothesis (it distinguishes the demographics no better than random).</p></div>"
178
 
179
  def format_demo(name):
180
  if not name:
 
198
  lift_0_5_percent = lift_at_k(df_results, target_demo, k=0.005)
199
  pval_0_5, _, _ = lift_ci(df_results, target_demo, k=0.005)
200
 
201
+ card_style = "background-color: var(--block-background-fill); padding:20px; border-radius:12px; border: 1px solid var(--border-color-primary); box-shadow: 0 2px 4px rgba(0,0,0,0.05); height: 100%;"
202
 
203
+ lift_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Lift@0.5%: <span style='color:#007bff;'>{lift_0_5_percent:.2f}x</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>p-value: {pval_0_5:.4f}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>This lexicon pulled <b style='color: inherit;'>{target_fmt}</b> posts to the top {lift_0_5_percent:.2f}x more than random compared to <b style='color: inherit;'>{contrast_fmt}</b>.</p></div>"
204
 
205
  # 2. Compute Triviality
206
  triviality = compute_triviality(generated_words, target_demo)
207
+ triv_text = f"<div style='{card_style}'><h3 style='margin:0; font-size:24px; color: var(--body-text-color);'>Triviality Score: <span style='color:#6f42c1;'>{triviality:.3f}</span></h3><p style='margin:8px 0; color: var(--body-text-color-subdued);'><strong style='color: inherit;'>Threshold: &lt; {TRIVIALITY_THRESHOLD}</strong></p><p style='margin:0; font-size:14px; color: var(--body-text-color-subdued);'>Measures semantic similarity to the seed words of <b style='color: inherit;'>{target_fmt}</b>. Lower scores indicate a likely more unexpected PSLP.</p></div>"
208
 
209
  verdict = generate_verdict_banner(lift_0_5_percent, pval_0_5, triviality)
210
 
 
293
  tab3_plot = load_tab3_data()
294
 
295
  # Utilize a clean, modern Gradio Theme
296
+ with gr.Blocks(title="Splits! Sandbox", theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="blue"), css=BUTTON_FILL_CSS) as demo:
297
+ # --- HEADER ---
298
  gr.Markdown(f"""
299
+ <h1 style='display:flex; align-items:center; gap:0.35em; margin:0 0 0.2em 0;'>
300
  {TITLE_ICON_HTML}
301
  <span>Splits! Language & Culture Sandbox</span>
302
  </h1>
303
 
304
+ Welcome to the companion demo for **Splits!** Explore how different sociocultural groups use entirely different vocabularies to discuss the *exact same topics*.
305
+
306
+ <div style="margin-top: 16px; margin-bottom: 8px;">
307
+ <a href="https://arxiv.org/abs/2504.04640" target="_blank"
308
+ style="display: inline-flex; align-items: center; gap: 8px; background-color: var(--button-secondary-background-fill); border: 1px solid var(--border-color-primary); border-radius: 999px; padding: 8px 18px; text-decoration: none; color: var(--body-text-color); font-weight: 500; font-size: 0.95em; box-shadow: 0 1px 3px rgba(0,0,0,0.05);">
309
+ 📄 <b>Read the paper:</b> Splits! Flexible Sociocultural Linguistic Investigation at Scale
310
+ <span style="color: var(--body-text-color-subdued); margin-left: 4px;">↗</span>
311
+ </a>
312
+ </div>
313
+ """)
314
+
315
+ with gr.Accordion("📖 What is this & How to use it?", open=False):
316
+ gr.Markdown("""
317
  ### 🤔 What is this?
318
+ The way we speak is heavily influenced by our background and culture. This tool lets you explore how different groups of people (like teachers, construction workers, or people of different faiths) use entirely different vocabularies to discuss the *same topics*.
319
+
320
  ### 🛠️ What can it do?
321
+ Think of it as a search tool for testing cultural language trends. You can test your own guesses (hypotheses) about how people talk. For example, if both Jewish and Catholic people are talking about "Healthcare", do they focus on different things?
322
+
323
  You pick the groups, pick a topic, and type in some words. The tool will then crunch the data and tell you two things:
324
  1. 📊 **Is it true? (Lift):** Does your chosen group *actually* use these words more than the other group?
325
+ 2. 💡 **Is it interesting? (Triviality):** Are these words an unexpected, deep cultural insight? Or are they just boring, obvious terms (like a Catholic person using the word "church")?
326
+
327
+ ### ⚙️ How to use
 
 
328
  1. Select a **Target** group, a **Contrast** group, and a **Discussion Topic**.
329
  2. Provide a **Candidate Lexicon** (a list of words you guess the target group uses *more* than the contrast group for this topic).
330
  3. Click **Test Hypothesis** to see if the data supports your idea!
331
+
332
  """)
333
+
334
+ # --- INPUTS ---
335
+ with gr.Row():
336
+ with gr.Column(scale=1):
337
+ gr.Markdown("### ⚙️ Step 1: Set the Context")
338
+ with gr.Group():
339
+ with gr.Row():
340
+ target_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="🎯 Target Demo", value="black", scale=2)
341
+ swap_btn = gr.Button("🔄 Swap", scale=1, min_width=60)
342
+ contrast_demo = gr.Dropdown(choices=["black", "jewish", "catholic", "hindu_jain_sikh", "construction", "teacher"], label="⚖️ Contrast Demo", value="teacher", scale=2)
343
+
344
+ choices = get_topics_for_demos("black", "teacher")
345
+ default_idx = choices[0][1] if choices and choices[0][1] != "" else None
346
+ index_dropdown = gr.Dropdown(choices=choices, label="💬 Discussion Topic", value=default_idx)
347
+
348
+ with gr.Column(scale=1):
349
+ gr.Markdown("### 📝 Step 2: Define Lexicon")
350
+ with gr.Group():
351
+ lexicon_input = gr.Textbox(
352
+ show_label=False,
353
+ placeholder="e.g. word1, phrase two, word3...",
354
+ info="Enter a comma-separated list of words/phrases you hypothesize the Target uses more than the Contrast in the selected Topic.",
355
+ lines=3
356
+ )
357
+ with gr.Row(elem_classes=["lexicon-action-row"]):
358
+ quick_triv = gr.Button("🟡 Auto-fill Trivial (Obvious)", elem_id="quick-triv-btn", scale=1)
359
+ quick_creat = gr.Button("✨ Auto-fill Creative (LLM-Generated)", elem_id="quick-creat-btn", scale=1)
360
+ clear_btn = gr.Button("🗑️ Clear", variant="secondary", elem_id="clear-btn", scale=1)
361
+
362
  test_btn = gr.Button("🚀 Test Hypothesis!", variant="primary", size="lg")
363
+
364
  gr.Markdown("---")
365
+
366
  verdict_out = gr.HTML()
367
+
368
  with gr.Row():
369
  lift_out = gr.HTML()
370
  triv_out = gr.HTML()
371
+
372
  plot_out = gr.Plot(value=tab3_plot)
373
+
374
  with gr.Accordion("🔍 View Top Retrieved Posts (Contextualize the Lexicon)", open=False):
375
  posts_out = gr.Dataframe(headers=["ID", "BM25 Score", "Demographic", "Content"], interactive=False)
376
 
cat_splits.png ADDED

Git LFS Details

  • SHA256: 93f5446223ee5d85e4369a8bac3b136154cea302c329efe859122cfc8bd692dd
  • Pointer size: 132 Bytes
  • Size of remote file: 2.44 MB