IJ-Reynolds HF Staff committed on
Commit
1a3434a
·
verified ·
1 Parent(s): 305fccb

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +20 -20
streamlit_app.py CHANGED
@@ -333,26 +333,26 @@ if active_df is not None and not active_df.empty:
333
  if hf_token:
334
  ui_client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
335
  for i in range(num_clusters):
336
- cluster_df = weekly_df[weekly_df['cluster'] == i]
337
- sample_texts = "\n".join(cluster_df['title'].head(8).tolist())
338
-
339
- prompt = f"""
340
- You are a highly structured D.C. Tech Policy Taxonomist. Categorize these related article titles into a SINGLE, broad policy or industry bucket.
341
-
342
- RULES:
343
- 1. MACRO CATEGORIES ONLY: Use 1 to 3 words maximum. Think of these as slide deck section headers.
344
- 2. NO HEADLINES: Absolutely NO verbs, NO company names, NO numbers, and NO dollar amounts.
345
- * BAD: "Start-Up Raises $1.3 Billion", "Congress Debates AI Bill"
346
- * GOOD: "Venture Capital", "Legislative Action", "AI Infrastructure"
347
- 3. EXAMPLES OF IDEAL LABELS: "AI Infrastructure", "Export Controls", "AI Safety", "Defense & Security", "Consumer Regulation", "Industry Update".
348
- 4. FILTER NOISE: If the articles are not about AI, compute, or tech policy, reply EXACTLY with: REJECT.
349
- 5. FORMAT: Just the category name. No quotes, no extra text.
350
-
351
- UPDATES:
352
- {sample_texts}
353
- """
354
-
355
- messages = [{"role": "user", "content": prompt}]
356
  try:
357
  response = ui_client.chat_completion(messages, max_tokens=15, temperature=0.0)
358
  topic_name = response.choices[0].message.content.strip(' "').upper()
 
333
  if hf_token:
334
  ui_client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
335
  for i in range(num_clusters):
336
+ cluster_df = weekly_df[weekly_df['cluster'] == i]
337
+ sample_texts = "\n".join(cluster_df['title'].head(8).tolist())
338
+
339
+ prompt = f"""
340
+ You are a highly structured D.C. Tech Policy Taxonomist. Categorize these related article titles into a SINGLE, broad policy or industry bucket.
341
+
342
+ RULES:
343
+ 1. MACRO CATEGORIES ONLY: Use 1 to 3 words maximum. Think of these as slide deck section headers.
344
+ 2. NO HEADLINES: Absolutely NO verbs, NO company names, NO numbers, and NO dollar amounts.
345
+ * BAD: "Start-Up Raises $1.3 Billion", "Congress Debates AI Bill"
346
+ * GOOD: "Venture Capital", "Legislative Action", "AI Infrastructure"
347
+ 3. EXAMPLES OF IDEAL LABELS: "AI Infrastructure", "Export Controls", "AI Safety", "Defense & Security", "Consumer Regulation", "Industry Update".
348
+ 4. FILTER NOISE: If the articles are not about AI, compute, or tech policy, reply EXACTLY with: REJECT.
349
+ 5. FORMAT: Just the category name. No quotes, no extra text.
350
+
351
+ UPDATES:
352
+ {sample_texts}
353
+ """
354
+
355
+ messages = [{"role": "user", "content": prompt}]
356
  try:
357
  response = ui_client.chat_completion(messages, max_tokens=15, temperature=0.0)
358
  topic_name = response.choices[0].message.content.strip(' "').upper()