IJ-Reynolds (HF Staff) committed
Commit 09dd029 · verified · 1 Parent(s): 8fb7a85

Update streamlit_app.py

Files changed (1):
  1. streamlit_app.py +69 -24
streamlit_app.py CHANGED
@@ -252,12 +252,13 @@ if active_df is not None and not active_df.empty:
 
     st.divider()
 
-    ## --- Trend analysis ---
+    #Trend Analysis (text categories)
+
     st.subheader("Weekly AI Trend Analysis")
-    st.markdown("Explore the semantic relationships between this week's policy updates. **Hover over any dot** to see the specific article and source. The math engine automatically determines the number of distinct macro-trends based on semantic density.")
+    st.markdown("Explore the timeline of this week's AI policy developments. **Hover over any dot** to see the specific article and source. Non-AI related noise is automatically filtered out by the AI classifier.")
 
     if st.button("Generate Weekly Trend Report"):
-        with st.spinner("Calculating high-fidelity semantic topography... (Takes ~30 seconds)"):
+        with st.spinner("Analyzing semantic data and abstracting macro-trends... (Takes ~30 seconds)"):
             # 1. Filter for the last 7 days
             week_ago = pd.Timestamp.now().normalize() - pd.Timedelta(days=7)
             weekly_df = active_df[active_df['event_date'] >= week_ago].copy()
@@ -266,24 +267,19 @@ if st.button("Generate Weekly Trend Report"):
             weekly_df = weekly_df.dropna(subset=['embedding'])
 
             if len(weekly_df) < 5:
-                st.warning(f"Only {len(weekly_df)} embedded updates found this week. The AI requires at least 5 to confidently calculate mathematical trends.")
+                st.warning(f"Only {len(weekly_df)} embedded updates found this week. The AI requires at least 5 to calculate mathematical trends.")
             else:
                 matrix = np.vstack(weekly_df['embedding'].apply(json.loads).values)
 
-                # ---------------------------------------------------------
-                # HIGH FIDELITY FIX: Distance Threshold + Complete Linkage
-                # distance_threshold=0.55 forces tight semantic groupings.
-                # ---------------------------------------------------------
+                # 3. Create clusters
                 clusterer = AgglomerativeClustering(n_clusters=None, distance_threshold=0.55, metric='cosine', linkage='complete')
                 weekly_df['cluster'] = clusterer.fit_predict(matrix)
 
                 num_clusters = weekly_df['cluster'].nunique()
-
-                # Guardrail: If it makes too many micro-clusters, cap it at 6
-                if num_clusters > 6:
-                    clusterer = AgglomerativeClustering(n_clusters=6, metric='cosine', linkage='complete')
+                if num_clusters > 8:
+                    clusterer = AgglomerativeClustering(n_clusters=8, metric='cosine', linkage='complete')
                     weekly_df['cluster'] = clusterer.fit_predict(matrix)
-                    num_clusters = 6
+                    num_clusters = 8
 
                 weekly_df['Trend Topic'] = "Uncategorized"
 
@@ -291,18 +287,20 @@ if st.button("Generate Weekly Trend Report"):
                 if hf_token:
                     ui_client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
 
-                    # 4. Background Naming Loop
-                    st.write(f"### Top {num_clusters} Trends This Week:")
+                    # 4. Background Naming & Abstraction Loop
                     for i in range(num_clusters):
                         cluster_df = weekly_df[weekly_df['cluster'] == i]
-
-                        # FIDELITY FIX #2: Feed the AI up to 8 titles instead of just 3!
                         sample_texts = "\n".join(cluster_df['title'].head(8).tolist())
 
+                        # ---> THE ABSTRACTION PROMPT <---
                         prompt = f"""
-                        Analyze these related policy updates and provide a highly specific, concise, 2-to-4 word title for this macro-trend (e.g., "Export Control Expansion" or "AI Safety Funding").
-                        Identify the exact specific policy mechanism happening. Do not use generic words like "Updates", "General", "Various", "Irrelevant", or "Other".
-                        Do not include quotes. Just the title.
+                        You are a highly analytical D.C. Tech Policy Analyst. Review these article titles.
+                        Your goal is to identify the MACRO-LEVEL AI policy, regulatory, or industry trend they represent.
+
+                        RULES:
+                        1. ABSTRACT UP: If the articles are about specific consumer products (like a dating app), specific niche companies, or localized events, DO NOT name the specific product or company. Name the broader industry trend (e.g., "Commercial AI Deployment", "Consumer Algorithms", "Market Consolidation").
+                        2. FILTER NOISE: If the articles have absolutely nothing to do with AI, algorithms, compute, or tech policy, reply with EXACTLY the word: REJECT.
+                        3. FORMAT: Provide a concise 2-to-4 word title. No quotes, no extra text.
 
                         UPDATES:
                         {sample_texts}
@@ -310,15 +308,62 @@ if st.button("Generate Weekly Trend Report"):
 
                         messages = [{"role": "user", "content": prompt}]
                         try:
-                            response = ui_client.chat_completion(messages, max_tokens=15, temperature=0.1)
-                            topic_name = response.choices[0].message.content.strip(' "')
-                            weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = topic_name
+                            response = ui_client.chat_completion(messages, max_tokens=15, temperature=0.0)
+                            topic_name = response.choices[0].message.content.strip(' "').upper()
+
+                            # Catch the rejection or format the title nicely
+                            if "REJECT" in topic_name:
+                                weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
+                            else:
+                                weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = topic_name.title()
+
                         except Exception as e:
                             print(f"Failed to name cluster {i}: {e}")
-                            weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = f"Trend Cluster {i+1}"
+                            weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
 
                         time.sleep(10) # API Rate Limit Safety
 
+                    # ---> PURGE THE NOISE BEFORE VISUALIZING <---
+                    clean_df = weekly_df[weekly_df['Trend Topic'] != "REJECT"].copy()
+
+                    if clean_df.empty:
+                        st.warning("All data this week was classified as non-AI noise by the analyst model.")
+                    else:
+                        st.write(f"### Top AI Trends This Week:")
+
+                        # Display the cleaned metrics
+                        valid_clusters = clean_df['cluster'].unique()
+                        for cluster_id in valid_clusters:
+                            cluster_subset = clean_df[clean_df['cluster'] == cluster_id]
+                            topic_label = cluster_subset['Trend Topic'].iloc[0]
+                            st.metric(label=topic_label, value=f"{len(cluster_subset)} Updates")
+
+                        # ---------------------------------------------------------
+                        # 5. THE VISUALIZATION: Analytical Timeline Swarm
+                        # ---------------------------------------------------------
+                        st.write("### Trend Timeline")
+
+                        chart = alt.Chart(clean_df).mark_circle(size=150, opacity=0.8).encode(
+                            x=alt.X('event_date:T', title='Date', axis=alt.Axis(format='%b %d', grid=True)),
+                            y=alt.Y('Trend Topic:N', title='', sort='-x', axis=alt.Axis(labelLimit=300)),
+                            color=alt.Color('Trend Topic:N', legend=None),
+                            tooltip=[
+                                alt.Tooltip('event_date:T', title='Date', format='%b %d, %Y'),
+                                alt.Tooltip('Trend Topic:N', title='Macro Trend'),
+                                alt.Tooltip('title:N', title='Update Title'),
+                                alt.Tooltip('source:N', title='Source')
+                            ]
+                        ).properties(
+                            height=max(300, len(valid_clusters) * 60) # Dynamically sizes the chart height
+                        ).interactive()
+
+                        st.altair_chart(chart, use_container_width=True)
+
+                else:
+                    st.error("Hugging Face API token not found. Cannot generate topic names.")
+
+                st.divider()
+
                 # ---------------------------------------------------------
                 # 5. THE VISUALIZATION: Dynamic t-SNE Projection
                 # ---------------------------------------------------------
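For reference, a rough standalone sketch of the cluster → cap → name/REJECT → purge flow this diff wires into the Streamlit app. It is not part of the commit: it uses synthetic embeddings and a stubbed naming function in place of the Llama call, and names such as df and name_cluster are illustrative only.

# Minimal sketch (not from the repository) of the clustering and REJECT-filtering flow above,
# runnable outside Streamlit with synthetic data.
import numpy as np
import pandas as pd
from sklearn.cluster import AgglomerativeClustering  # `metric=` assumes scikit-learn >= 1.2, as in the commit

rng = np.random.default_rng(0)
df = pd.DataFrame({
    "title": [f"Article {i}" for i in range(20)],                 # stand-in article titles
    "embedding": [rng.normal(size=8).tolist() for _ in range(20)],  # stand-in embeddings
})
matrix = np.vstack(df["embedding"].values)

# Same clustering settings as the commit: cosine distance, complete linkage,
# cluster count decided by the 0.55 distance threshold.
clusterer = AgglomerativeClustering(n_clusters=None, distance_threshold=0.55,
                                    metric="cosine", linkage="complete")
df["cluster"] = clusterer.fit_predict(matrix)

# Guardrail from the commit: re-fit with a hard cap of 8 if the threshold over-fragments.
if df["cluster"].nunique() > 8:
    clusterer = AgglomerativeClustering(n_clusters=8, metric="cosine", linkage="complete")
    df["cluster"] = clusterer.fit_predict(matrix)

def name_cluster(titles):
    # Stand-in for the chat_completion call; returns "REJECT" for clusters the model
    # would classify as non-AI noise (here: any singleton cluster, purely for illustration).
    return "Sample Trend" if len(titles) > 1 else "REJECT"

df["Trend Topic"] = df.groupby("cluster")["title"].transform(lambda s: name_cluster(list(s)))

# Purge rejected clusters before visualizing, as the commit does with clean_df.
clean_df = df[df["Trend Topic"] != "REJECT"].copy()
print(clean_df.groupby("Trend Topic").size())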