Spaces:

IJ-Reynolds
/

AI_Intel_Tracker

Running

App Files Files Community

IJ-Reynolds HF Staff committed 12 days ago

Commit

5e8c57b

verified ·

1 Parent(s): 3ff209a

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +45 -11

streamlit_app.py CHANGED Viewed

@@ -231,15 +231,27 @@ if active_df is not None and not active_df.empty:
 st.divider()
-# --- TREND ANALYSIS ---
 st.subheader("Weekly AI Trend Analysis")
-st.markdown("Explore the timeline of this week's AI policy developments. **Hover over any dot** to see the specific article and source. Non-AI related noise is automatically filtered out by the AI classifier.")
 if st.button("Generate Weekly Trend Report"):
-    with st.spinner("Analyzing semantic data and abstracting macro-trends... (Takes ~30 seconds)"):
         week_ago = pd.Timestamp.now().normalize() - pd.Timedelta(days=7)
         weekly_df = active_df[active_df['event_date'] >= week_ago].copy()
         weekly_df = weekly_df.dropna(subset=['embedding'])
         if len(weekly_df) < 5:
@@ -247,6 +259,7 @@ if st.button("Generate Weekly Trend Report"):
         else:
             matrix = np.vstack(weekly_df['embedding'].apply(json.loads).values)
             clusterer = AgglomerativeClustering(n_clusters=None, distance_threshold=0.55, metric='cosine', linkage='complete')
             weekly_df['cluster'] = clusterer.fit_predict(matrix)
@@ -262,10 +275,12 @@ if st.button("Generate Weekly Trend Report"):
             if hf_token:
                 ui_client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
                 for i in range(num_clusters):
                     cluster_df = weekly_df[weekly_df['cluster'] == i]
                     sample_texts = "\n".join(cluster_df['title'].head(8).tolist())
                     prompt = f"""
                     You are a highly analytical D.C. Tech Policy Analyst. Review these article titles.
                     Your goal is to identify the MACRO-LEVEL AI policy, regulatory, or industry trend they represent.
@@ -293,8 +308,9 @@ if st.button("Generate Weekly Trend Report"):
                         print(f"Failed to name cluster {i}: {e}")
                         weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
-                    time.sleep(10)
                 clean_df = weekly_df[weekly_df['Trend Topic'] != "REJECT"].copy()
                 if clean_df.empty:
@@ -308,20 +324,38 @@ if st.button("Generate Weekly Trend Report"):
                         topic_label = cluster_subset['Trend Topic'].iloc[0]
                         st.metric(label=topic_label, value=f"{len(cluster_subset)} Updates")
-                    st.write("### Trend Timeline")
                     chart = alt.Chart(clean_df).mark_circle(size=150, opacity=0.8).encode(
-                        x=alt.X('event_date:T', title='Date', axis=alt.Axis(format='%b %d', grid=True)),
-                        y=alt.Y('Trend Topic:N', title='', sort='-x', axis=alt.Axis(labelLimit=300)),
-                        color=alt.Color('Trend Topic:N', legend=None),
                         tooltip=[
-                            alt.Tooltip('event_date:T', title='Date', format='%b %d, %Y'),
                             alt.Tooltip('Trend Topic:N', title='Macro Trend'),
                             alt.Tooltip('title:N', title='Update Title'),
-                            alt.Tooltip('source:N', title='Source')
                         ]
                     ).properties(
-                        height=max(300, len(valid_clusters) * 60)
                     ).interactive()
                     st.altair_chart(chart, use_container_width=True)

 st.divider()
+## --- Trend analysis ---
+import altair as alt
+from sklearn.cluster import AgglomerativeClustering
+from sklearn.manifold import TSNE
+import numpy as np
+import json
+import time
+import os
+import pandas as pd
+from huggingface_hub import InferenceClient
 st.subheader("Weekly AI Trend Analysis")
+st.markdown("Explore the semantic relationships between this week's AI policy updates. **Hover over any dot** to see the specific article and source. Non-AI related noise is automatically filtered out by the AI classifier, and dots clustered closely together share similar policy themes.")
 if st.button("Generate Weekly Trend Report"):
+    with st.spinner("Analyzing semantic data, abstracting macro-trends, and mapping 2D space... (Takes ~30 seconds)"):
+        # 1. Filter for the last 7 days
         week_ago = pd.Timestamp.now().normalize() - pd.Timedelta(days=7)
         weekly_df = active_df[active_df['event_date'] >= week_ago].copy()
+        # 2. Extract embeddings back into numpy arrays
         weekly_df = weekly_df.dropna(subset=['embedding'])
         if len(weekly_df) < 5:
         else:
             matrix = np.vstack(weekly_df['embedding'].apply(json.loads).values)
+            # 3. Create clusters (using the high-fidelity tight settings)
             clusterer = AgglomerativeClustering(n_clusters=None, distance_threshold=0.55, metric='cosine', linkage='complete')
             weekly_df['cluster'] = clusterer.fit_predict(matrix)
             if hf_token:
                 ui_client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
+                # 4. Background Naming & Abstraction Loop
                 for i in range(num_clusters):
                     cluster_df = weekly_df[weekly_df['cluster'] == i]
                     sample_texts = "\n".join(cluster_df['title'].head(8).tolist())
+                    # ---> THE ABSTRACTION PROMPT <---
                     prompt = f"""
                     You are a highly analytical D.C. Tech Policy Analyst. Review these article titles.
                     Your goal is to identify the MACRO-LEVEL AI policy, regulatory, or industry trend they represent.
                         print(f"Failed to name cluster {i}: {e}")
                         weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
+                    time.sleep(10) # API Rate Limit Safety
+                # ---> PURGE THE NOISE BEFORE VISUALIZING <---
                 clean_df = weekly_df[weekly_df['Trend Topic'] != "REJECT"].copy()
                 if clean_df.empty:
                         topic_label = cluster_subset['Trend Topic'].iloc[0]
                         st.metric(label=topic_label, value=f"{len(cluster_subset)} Updates")
+                    # ---------------------------------------------------------
+                    # 5. THE VISUALIZATION: Dynamic t-SNE Projection on CLEAN DATA
+                    # ---------------------------------------------------------
+                    st.write("### Semantic Cluster Map")
+                    # Extract only the clean embeddings for the map
+                    clean_matrix = np.vstack(clean_df['embedding'].apply(json.loads).values)
+                    if len(clean_df) > 1:
+                        safe_perplexity = max(1, min(30, len(clean_df) - 1))
+                        tsne = TSNE(n_components=2, perplexity=safe_perplexity, metric='cosine', random_state=42, init='random')
+                        coords = tsne.fit_transform(clean_matrix)
+                        clean_df['x'] = coords[:, 0]
+                        clean_df['y'] = coords[:, 1]
+                    else:
+                        # Fallback if only 1 article survives the filter
+                        clean_df['x'] = 0
+                        clean_df['y'] = 0
                     chart = alt.Chart(clean_df).mark_circle(size=150, opacity=0.8).encode(
+                        x=alt.X('x', axis=None),
+                        y=alt.Y('y', axis=None),
+                        color=alt.Color('Trend Topic:N', legend=alt.Legend(title="Macro Trends", orient="bottom")),
                         tooltip=[
                             alt.Tooltip('Trend Topic:N', title='Macro Trend'),
                             alt.Tooltip('title:N', title='Update Title'),
+                            alt.Tooltip('source:N', title='Source'),
+                            alt.Tooltip('event_date:T', title='Date', format='%b %d, %Y')
                         ]
                     ).properties(
+                        height=400
                     ).interactive()
                     st.altair_chart(chart, use_container_width=True)