Spaces:

IJ-Reynolds
/

AI_Intel_Tracker

Running

App Files Community

IJ-Reynolds (HF Staff) committed 13 days ago

Commit

3ff209a

verified ·

1 Parent(s): 634b6d4

Update streamlit_app.py

Browse files

Files changed (1) hide show

streamlit_app.py +5 -176

streamlit_app.py CHANGED Viewed

@@ -9,14 +9,8 @@ import os
 from huggingface_hub import InferenceClient
 import json
 import numpy as np
-from sklearn.cluster import KMeans
-import altair as alt
-from sklearn.decomposition import PCA
 import altair as alt
 from sklearn.cluster import AgglomerativeClustering
-from sklearn.manifold import TSNE
-from sklearn.metrics import silhouette_score
 # Create a global lock for file operations
 data_lock = threading.Lock()
@@ -40,7 +34,7 @@ def load_data():
             return df
         return None
-# --- RETENTION POLICY (Chronological Sort Restored) ---
 def apply_retention_policy(df):
     if df is None or df.empty:
         return df
@@ -50,7 +44,6 @@ def apply_retention_policy(df):
     # 1. Retention Filtering
     leg_df = df[df['type'] == 'Legislation']
-    # UPDATED: Added 'Legislative Office Press Release' to the 30-day retention bucket
     news_types = ['News/Media', 'Federal/Exec Action', 'Legislative Office Press Release']
     news_mask = (df['type'].isin(news_types)) & ((df['event_date'] >= today - pd.Timedelta(days=30)) | df['event_date'].isna())
     news_df = df[news_mask]
@@ -101,7 +94,6 @@ start_background_scheduler()
 st.set_page_config(page_title="PolicyPilot Intel", layout="wide")
 st.title("AI Policy and News Dashboard - ALPHA Version")
-# Onboarding Text
 st.markdown("""
 Welcome to the **AI Policy and News Dashboard**, an automated platform tracking technology policy developments, legislative movement, and media coverage.
@@ -123,7 +115,6 @@ To help you scan the chronological timeline quickly, entries are color-coded:
 To generate a high-level summary of the most recent data entries, click the **"Generate Briefing"** button below.
 """)
-# Warning Notes
 st.markdown("""
 ---
 ### Notes for Users
@@ -131,7 +122,6 @@ st.markdown("""
 2. **Work in Progress:** This is an alpha version. Improvements in coverage and AI logic will be made regularly.
 """)
-# Architecture Notes
 with st.expander("🛠️ Technical Details & Architecture"):
     st.markdown("""
     * **AI Engine:** Powered by a two-tiered AI pipeline: Llama-3.1-8B-Instruct for initial data processing and Gemma 4 31B for summarization.
@@ -160,7 +150,6 @@ with st.sidebar:
     st.divider()
     st.header("Manual Override")
-    # --- THE ANTI-SPAM COOLDOWN ---
     cooldown_minutes = 30
     can_sweep = True
     time_left = 0
@@ -197,7 +186,6 @@ with st.sidebar:
 if active_df is not None and not active_df.empty:
     st.subheader("Executive Intel Briefing")
-    # NEW: The UI Flag
     st.info("AI briefing is synthesized from the most recent sources currently visible on the **Radar** tab.")
     if 'exec_briefing' not in st.session_state:
@@ -208,21 +196,17 @@ if active_df is not None and not active_df.empty:
         if st.button("Generate Briefing"):
             with st.spinner("Gemma 31B is synthesizing your Radar intelligence..."):
-                # Filter down to the Radar logic
                 temp_df = active_df[active_df['type'].isin(selected_types)] if selected_types else active_df
                 today_ts = pd.Timestamp.now().normalize()
                 radar_df = temp_df[temp_df['event_date'] >= today_ts].sort_values(by="event_date", ascending=True)
-                # Capture the top 15 items for the Radar context
                 briefing_items = radar_df.head(15)
-                # Safe fallback in case the Radar is completely empty today
                 if briefing_items.empty:
                     briefing_items = temp_df.head(20)
                 context = "\n".join([f"• SOURCE: {row['source']} | TITLE: {row['title']} | SUMMARY: {row.get('analysis', 'N/A')}" for _, row in briefing_items.iterrows()])
-                # NEW: Strict context prompt
                 prompt = f"""
                 Provide a highly concise, 3-5 paragraph briefing based only on the recent intelligence gathered from the user's Radar tracking system.
@@ -238,12 +222,7 @@ if active_df is not None and not active_df.empty:
                 messages = [{"role": "user", "content": prompt}]
                 try:
-                    # ---------------------------------------------------------
-                    # THE TWO-TIER ARCHITECTURE: Dedicated Gemma Client
-                    # ---------------------------------------------------------
                     gemma_client = InferenceClient("google/gemma-4-31B-it", token=os.getenv("HF_TOKEN"))
-                    # Max tokens bumped slightly to accommodate the 3-5 paragraphs requested
                     response = gemma_client.chat_completion(messages, max_tokens=700, temperature=0.2)
                     st.session_state.exec_briefing = response.choices[0].message.content
                     st.rerun()
@@ -251,26 +230,16 @@ if active_df is not None and not active_df.empty:
                     st.error(f"Briefing failed. (model may be loading or hitting tier limits): {e}")
 st.divider()
-## --- Trend analysis ---
-import altair as alt
-from sklearn.cluster import AgglomerativeClustering
-import numpy as np
-import json
-import time
-import os
-import pandas as pd
-from huggingface_hub import InferenceClient
 st.subheader("Weekly AI Trend Analysis")
 st.markdown("Explore the timeline of this week's AI policy developments. **Hover over any dot** to see the specific article and source. Non-AI related noise is automatically filtered out by the AI classifier.")
 if st.button("Generate Weekly Trend Report"):
     with st.spinner("Analyzing semantic data and abstracting macro-trends... (Takes ~30 seconds)"):
-        # 1. Filter for the last 7 days
         week_ago = pd.Timestamp.now().normalize() - pd.Timedelta(days=7)
         weekly_df = active_df[active_df['event_date'] >= week_ago].copy()
-        # 2. Extract embeddings back into numpy arrays
         weekly_df = weekly_df.dropna(subset=['embedding'])
         if len(weekly_df) < 5:
@@ -278,7 +247,6 @@ if st.button("Generate Weekly Trend Report"):
         else:
             matrix = np.vstack(weekly_df['embedding'].apply(json.loads).values)
-            # 3. Create clusters
             clusterer = AgglomerativeClustering(n_clusters=None, distance_threshold=0.55, metric='cosine', linkage='complete')
             weekly_df['cluster'] = clusterer.fit_predict(matrix)
@@ -294,12 +262,10 @@ if st.button("Generate Weekly Trend Report"):
             if hf_token:
                 ui_client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
-                # 4. Background Naming & Abstraction Loop
                 for i in range(num_clusters):
                     cluster_df = weekly_df[weekly_df['cluster'] == i]
                     sample_texts = "\n".join(cluster_df['title'].head(8).tolist())
-                    # ---> THE ABSTRACTION PROMPT <---
                     prompt = f"""
                     You are a highly analytical D.C. Tech Policy Analyst. Review these article titles.
                     Your goal is to identify the MACRO-LEVEL AI policy, regulatory, or industry trend they represent.
@@ -318,7 +284,6 @@ if st.button("Generate Weekly Trend Report"):
                         response = ui_client.chat_completion(messages, max_tokens=15, temperature=0.0)
                         topic_name = response.choices[0].message.content.strip(' "').upper()
-                        # Catch the rejection or format the title nicely
                         if "REJECT" in topic_name:
                             weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
                         else:
@@ -328,9 +293,8 @@ if st.button("Generate Weekly Trend Report"):
                         print(f"Failed to name cluster {i}: {e}")
                         weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
-                    time.sleep(10) # API Rate Limit Safety
-                # ---> PURGE THE NOISE BEFORE VISUALIZING <---
                 clean_df = weekly_df[weekly_df['Trend Topic'] != "REJECT"].copy()
                 if clean_df.empty:
@@ -338,16 +302,12 @@ if st.button("Generate Weekly Trend Report"):
                 else:
                     st.write(f"### Top AI Trends This Week:")
-                    # Display the cleaned metrics
                     valid_clusters = clean_df['cluster'].unique()
                     for cluster_id in valid_clusters:
                         cluster_subset = clean_df[clean_df['cluster'] == cluster_id]
                         topic_label = cluster_subset['Trend Topic'].iloc[0]
                         st.metric(label=topic_label, value=f"{len(cluster_subset)} Updates")
-                    # ---------------------------------------------------------
-                    # 5. THE VISUALIZATION: Analytical Timeline Swarm
-                    # ---------------------------------------------------------
                     st.write("### Trend Timeline")
                     chart = alt.Chart(clean_df).mark_circle(size=150, opacity=0.8).encode(
@@ -361,7 +321,7 @@ if st.button("Generate Weekly Trend Report"):
                             alt.Tooltip('source:N', title='Source')
                         ]
                     ).properties(
-                        height=max(300, len(valid_clusters) * 60) # Dynamically sizes the chart height
                     ).interactive()
                     st.altair_chart(chart, use_container_width=True)
@@ -371,138 +331,7 @@ if st.button("Generate Weekly Trend Report"):
 st.divider()
-                    # ---------------------------------------------------------
-                    # 5. THE VISUALIZATION: Analytical Timeline Swarm
-                    # ---------------------------------------------------------
-                    st.write("### Trend Timeline")
-                    chart = alt.Chart(clean_df).mark_circle(size=150, opacity=0.8).encode(
-                        x=alt.X('event_date:T', title='Date', axis=alt.Axis(format='%b %d', grid=True)),
-                        y=alt.Y('Trend Topic:N', title='', sort='-x', axis=alt.Axis(labelLimit=300)),
-                        color=alt.Color('Trend Topic:N', legend=None),
-                        tooltip=[
-                            alt.Tooltip('event_date:T', title='Date', format='%b %d, %Y'),
-                            alt.Tooltip('Trend Topic:N', title='Macro Trend'),
-                            alt.Tooltip('title:N', title='Update Title'),
-                            alt.Tooltip('source:N', title='Source')
-                        ]
-                    ).properties(
-                        height=max(300, len(valid_clusters) * 60) # Dynamically sizes the chart height
-                    ).interactive()
-                    st.altair_chart(chart, use_container_width=True)
-            else:
-                st.error("Hugging Face API token not found. Cannot generate topic names.")
-st.divider()
-                # ---------------------------------------------------------
-                # 5. THE VISUALIZATION: Dynamic t-SNE Projection
-                # ---------------------------------------------------------
-                safe_perplexity = min(30, len(weekly_df) - 1)
-                tsne = TSNE(n_components=2, perplexity=safe_perplexity, metric='cosine', random_state=42, init='random')
-                coords = tsne.fit_transform(matrix)
-                weekly_df['x'] = coords[:, 0]
-                weekly_df['y'] = coords[:, 1]
-                chart = alt.Chart(weekly_df).mark_circle(size=120, opacity=0.8).encode(
-                    x=alt.X('x', axis=None),
-                    y=alt.Y('y', axis=None),
-                    color=alt.Color('Trend Topic', legend=alt.Legend(title="Identified Trends", orient="bottom")),
-                    tooltip=[
-                        alt.Tooltip('Trend Topic', title='Macro Trend'),
-                        alt.Tooltip('title', title='Update Title'),
-                        alt.Tooltip('source', title='Source Agency/Office')
-                    ]
-                ).properties(
-                    height=400
-                ).interactive()
-                st.altair_chart(chart, use_container_width=True)
-            else:
-                st.error("Hugging Face API token not found. Cannot generate topic names.")
-st.divider()
 # --- VISUAL CARD RENDERER ---
 def render_event_cards(display_df):
     if display_df.empty:
-        st.info("No items match these filters.")
-        return
-    # Visual Mapping for rapid scanning (Updated with Megaphone)
-    type_icons = {
-        "Legislation": "🟣",
-        "Federal/Exec Action": "🟢",
-        "News/Media": "🔵",
-        "Schedule/Hearing": "🟠",
-        "Hearing/Markup": "🟠",
-        "Legislative Office Press Release": "📣"
-    }
-    color_map = {
-        "News/Media": "blue",
-        "Federal/Exec Action": "green",
-        "Legislation": "violet",
-        "Schedule/Hearing": "orange",
-        "Hearing/Markup": "orange",
-        "Legislative Office Press Release": "red"
-    }
-    for _, row in display_df.iterrows():
-        dt = row['event_date']
-        dt_str = dt.strftime('%b %d, %Y') if pd.notnull(dt) else "Date TBD"
-        card_type = row['type']
-        icon = type_icons.get(card_type, "⚪")
-        color = color_map.get(card_type, "gray")
-        source = row.get('source', 'Unknown Source')
-        raw_title = str(row['title'])
-        display_title = raw_title[:75] + ("..." if len(raw_title) > 75 else "")
-        # Injected {source} directly into the UI header
-        with st.expander(f"{icon} {dt_str} | {card_type} | {source} | {display_title}"):
-            col1, col2 = st.columns([3, 1])
-            with col1:
-                st.markdown("### Executive Summary")
-                st.info(row.get('analysis', 'Analysis pending...'))
-                st.caption(f"**Keywords:** `{row.get('keywords', 'N/A')}`")
-            with col2:
-                st.markdown("### Metadata")
-                st.markdown(f"**Category:** :{color}[{card_type}]")
-                st.write(f"**Source:** {source}")
-                st.write(f"**Action:** {row['latest_action']}")
-                st.link_button("View Source", str(row['link']), use_container_width=True)
-if active_df is not None and not active_df.empty:
-    if selected_types:
-        filtered_df = active_df[active_df['type'].isin(selected_types)]
-    else:
-        filtered_df = active_df
-    search = st.text_input("Search Intel Dashboard...", "")
-    if search:
-        mask = filtered_df[['title', 'summary', 'analysis', 'keywords', 'source']].apply(lambda x: x.astype(str).str.contains(search, case=False)).any(axis=1)
-        filtered_df = filtered_df[mask]
-    today_ts = pd.Timestamp.now().normalize()
-    radar_cutoff = today_ts - pd.Timedelta(days=1)
-    # Radar captures Upcoming + Today + Yesterday. (Sorted newest to oldest)
-    radar_df = filtered_df[filtered_df['event_date'] >= radar_cutoff].sort_values(by="event_date", ascending=False)
-    # Archive catches everything older than the cutoff
-    archive_df = filtered_df[(filtered_df['event_date'] < radar_cutoff) | (filtered_df['event_date'].isna())].sort_values(by="event_date", ascending=False)
-    tab1, tab2 = st.tabs([f"Radar ({len(radar_df)})", f"Archive ({len(archive_df)})"])
-    with tab1: render_event_cards(radar_df)
-    with tab2: render_event_cards(archive_df)
-else:
-    st.warning("Dashboard empty. Run 'Force Manual Sweep' to populate.")

 from huggingface_hub import InferenceClient
 import json
 import numpy as np
 import altair as alt
 from sklearn.cluster import AgglomerativeClustering
 # Create a global lock for file operations
 data_lock = threading.Lock()
             return df
         return None
+# --- RETENTION POLICY ---
 def apply_retention_policy(df):
     if df is None or df.empty:
         return df
     # 1. Retention Filtering
     leg_df = df[df['type'] == 'Legislation']
     news_types = ['News/Media', 'Federal/Exec Action', 'Legislative Office Press Release']
     news_mask = (df['type'].isin(news_types)) & ((df['event_date'] >= today - pd.Timedelta(days=30)) | df['event_date'].isna())
     news_df = df[news_mask]
 st.set_page_config(page_title="PolicyPilot Intel", layout="wide")
 st.title("AI Policy and News Dashboard - ALPHA Version")
 st.markdown("""
 Welcome to the **AI Policy and News Dashboard**, an automated platform tracking technology policy developments, legislative movement, and media coverage.
 To generate a high-level summary of the most recent data entries, click the **"Generate Briefing"** button below.
 """)
 st.markdown("""
 ---
 ### Notes for Users
 2. **Work in Progress:** This is an alpha version. Improvements in coverage and AI logic will be made regularly.
 """)
 with st.expander("🛠️ Technical Details & Architecture"):
     st.markdown("""
     * **AI Engine:** Powered by a two-tiered AI pipeline: Llama-3.1-8B-Instruct for initial data processing and Gemma 4 31B for summarization.
     st.divider()
     st.header("Manual Override")
     cooldown_minutes = 30
     can_sweep = True
     time_left = 0
 if active_df is not None and not active_df.empty:
     st.subheader("Executive Intel Briefing")
     st.info("AI briefing is synthesized from the most recent sources currently visible on the **Radar** tab.")
     if 'exec_briefing' not in st.session_state:
         if st.button("Generate Briefing"):
             with st.spinner("Gemma 31B is synthesizing your Radar intelligence..."):
                 temp_df = active_df[active_df['type'].isin(selected_types)] if selected_types else active_df
                 today_ts = pd.Timestamp.now().normalize()
                 radar_df = temp_df[temp_df['event_date'] >= today_ts].sort_values(by="event_date", ascending=True)
                 briefing_items = radar_df.head(15)
                 if briefing_items.empty:
                     briefing_items = temp_df.head(20)
                 context = "\n".join([f"• SOURCE: {row['source']} | TITLE: {row['title']} | SUMMARY: {row.get('analysis', 'N/A')}" for _, row in briefing_items.iterrows()])
                 prompt = f"""
                 Provide a highly concise, 3-5 paragraph briefing based only on the recent intelligence gathered from the user's Radar tracking system.
                 messages = [{"role": "user", "content": prompt}]
                 try:
                     gemma_client = InferenceClient("google/gemma-4-31B-it", token=os.getenv("HF_TOKEN"))
                     response = gemma_client.chat_completion(messages, max_tokens=700, temperature=0.2)
                     st.session_state.exec_briefing = response.choices[0].message.content
                     st.rerun()
                     st.error(f"Briefing failed. (model may be loading or hitting tier limits): {e}")
 st.divider()
+# --- TREND ANALYSIS ---
 st.subheader("Weekly AI Trend Analysis")
 st.markdown("Explore the timeline of this week's AI policy developments. **Hover over any dot** to see the specific article and source. Non-AI related noise is automatically filtered out by the AI classifier.")
 if st.button("Generate Weekly Trend Report"):
     with st.spinner("Analyzing semantic data and abstracting macro-trends... (Takes ~30 seconds)"):
         week_ago = pd.Timestamp.now().normalize() - pd.Timedelta(days=7)
         weekly_df = active_df[active_df['event_date'] >= week_ago].copy()
         weekly_df = weekly_df.dropna(subset=['embedding'])
         if len(weekly_df) < 5:
         else:
             matrix = np.vstack(weekly_df['embedding'].apply(json.loads).values)
             clusterer = AgglomerativeClustering(n_clusters=None, distance_threshold=0.55, metric='cosine', linkage='complete')
             weekly_df['cluster'] = clusterer.fit_predict(matrix)
             if hf_token:
                 ui_client = InferenceClient("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
                 for i in range(num_clusters):
                     cluster_df = weekly_df[weekly_df['cluster'] == i]
                     sample_texts = "\n".join(cluster_df['title'].head(8).tolist())
                     prompt = f"""
                     You are a highly analytical D.C. Tech Policy Analyst. Review these article titles.
                     Your goal is to identify the MACRO-LEVEL AI policy, regulatory, or industry trend they represent.
                         response = ui_client.chat_completion(messages, max_tokens=15, temperature=0.0)
                         topic_name = response.choices[0].message.content.strip(' "').upper()
                         if "REJECT" in topic_name:
                             weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
                         else:
                         print(f"Failed to name cluster {i}: {e}")
                         weekly_df.loc[weekly_df['cluster'] == i, 'Trend Topic'] = "REJECT"
+                    time.sleep(10)
                 clean_df = weekly_df[weekly_df['Trend Topic'] != "REJECT"].copy()
                 if clean_df.empty:
                 else:
                     st.write(f"### Top AI Trends This Week:")
                     valid_clusters = clean_df['cluster'].unique()
                     for cluster_id in valid_clusters:
                         cluster_subset = clean_df[clean_df['cluster'] == cluster_id]
                         topic_label = cluster_subset['Trend Topic'].iloc[0]
                         st.metric(label=topic_label, value=f"{len(cluster_subset)} Updates")
                     st.write("### Trend Timeline")
                     chart = alt.Chart(clean_df).mark_circle(size=150, opacity=0.8).encode(
                             alt.Tooltip('source:N', title='Source')
                         ]
                     ).properties(
+                        height=max(300, len(valid_clusters) * 60)
                     ).interactive()
                     st.altair_chart(chart, use_container_width=True)
 st.divider()
 # --- VISUAL CARD RENDERER ---
 def render_event_cards(display_df):
     if display_df.empty:
+        st.info("No items match these filters.")