import streamlit as st
from transformers import pipeline
from PIL import Image
import datetime

# --- PAGE CONFIG ---
st.set_page_config(page_title="SaaS Media Vault", layout="wide")

# Custom CSS to prevent layout shifting and "shaking"
# NOTE(review): the CSS payload is empty — the original style rules appear to
# have been lost in transit. Restore them here if layout jitter reappears.
st.markdown("""
""", unsafe_allow_html=True)

st.title("🗄️ SaaS Media Intelligence Vault")
st.markdown("Analyze, store, and **search** your media assets using AI-generated metadata.")

# --- INITIALIZE SESSION STATE ---
# media_library holds one dict per indexed asset (id, timestamp, image,
# description, tag, confidence), newest first.
if "media_library" not in st.session_state:
    st.session_state.media_library = []


# --- MODEL LOADING ---
@st.cache_resource
def load_models():
    """Load and cache the two CPU inference pipelines.

    Returns:
        tuple: (classifier, captioner) where
            classifier - CLIP zero-shot image classifier for coarse tagging,
            captioner  - BLIP image-captioning pipeline for descriptions.
    """
    # CLIP for general categorization
    classifier = pipeline(
        "zero-shot-image-classification",
        model="openai/clip-vit-base-patch32",
        device=-1,
    )
    # BLIP for natural language description.
    # FIX: BLIP captioning checkpoints are registered under the "image-to-text"
    # task; "image-text-to-text" (chat-style VLMs) raises for this model.
    captioner = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
        device=-1,
    )
    return classifier, captioner


classifier, captioner = load_models()

# --- SIDEBAR: UPLOAD & SETTINGS ---
st.sidebar.header("📥 Asset Management")
uploaded_file = st.sidebar.file_uploader("Add New Image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    if st.sidebar.button("Process & Index Asset"):
        # Normalize to RGB so RGBA / palette PNGs don't break the models
        # (the uploader explicitly accepts .png).
        image = Image.open(uploaded_file).convert("RGB")
        with st.spinner("AI is indexing..."):
            # 1. BLIP Description, conditioned on a prefix prompt.
            prompt = "a photo of"
            # FIX: ImageToTextPipeline takes the conditioning text via the
            # `prompt` kwarg, not `text=`.
            caption_out = captioner(image, prompt=prompt, max_new_tokens=30)
            # Strip only the leading conditioning prefix from the caption
            # (str.replace would also delete the phrase mid-sentence).
            description = caption_out[0]["generated_text"].removeprefix("a photo of").strip()

            # 2. Internal Auto-Tagging (Replaces the manual keywords input)
            # We use a broad set of categories to give the AI context without user input
            auto_labels = ["object", "person", "place", "nature", "technology", "document"]
            clip_out = classifier(image, candidate_labels=auto_labels)
            # Zero-shot results are sorted by score descending, so [0] is best.
            top_label = clip_out[0]["label"]
            top_score = clip_out[0]["score"]

            # 3. SAVE TO ARRAY
            asset_data = {
                "id": f"{datetime.datetime.now().timestamp()}",  # Unique ID to prevent UI jitter
                "timestamp": datetime.datetime.now().strftime("%H:%M:%S"),
                "image": image,
                "description": description.lower(),
                "tag": top_label.lower(),
                "confidence": top_score,
            }
            # Insert at the top
            st.session_state.media_library.insert(0, asset_data)
        st.sidebar.success("Asset Cataloged!")

# --- MAIN SECTION: SEARCH & RETRIEVAL ---
st.subheader("🔍 Intelligent Retrieval")
search_query = st.text_input("Search the vault (e.g., 'flowers', 'tech', 'laptop')", "").lower()

# --- FILTER LOGIC ---
# Case-insensitive substring match against the AI description or the CLIP tag
# (both were stored lowercased at index time).
if search_query:
    filtered_items = [
        item
        for item in st.session_state.media_library
        if search_query in item["description"] or search_query in item["tag"]
    ]
else:
    filtered_items = st.session_state.media_library

# --- DISPLAY VAULT ---
st.write(f"Showing **{len(filtered_items)}** assets")

if not filtered_items:
    st.info("No matching assets found in the vault.")
else:
    # Using a container with a fixed key helps Streamlit manage the DOM state better
    for item in filtered_items:
        with st.container():
            col1, col2 = st.columns([1, 3])
            with col1:
                st.image(item["image"], use_container_width=True)
            with col2:
                st.write(f"**🕒 Logged:** {item['timestamp']}")
                st.info(f"**AI Description:** {item['description'].capitalize()}")
                st.write(f"**🏷️ Type:** `{item['tag']}` ({round(item['confidence']*100, 1)}%)")
        st.divider()

# --- CLEAR UTILITY ---
if st.sidebar.button("🗑️ Wipe Vault Memory"):
    st.session_state.media_library = []
    st.rerun()