"""Streamlit page that connects to a ChromaDB server and previews its collections.

Connection settings are read from the environment:

* ``CHROMA_HOST`` -- server host name (default ``vish85521-chromadb.hf.space``).
* ``CHROMA_PORT`` -- server port (default ``443``); converted to ``int`` when
  the client is built.
* ``CHROMA_SSL``  -- ``"true"``, ``"1"`` or ``"t"`` (case-insensitive) enables
  SSL; any other value disables it (default ``"True"``).
"""

import os

import chromadb
import pandas as pd
import streamlit as st

# Fetch environment variables (defaults to the provided HF Space).
CHROMA_HOST = os.getenv("CHROMA_HOST", "vish85521-chromadb.hf.space")
CHROMA_PORT = os.getenv("CHROMA_PORT", "443")
CHROMA_SSL = os.getenv("CHROMA_SSL", "True").lower() in ("true", "1", "t")

st.set_page_config(page_title="ChromaDB Monitor", page_icon="📊", layout="wide")
st.title("📊 ChromaDB Monitor")


@st.cache_resource
def get_chroma_client():
    """Build the ChromaDB HTTP client from the module-level settings.

    Decorated with ``st.cache_resource`` so the client object is cached by
    Streamlit rather than rebuilt on every call.

    Raises:
        ValueError: if ``CHROMA_PORT`` is not a valid integer string.
    """
    return chromadb.HttpClient(
        host=CHROMA_HOST,
        # The environment always yields a string; HttpClient documents an int.
        port=int(CHROMA_PORT),
        ssl=CHROMA_SSL,
    )


def _show_failure(summary, error):
    """Render an error summary, the exception text and the settings in use."""
    st.error(summary)
    st.error(f"Error Details: {error}")
    st.code(f"Host: {CHROMA_HOST}\nPort: {CHROMA_PORT}\nSSL: {CHROMA_SSL}")


def _collection_name(entry):
    """Return the name of one ``list_collections()`` entry.

    Entries are either objects exposing ``.name`` or, on some chromadb
    releases, the bare name itself; both shapes are accepted.
    """
    return getattr(entry, "name", entry)


def _peek_to_frame(results):
    """Flatten a ``peek()`` result into a DataFrame with ID/Metadata/Document.

    ``metadatas`` and ``documents`` may be missing or ``None``; they are then
    padded with ``None`` so every column has the same length as ``ids``.
    """
    ids = results["ids"]
    padding = [None] * len(ids)
    metadatas = results.get("metadatas") or padding
    documents = results.get("documents") or padding
    return pd.DataFrame(
        {
            "ID": ids,
            "Metadata": [str(m) for m in metadatas],
            "Document": documents,
        }
    )


# --- Phase 1: connect -------------------------------------------------------
# Only genuine connection problems are reported as "Failed to connect".
try:
    with st.spinner(f"Connecting to {CHROMA_HOST}..."):
        client = get_chroma_client()
        # Call heartbeat() to ensure the connection is alive.
        heartbeat = client.heartbeat()
except Exception as e:  # top-level UI boundary: show the error, do not crash
    _show_failure("Failed to connect to ChromaDB.", e)
    st.stop()

st.success(f"Connected successfully! (Heartbeat: {heartbeat})")

# --- Phase 2: browse collections --------------------------------------------
try:
    collections = client.list_collections()

    if not collections:
        st.warning("No collections found in this ChromaDB instance.")
    else:
        # Extract collection names for the dropdown.
        collection_names = [_collection_name(c) for c in collections]
        selected_collection = st.selectbox(
            "Select a Collection to Monitor", collection_names
        )

        if selected_collection:
            col = client.get_collection(selected_collection)
            count = col.count()
            st.metric(label="Total Documents in Collection", value=count)

            if count > 0:
                st.subheader("Sample Data (Top 10 Rows)")
                # peek() grabs a small sample without doing a similarity search.
                results = col.peek(limit=10)

                if results and results.get("ids"):
                    st.dataframe(_peek_to_frame(results), use_container_width=True)
                else:
                    st.info("Collection data cannot be peeked or is empty.")
except Exception as e:  # top-level UI boundary: show the error, do not crash
    _show_failure("Failed to read data from ChromaDB.", e)