import io
import mimetypes
import os
import tempfile
from html import escape
from pathlib import Path
from string import Template

import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv

load_dotenv()

# LlamaIndex imports for RAG retrieval
try:
    from llama_index.core import Settings, StorageContext, load_index_from_storage
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    LLAMA_INDEX_AVAILABLE = True
except ImportError:
    LLAMA_INDEX_AVAILABLE = False

# GitHub repo that hosts study materials via Releases + manifest.json
# Format: "owner/repo"
MATERIALS_REPO = os.getenv("MATERIALS_REPO", "KunalGupta25/plexi-materials")
MANIFEST_BRANCH = "main"

THEME_MODE_STATE_KEY = "plexi_theme_mode"
THEME_MODE_WIDGET_KEY = "_plexi_theme_mode_widget"
APP_ICON_PATH = str(Path(__file__).resolve().with_name("plexi-mcp-square-logo.svg"))

# Palette values are emitted verbatim as CSS custom properties by
# _css_vars_block(). Multi-part gradients are written as adjacent string
# literals; each piece keeps its leading/trailing space so the joined value is
# unchanged.
LIGHT_PALETTE = {
    "ink": "#16312c",
    "muted": "#5b6c66",
    "bg": "#f5f0e8",
    "panel": "rgba(255, 252, 247, 0.88)",
    "panel_strong": "#fffaf1",
    "line": "rgba(22, 49, 44, 0.11)",
    "accent": "#1d7a63",
    "accent_soft": "#d7efe4",
    "highlight": "#f4b860",
    "shadow": "0 18px 60px rgba(30, 48, 43, 0.08)",
    "app_background": (
        " radial-gradient(circle at top left, rgba(244, 184, 96, 0.18), transparent 28%),"
        " radial-gradient(circle at top right, rgba(29, 122, 99, 0.14), transparent 30%),"
        " linear-gradient(180deg, #fbf7ef 0%, #f4ecde 100%) "
    ),
    "hero_background": (
        " linear-gradient(135deg, rgba(29, 122, 99, 0.08), rgba(255, 250, 241, 0.92)),"
        " rgba(255, 252, 247, 0.88) "
    ),
    "chip_background": "rgba(29, 122, 99, 0.08)",
    "chip_border": "rgba(29, 122, 99, 0.12)",
    "button_border": "rgba(29, 122, 99, 0.14)",
    "button_surface": "#f8fbfa",
    "button_hover": "#eef7f2",
    "primary_button": "linear-gradient(135deg, #1d7a63, #245e74)",
    "sidebar_background": (
        " linear-gradient(180deg, rgba(255, 251, 245, 0.98),"
        " rgba(246, 238, 224, 0.96)) "
    ),
    "expander_background": "rgba(255, 251, 245, 0.72)",
    "meta_background": "rgba(255, 251, 245, 0.72)",
    "divider": "linear-gradient(90deg, rgba(29, 122, 99, 0.25), transparent)",
    "meta_row_border": "rgba(22, 49, 44, 0.08)",
    "bottom_background": "#fbf7ef",
}

DARK_PALETTE = {
    "ink": "#eef4ef",
    "muted": "#b8c6c0",
    "bg": "#0d1715",
    "panel": "rgba(20, 31, 29, 0.9)",
    "panel_strong": "#15211f",
    "line": "rgba(196, 223, 211, 0.14)",
    "accent": "#54c6a2",
    "accent_soft": "#17392f",
    "highlight": "#f0b564",
    "shadow": "0 22px 70px rgba(0, 0, 0, 0.32)",
    "app_background": (
        " radial-gradient(circle at top left, rgba(240, 181, 100, 0.12), transparent 28%),"
        " radial-gradient(circle at top right, rgba(84, 198, 162, 0.12), transparent 32%),"
        " linear-gradient(180deg, #0f1b19 0%, #09110f 100%) "
    ),
    "hero_background": (
        " linear-gradient(135deg, rgba(84, 198, 162, 0.12), rgba(16, 28, 25, 0.92)),"
        " rgba(20, 31, 29, 0.9) "
    ),
    "chip_background": "rgba(84, 198, 162, 0.12)",
    "chip_border": "rgba(84, 198, 162, 0.18)",
    "button_border": "rgba(84, 198, 162, 0.18)",
    "button_surface": "rgba(84, 198, 162, 0.14)",
    "button_hover": "rgba(84, 198, 162, 0.22)",
    "primary_button": "linear-gradient(135deg, #2ea483, #245e74)",
    "sidebar_background": (
        " linear-gradient(180deg, rgba(17, 28, 26, 0.98),"
        " rgba(12, 20, 18, 0.97)) "
    ),
    "expander_background": "rgba(17, 28, 26, 0.84)",
    "meta_background": "rgba(19, 31, 28, 0.84)",
    "divider": "linear-gradient(90deg, rgba(84, 198, 162, 0.32), transparent)",
    "meta_row_border": "rgba(196, 223, 211, 0.1)",
    "bottom_background": "#09110f",
}


def get_theme_mode():
    """Return the stored appearance mode, seeding it with "system" on first use."""
    state = st.session_state
    if THEME_MODE_STATE_KEY not in state:
        state[THEME_MODE_STATE_KEY] = "system"
    return state[THEME_MODE_STATE_KEY]


def sync_theme_mode():
    """Copy the selector widget's value (lower-cased) into the persistent state key.

    Falls back to "System" when the widget key is absent from session state.
    """
    selected = st.session_state.get(THEME_MODE_WIDGET_KEY, "System")
    st.session_state[THEME_MODE_STATE_KEY] = selected.lower()


def _css_vars_block(palette):
    """Return newline-separated ``--plexi-*`` CSS variable declarations.

    Each palette key maps to a variable whose name is the key with underscores
    turned into hyphens. Raises ``KeyError`` if *palette* lacks a listed key.
    """
    ordered_keys = (
        "ink",
        "muted",
        "bg",
        "panel",
        "panel_strong",
        "line",
        "accent",
        "accent_soft",
        "highlight",
        "shadow",
        "app_background",
        "hero_background",
        "chip_background",
        "chip_border",
        "button_border",
        "button_surface",
        "button_hover",
        "primary_button",
        "sidebar_background",
        "expander_background",
        "meta_background",
        "divider",
        "meta_row_border",
        "bottom_background",
    )
    declarations = []
    for key in ordered_keys:
        css_name = key.replace("_", "-")
        declarations.append(f" --plexi-{css_name}: {palette[key]};")
    return "\n".join(declarations)


def inject_theme():
    """Build the theme CSS for the current mode and emit it via ``st.markdown``."""
    mode = get_theme_mode()
    if mode == "dark":
        palette, color_scheme = DARK_PALETTE, "dark"
    else:
        palette, color_scheme = LIGHT_PALETTE, "light"

    system_css = ""
    if mode == "system":
        # In "system" mode the light palette is the base and a media query
        # swaps in the dark variables when the device prefers dark.
        dark_vars = _css_vars_block(DARK_PALETTE)
        system_css = (
            " @media (prefers-color-scheme: dark) {"
            f" :root {{ {dark_vars} }}"
            " html { color-scheme: dark; } } "
        )

    # NOTE(review): this template literal holds no $palette_vars /
    # $color_scheme / $system_css placeholders, so the mapping below does not
    # reach the output. The stylesheet body looks like it was lost from this
    # copy of the file -- confirm against the upstream source.
    template = Template(" ")
    css = template.substitute(
        {
            "palette_vars": _css_vars_block(palette),
            "color_scheme": color_scheme,
            "system_css": system_css,
        }
    )
    st.markdown(css, unsafe_allow_html=True)


def summarize_manifest(manifest):
    """Return semester, subject and file counts plus the sorted material types.

    Walks the manifest as semester -> subject -> material type -> file list.
    """
    subject_count = 0
    file_count = 0
    seen_types = set()
    for subjects in manifest.values():
        subject_count += len(subjects)
        for types in subjects.values():
            seen_types.update(types.keys())
            for files in types.values():
                file_count += len(files)
    return {
        "semester_count": len(manifest),
        "subject_count": subject_count,
        "file_count": file_count,
        "material_types": sorted(seen_types),
    }


def summarize_subject_catalog(subject_data):
    """Return the type count, total file count and sorted type names for one subject."""
    total_files = 0
    for files in subject_data.values():
        total_files += len(files)
    return {
        "type_count": len(subject_data),
        "file_count": total_files,
        "types": sorted(subject_data),
    }


def render_page_header(kicker, title, subtitle, badges=None):
    """Render the page hero: kicker, title, subtitle and optional badges.

    All values are HTML-escaped before being placed in the markdown payload;
    falsy badges are skipped.
    """
    badge_html = ""
    if badges:
        escaped_badges = [escape(str(badge)) for badge in badges if badge]
        badge_html = "\n" + "".join(escaped_badges) + "\n"

    st.markdown(
        f"\n{escape(kicker)}\n\n{escape(title)}\n\n{escape(subtitle)}\n\n{badge_html}\n",
        unsafe_allow_html=True,
    )


def render_stat_cards(cards):
    """Render one column per card showing its escaped label, value and note.

    Does nothing when *cards* is empty. Missing card fields render as "".
    """
    if not cards:
        return

    def _escaped(card, field):
        return escape(str(card.get(field, "")))

    columns = st.columns(len(cards))
    for column, card in zip(columns, cards):
        label = _escaped(card, "label")
        value = _escaped(card, "value")
        note = _escaped(card, "note")
        with column:
            st.markdown(
                f"\n{label}\n{value}\n{note}\n",
                unsafe_allow_html=True,
            )


def render_panel(title, body, tone="default"):
    """Render an informational panel with an escaped title and body."""
    # NOTE(review): panel_class is selected from `tone` but the literal below
    # never references it; the wrapping markup that used it appears to be
    # missing from this copy -- confirm against the upstream source.
    if tone == "callout":
        panel_class = "plexi-callout"
    else:
        panel_class = "plexi-panel"
    st.markdown(
        f"\n{escape(title)}\n{escape(body)}\n",
        unsafe_allow_html=True,
    )


def _manifest_url():
    """Return the raw.githubusercontent.com URL of manifest.json."""
    return (
        "https://raw.githubusercontent.com/"
        f"{MATERIALS_REPO}/{MANIFEST_BRANCH}/manifest.json"
    )


@st.cache_data(ttl=300, show_spinner=False)
def get_manifest():
    """Fetch and decode the materials manifest; results are cached for 300 s.

    Raises the ``requests`` error if the request fails or returns an HTTP
    error status.
    """
    response = requests.get(_manifest_url(), timeout=15)
    response.raise_for_status()
    return response.json()


def download_github_file(download_url, max_retries=3):
    """Download *download_url* and return the response body as bytes.

    Retries on ``requests.RequestException`` up to *max_retries* attempts,
    printing each failure and re-raising the last one. Returns ``None`` only
    when *max_retries* allows no attempt at all.
    """
    for attempt_number in range(1, max_retries + 1):
        try:
            response = requests.get(download_url, timeout=60)
            response.raise_for_status()
            return response.content
        except requests.RequestException as err:
            print(f"Download error (attempt {attempt_number}): {err}")
            if attempt_number == max_retries:
                raise
    return None


def get_mime_type(filename):
    """Guess the MIME type from the filename, defaulting to a generic binary type."""
    guessed = mimetypes.guess_type(filename)[0]
    if guessed:
        return guessed
    return "application/octet-stream"


def render_sidebar_intro():
    """Render the shared introduction card at the top of the sidebar."""
    intro = (
        "\nPlexi"
        "\nGrounded study assistant"
        "\nBrowse materials, preview files, and ask questions backed by the"
        " currently loaded course content."
        "\n"
    )
    with st.sidebar:
        st.markdown(intro, unsafe_allow_html=True)


def render_sidebar_footer():
    """Render the appearance selector and outbound links at the end of the sidebar."""
    footer_links = (
        ("Web", "https://lazyhideout.tech"),
        ("GitHub", "https://github.com/kunalgupta25"),
        ("Ko-fi", "https://ko-fi.com/lazy_human"),
    )
    with st.sidebar:
        # Keep the widget's own state key aligned with the persisted mode so
        # the selectbox shows the stored choice.
        widget_value = get_theme_mode().capitalize()
        if st.session_state.get(THEME_MODE_WIDGET_KEY) != widget_value:
            st.session_state[THEME_MODE_WIDGET_KEY] = widget_value

        st.markdown("\nAppearance\n", unsafe_allow_html=True)
        st.selectbox(
            "Theme",
            ["System", "Light", "Dark"],
            key=THEME_MODE_WIDGET_KEY,
            on_change=sync_theme_mode,
            help="System follows your device preference unless you override it here.",
        )
        st.caption("Built by **Kunal Gupta** (LazyHuman)")

        for column, (label, url) in zip(st.columns(3), footer_links):
            with column:
                st.link_button(label, url, use_container_width=True)

        st.markdown("\n", unsafe_allow_html=True)


def render_sidebar():
    """Render the intro card followed by the footer, with nothing in between."""
    render_sidebar_intro()
    render_sidebar_footer()


def read_pdf_text(pdf_bytes):
    """Extract text from PDF bytes, one page at a time, joined by newlines.

    A page whose extraction fails is skipped. If the document cannot be read
    at all, the raw bytes are decoded as UTF-8 (errors ignored) instead, or ""
    is returned for empty input.
    """
    page_texts = []
    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
        for page in pdf_reader.pages:
            try:
                extracted = page.extract_text()
                if extracted:
                    # Round-trip through UTF-16 to drop code points that
                    # cannot be decoded back (e.g. lone surrogates).
                    encoded = extracted.encode("utf-16", "surrogatepass")
                    page_texts.append(encoded.decode("utf-16", "ignore"))
            except Exception:
                # Best effort: one bad page must not discard the rest.
                pass
        return "\n".join(page_texts)
    except Exception:
        if pdf_bytes:
            return pdf_bytes.decode("utf-8", errors="ignore")
        return ""


def load_subject_context(manifest, semester, subject):
    """Download every text or PDF file of a subject and extract its text.

    Returns ``(context_string, source_list)`` where:
    - context_string: numbered ``[Source N: ...]`` blocks separated by blank lines
    - source_list: dicts with 'id', 'name' and 'type' for citation display

    Files of other MIME types, empty downloads and files with no extractable
    text are skipped. A failure on one file is printed and the loop continues.
    """
    subject_data = manifest.get(semester, {}).get(subject, {})
    blocks = []
    sources = []
    for file_type, file_list in subject_data.items():
        for file_entry in file_list:
            name = file_entry["name"]
            mime = get_mime_type(name)
            is_pdf = mime == "application/pdf"
            if not is_pdf and not mime.startswith("text/"):
                continue
            try:
                content = download_github_file(file_entry["download_url"])
                if not content:
                    continue
                if is_pdf:
                    text = read_pdf_text(content)
                else:
                    text = content.decode("utf-8", errors="ignore")
                if text.strip():
                    # Ids are 1-based and only assigned to sources kept.
                    source_id = len(sources) + 1
                    sources.append({"id": source_id, "name": name, "type": file_type})
                    blocks.append(
                        f"[Source {source_id}: {name} ({file_type})]\n{text}\n[End Source {source_id}]"
                    )
            except Exception as err:
                print(f"Error loading {name}: {err}")
    return "\n\n".join(blocks), sources


# RAG index loading from GitHub
# The index is pre-built by GitHub Actions (build_index.py) and
# committed to the materials repo. We just download and load it.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"  # must match build_index.py

# Files that make up the persisted index; each is fetched from the repo's
# index/ directory.
INDEX_FILES = [
    "default__vector_store.json",
    "docstore.json",
    "graph_store.json",
    "image__vector_store.json",
    "index_store.json",
]


@st.cache_resource(show_spinner="Loading RAG index...")
def fetch_rag_index():
    """Download the pre-built LlamaIndex from the materials repo and load it.

    The result is cached by ``st.cache_resource``, which shares one cached
    value across sessions and reruns rather than keeping one per session.

    Returns:
        ``(index, error_msg)``: the loaded index and ``None`` on success, or
        ``None`` and a human-readable message when LlamaIndex is missing, a
        download fails, or the index cannot be loaded.

    NOTE(review): the error tuple is an ordinary return value, so it is cached
    like a success -- a transient download failure presumably sticks until the
    cache is cleared; confirm whether that is intended.
    """
    if not LLAMA_INDEX_AVAILABLE:
        return (
            None,
            "LlamaIndex not installed - install llama-index-core and dependencies.",
        )

    index_base_url = (
        f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/index"
    )

    # A TemporaryDirectory is removed when the block exits, on success and on
    # both failure paths, so downloaded index files no longer pile up in the
    # system temp directory.
    # NOTE(review): this assumes load_index_from_storage reads the persisted
    # JSON stores fully into memory -- confirm for the store types in use.
    with tempfile.TemporaryDirectory(prefix="plexi_index_") as index_dir:
        try:
            for filename in INDEX_FILES:
                url = f"{index_base_url}/{filename}"
                resp = requests.get(url, timeout=30)
                resp.raise_for_status()
                with open(os.path.join(index_dir, filename), "wb") as file_handle:
                    file_handle.write(resp.content)
        except Exception as err:
            return None, f"Failed to download index files: {err}"

        try:
            # The query-time embedding model has to be the one the index was
            # built with (see EMBED_MODEL_ID).
            embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_ID)
            Settings.embed_model = embed_model
            Settings.llm = None
            storage_context = StorageContext.from_defaults(persist_dir=index_dir)
            index = load_index_from_storage(storage_context)
            return index, None
        except Exception as err:
            return None, f"Failed to load index: {err}"