import io
import mimetypes
import os
import tempfile
from html import escape
from string import Template

import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv

load_dotenv()

# LlamaIndex imports for RAG retrieval. The dependency is optional: when it is
# missing the flag below is False and the RAG loader reports that instead.
try:
    from llama_index.core import Settings, StorageContext, load_index_from_storage
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    LLAMA_INDEX_AVAILABLE = True
except ImportError:
    LLAMA_INDEX_AVAILABLE = False

# GitHub repo that hosts study materials via Releases + manifest.json
# Format: "owner/repo"
MATERIALS_REPO = os.getenv("MATERIALS_REPO", "KunalGupta25/plexi-materials")
MANIFEST_BRANCH = "main"

# Session-state keys: the first holds the persisted lowercase mode, the second
# is the key of the sidebar selectbox widget.
THEME_MODE_STATE_KEY = "plexi_theme_mode"
THEME_MODE_WIDGET_KEY = "_plexi_theme_mode_widget"

LIGHT_PALETTE = {
    "ink": "#16312c",
    "muted": "#5b6c66",
    "bg": "#f5f0e8",
    "panel": "rgba(255, 252, 247, 0.88)",
    "panel_strong": "#fffaf1",
    "line": "rgba(22, 49, 44, 0.11)",
    "accent": "#1d7a63",
    "accent_soft": "#d7efe4",
    "highlight": "#f4b860",
    "shadow": "0 18px 60px rgba(30, 48, 43, 0.08)",
    "app_background": (
        " radial-gradient(circle at top left, rgba(244, 184, 96, 0.18), transparent 28%),"
        " radial-gradient(circle at top right, rgba(29, 122, 99, 0.14), transparent 30%),"
        " linear-gradient(180deg, #fbf7ef 0%, #f4ecde 100%) "
    ),
    "hero_background": (
        " linear-gradient(135deg, rgba(29, 122, 99, 0.08), rgba(255, 250, 241, 0.92)),"
        " rgba(255, 252, 247, 0.88) "
    ),
    "chip_background": "rgba(29, 122, 99, 0.08)",
    "chip_border": "rgba(29, 122, 99, 0.12)",
    "button_border": "rgba(29, 122, 99, 0.14)",
    "button_surface": "#f8fbfa",
    "button_hover": "#eef7f2",
    "primary_button": "linear-gradient(135deg, #1d7a63, #245e74)",
    "sidebar_background": (
        " linear-gradient(180deg, rgba(255, 251, 245, 0.98), rgba(246, 238, 224, 0.96)) "
    ),
    "expander_background": "rgba(255, 251, 245, 0.72)",
    "meta_background": "rgba(255, 251, 245, 0.72)",
    "divider": "linear-gradient(90deg, rgba(29, 122, 99, 0.25), transparent)",
    "meta_row_border": "rgba(22, 49, 44, 0.08)",
    "bottom_background": "#fbf7ef",
}

DARK_PALETTE = {
    "ink": "#eef4ef",
    "muted": "#b8c6c0",
    "bg": "#0d1715",
    "panel": "rgba(20, 31, 29, 0.9)",
    "panel_strong": "#15211f",
    "line": "rgba(196, 223, 211, 0.14)",
    "accent": "#54c6a2",
    "accent_soft": "#17392f",
    "highlight": "#f0b564",
    "shadow": "0 22px 70px rgba(0, 0, 0, 0.32)",
    "app_background": (
        " radial-gradient(circle at top left, rgba(240, 181, 100, 0.12), transparent 28%),"
        " radial-gradient(circle at top right, rgba(84, 198, 162, 0.12), transparent 32%),"
        " linear-gradient(180deg, #0f1b19 0%, #09110f 100%) "
    ),
    "hero_background": (
        " linear-gradient(135deg, rgba(84, 198, 162, 0.12), rgba(16, 28, 25, 0.92)),"
        " rgba(20, 31, 29, 0.9) "
    ),
    "chip_background": "rgba(84, 198, 162, 0.12)",
    "chip_border": "rgba(84, 198, 162, 0.18)",
    "button_border": "rgba(84, 198, 162, 0.18)",
    "button_surface": "rgba(84, 198, 162, 0.14)",
    "button_hover": "rgba(84, 198, 162, 0.22)",
    "primary_button": "linear-gradient(135deg, #2ea483, #245e74)",
    "sidebar_background": (
        " linear-gradient(180deg, rgba(17, 28, 26, 0.98), rgba(12, 20, 18, 0.97)) "
    ),
    "expander_background": "rgba(17, 28, 26, 0.84)",
    "meta_background": "rgba(19, 31, 28, 0.84)",
    "divider": "linear-gradient(90deg, rgba(84, 198, 162, 0.32), transparent)",
    "meta_row_border": "rgba(196, 223, 211, 0.1)",
    "bottom_background": "#09110f",
}

# Palette keys emitted as CSS custom properties, in output order. Each key
# "foo_bar" becomes the property "--plexi-foo-bar".
_CSS_VAR_KEYS = (
    "ink",
    "muted",
    "bg",
    "panel",
    "panel_strong",
    "line",
    "accent",
    "accent_soft",
    "highlight",
    "shadow",
    "app_background",
    "hero_background",
    "chip_background",
    "chip_border",
    "button_border",
    "button_surface",
    "button_hover",
    "primary_button",
    "sidebar_background",
    "expander_background",
    "meta_background",
    "divider",
    "meta_row_border",
    "bottom_background",
)


def get_theme_mode():
    """Return the selected appearance mode.

    The mode is stored in ``st.session_state`` under ``THEME_MODE_STATE_KEY``
    and is initialised to ``"system"`` the first time it is read.
    """
    if THEME_MODE_STATE_KEY not in st.session_state:
        st.session_state[THEME_MODE_STATE_KEY] = "system"
    return st.session_state[THEME_MODE_STATE_KEY]


def sync_theme_mode():
    """Persist the appearance selector value across page switches.

    Copies the selectbox value (``"System"`` when the widget key is absent)
    into the persistent state key, lowercased.
    """
    selected = st.session_state.get(THEME_MODE_WIDGET_KEY, "System")
    st.session_state[THEME_MODE_STATE_KEY] = selected.lower()


def _css_vars_block(palette):
    """Return CSS custom property definitions for a palette.

    Args:
        palette: Mapping that contains every key listed in ``_CSS_VAR_KEYS``.

    Returns:
        One ``--plexi-<name>: <value>;`` declaration per key, newline-joined.

    Raises:
        KeyError: If the palette lacks one of the expected keys.
    """
    return "\n".join(
        f" --plexi-{key.replace('_', '-')}: {palette[key]};" for key in _CSS_VAR_KEYS
    )


def inject_theme():
    """Inject the shared visual language for the Streamlit app."""
    theme_mode = get_theme_mode()
    is_dark = theme_mode == "dark"
    palette = DARK_PALETTE if is_dark else LIGHT_PALETTE
    color_scheme = "dark" if is_dark else "light"

    system_css = ""
    if theme_mode == "system":
        # In "system" mode the light palette is the base and the dark palette
        # is applied through a prefers-color-scheme media query.
        system_css = (
            " @media (prefers-color-scheme: dark) {"
            " :root {"
            f" {_css_vars_block(DARK_PALETTE)}"
            " }"
            " html { color-scheme: dark; }"
            " } "
        )

    # NOTE(review): the template text below contains no $palette_vars,
    # $color_scheme or $system_css placeholders, so the substituted values are
    # currently unused - confirm the stylesheet body was not lost.
    css = Template(" ").substitute(
        {
            "palette_vars": _css_vars_block(palette),
            "color_scheme": color_scheme,
            "system_css": system_css,
        }
    )
    st.markdown(css, unsafe_allow_html=True)


def summarize_manifest(manifest):
    """Return top-level counts for the materials catalog.

    Args:
        manifest: Nested mapping, iterated here as
            semester -> subject -> material type -> list of files.

    Returns:
        Dict with ``semester_count``, ``subject_count``, ``file_count`` and
        the sorted, de-duplicated ``material_types``.
    """
    subject_total = 0
    file_total = 0
    seen_types = set()
    for subjects in manifest.values():
        subject_total += len(subjects)
        for types in subjects.values():
            seen_types.update(types)
            file_total += sum(len(files) for files in types.values())
    return {
        "semester_count": len(manifest),
        "subject_count": subject_total,
        "file_count": file_total,
        "material_types": sorted(seen_types),
    }


def summarize_subject_catalog(subject_data):
    """Return counts for one selected subject catalog.

    Args:
        subject_data: Mapping of material type -> list of files.
    """
    return {
        "type_count": len(subject_data),
        "file_count": sum(len(files) for files in subject_data.values()),
        "types": sorted(subject_data),
    }


def render_page_header(kicker, title, subtitle, badges=None):
    """Render a shared hero block for each page.

    All text is HTML-escaped before being written. Falsy badges are skipped.
    """
    badge_html = ""
    if badges:
        badge_html = "".join(escape(str(badge)) for badge in badges if badge)
        badge_html = f"\n{badge_html}\n"
    sections = [escape(kicker), escape(title), escape(subtitle), badge_html]
    # Sections are separated by a blank line and wrapped in single newlines.
    st.markdown("\n" + "\n\n".join(sections) + "\n", unsafe_allow_html=True)


def render_stat_cards(cards):
    """Render compact metrics in a responsive grid.

    Args:
        cards: Sequence of dicts with optional ``label``, ``value`` and
            ``note`` entries; one column is created per card. Nothing is
            rendered when the sequence is empty.
    """
    if not cards:
        return
    columns = st.columns(len(cards))
    for column, card in zip(columns, cards):
        label = escape(str(card.get("label", "")))
        value = escape(str(card.get("value", "")))
        note = escape(str(card.get("note", "")))
        with column:
            st.markdown(f"\n{label}\n{value}\n{note}\n", unsafe_allow_html=True)


def render_panel(title, body, tone="default"):
    """Render a simple informational panel."""
    # NOTE(review): panel_class is computed but not referenced in the markup
    # below - confirm the wrapper element that used it was not lost.
    panel_class = "plexi-callout" if tone == "callout" else "plexi-panel"
    st.markdown(f"\n{escape(title)}\n{escape(body)}\n", unsafe_allow_html=True)


def _manifest_url():
    """Raw GitHub URL for manifest.json."""
    return (
        f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/manifest.json"
    )


@st.cache_data(ttl=300, show_spinner=False)
def get_manifest():
    """Fetch the materials manifest from GitHub. Cached for 5 minutes.

    Raises:
        requests.RequestException: On network failure or a non-2xx response.
    """
    resp = requests.get(_manifest_url(), timeout=15)
    resp.raise_for_status()
    return resp.json()


def download_github_file(download_url, max_retries=3):
    """Download a file from a GitHub Release asset URL with retry logic.

    Args:
        download_url: URL to fetch.
        max_retries: Number of attempts before giving up.

    Returns:
        The response body as bytes, or ``None`` when ``max_retries`` allows
        no attempt at all.

    Raises:
        requests.RequestException: Re-raised from the final failed attempt.
    """
    last_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            resp = requests.get(download_url, timeout=60)
            resp.raise_for_status()
            return resp.content
        except requests.RequestException as err:
            print(f"Download error (attempt {attempt + 1}): {err}")
            if attempt == last_attempt:
                raise
    return None


def get_mime_type(filename):
    """Guess MIME type from filename extension."""
    mime, _ = mimetypes.guess_type(filename)
    return mime or "application/octet-stream"


def render_sidebar():
    """Render the shared sidebar with branding and outbound links."""
    with st.sidebar:
        # Seed the selectbox from the persisted mode so the widget shows the
        # stored choice.
        widget_value = get_theme_mode().capitalize()
        if st.session_state.get(THEME_MODE_WIDGET_KEY) != widget_value:
            st.session_state[THEME_MODE_WIDGET_KEY] = widget_value

        st.markdown(
            "\nPlexi\nGrounded study assistant\n"
            "Browse materials, preview files, and ask questions backed by the "
            "currently loaded course content.\n",
            unsafe_allow_html=True,
        )
        st.markdown("\nAppearance\n", unsafe_allow_html=True)
        st.selectbox(
            "Theme",
            ["System", "Light", "Dark"],
            key=THEME_MODE_WIDGET_KEY,
            on_change=sync_theme_mode,
            help="System follows your device preference unless you override it here.",
        )
        st.caption("Built by **Kunal Gupta** (LazyHuman)")

        web_col, github_col, kofi_col = st.columns(3)
        with web_col:
            st.link_button("Web", "https://lazyhideout.tech", use_container_width=True)
        with github_col:
            st.link_button(
                "GitHub", "https://github.com/kunalgupta25", use_container_width=True
            )
        with kofi_col:
            st.link_button(
                "Ko-fi", "https://ko-fi.com/lazy_human", use_container_width=True
            )
        st.markdown("\n", unsafe_allow_html=True)


def read_pdf_text(pdf_bytes):
    """Extract text from PDF bytes with error handling.

    Pages whose extraction raises are skipped. If the document cannot be
    read at all, the raw bytes are decoded as UTF-8 (undecodable bytes
    dropped) as a best-effort fallback; empty input yields ``""``.
    """
    pages = []
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
        for page in reader.pages:
            try:
                page_text = page.extract_text()
            except Exception:
                # Best effort: one unreadable page must not discard the rest.
                continue
            if not page_text:
                continue
            try:
                # Round-trip through UTF-16 to drop lone surrogate code units.
                cleaned = page_text.encode("utf-16", "surrogatepass").decode(
                    "utf-16", "ignore"
                )
            except Exception:
                continue
            pages.append(cleaned)
        return "\n".join(pages)
    except Exception:
        return pdf_bytes.decode("utf-8", errors="ignore") if pdf_bytes else ""


def load_subject_context(manifest, semester, subject):
    """Download and extract text from all files for a given semester + subject.

    Only files whose guessed MIME type is ``text/*`` or ``application/pdf``
    are loaded. Failures on a single file are logged and skipped.

    Returns (context_string, source_list) where:
    - context_string: numbered source blocks for the system prompt
    - source_list: list of dicts with 'id', 'name', 'type' for citation display
    """
    subject_data = manifest.get(semester, {}).get(subject, {})
    parts = []
    sources = []
    source_id = 0
    for file_type, file_list in subject_data.items():
        for file_entry in file_list:
            name = file_entry.get("name")
            if not name:
                # A malformed manifest entry should not abort the whole load.
                print(f"Error loading manifest entry without a name: {file_entry!r}")
                continue
            mime = get_mime_type(name)
            if not (mime.startswith("text/") or mime == "application/pdf"):
                continue
            try:
                content = download_github_file(file_entry["download_url"])
                if not content:
                    continue
                if mime == "application/pdf":
                    text = read_pdf_text(content)
                else:
                    text = content.decode("utf-8", errors="ignore")
                if not text.strip():
                    continue
                source_id += 1
                sources.append({"id": source_id, "name": name, "type": file_type})
                parts.append(
                    f"[Source {source_id}: {name} ({file_type})]\n{text}\n[End Source {source_id}]"
                )
            except Exception as err:
                print(f"Error loading {name}: {err}")
    return "\n\n".join(parts), sources


# RAG index loading from GitHub
# The index is pre-built by GitHub Actions (build_index.py) and
# committed to the materials repo. We just download and load it.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"  # must match build_index.py

# Files fetched from the repo's index/ directory; a failure on any of them
# aborts the load.
INDEX_FILES = [
    "default__vector_store.json",
    "docstore.json",
    "graph_store.json",
    "image__vector_store.json",
    "index_store.json",
]


@st.cache_resource(show_spinner="Loading RAG index...")
def fetch_rag_index():
    """Download the pre-built LlamaIndex index and load it.

    The files listed in ``INDEX_FILES`` are fetched from the ``index/``
    directory of the materials repo into a temporary directory, loaded with
    ``load_index_from_storage``, and the directory is removed afterwards.

    The result is cached by ``st.cache_resource``, which is shared by the
    whole server process rather than per session. Because failures are
    returned instead of raised, an error result is cached as well.

    Returns:
        ``(index, error_msg)`` - ``index`` is ``None`` if loading failed,
        otherwise ``error_msg`` is ``None``.
    """
    if not LLAMA_INDEX_AVAILABLE:
        return (
            None,
            "LlamaIndex not installed - install llama-index-core and dependencies.",
        )

    index_base_url = (
        f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/index"
    )

    # TemporaryDirectory removes the downloaded files on every exit path; the
    # previous mkdtemp() directory was never cleaned up.
    # NOTE(review): relies on the default stores reading their JSON fully into
    # memory during load - confirm if a disk-backed store is ever configured.
    with tempfile.TemporaryDirectory(prefix="plexi_index_") as index_dir:
        try:
            for filename in INDEX_FILES:
                resp = requests.get(f"{index_base_url}/{filename}", timeout=30)
                resp.raise_for_status()
                with open(os.path.join(index_dir, filename), "wb") as file_handle:
                    file_handle.write(resp.content)
        except Exception as err:
            return None, f"Failed to download index files: {err}"

        try:
            # Retrieval only: set the embedding model and disable the LLM.
            Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_ID)
            Settings.llm = None
            storage_context = StorageContext.from_defaults(persist_dir=index_dir)
            index = load_index_from_storage(storage_context)
        except Exception as err:
            return None, f"Failed to load index: {err}"

    return index, None