plexi / utils.py
LazyHuman10
Prepare Hugging Face Space deployment
fbe7a99
raw
history blame
30.6 kB
import io
import mimetypes
import os
import tempfile
from html import escape
from string import Template
import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv
load_dotenv()
# LlamaIndex imports for RAG retrieval
try:
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
LLAMA_INDEX_AVAILABLE = True
except ImportError:
LLAMA_INDEX_AVAILABLE = False
# GitHub repo that hosts study materials via Releases + manifest.json
# Format: "owner/repo"
# Can be overridden through the MATERIALS_REPO environment variable.
MATERIALS_REPO = os.getenv("MATERIALS_REPO", "KunalGupta25/plexi-materials")
# Branch segment used when building raw.githubusercontent.com URLs.
MANIFEST_BRANCH = "main"
# Session-state key holding the persisted theme mode (stored lowercase).
THEME_MODE_STATE_KEY = "plexi_theme_mode"
# Session-state key bound to the sidebar "Theme" selectbox widget.
THEME_MODE_WIDGET_KEY = "_plexi_theme_mode_widget"
# Light-mode design tokens. Each key is rendered as a `--plexi-*` CSS custom
# property by _css_vars_block(); values are raw CSS fragments.
LIGHT_PALETTE = {
    "ink": "#16312c",
    "muted": "#5b6c66",
    "bg": "#f5f0e8",
    "panel": "rgba(255, 252, 247, 0.88)",
    "panel_strong": "#fffaf1",
    "line": "rgba(22, 49, 44, 0.11)",
    "accent": "#1d7a63",
    "accent_soft": "#d7efe4",
    "highlight": "#f4b860",
    "shadow": "0 18px 60px rgba(30, 48, 43, 0.08)",
    # Multi-layer backgrounds are kept as multi-line CSS values.
    "app_background": """
radial-gradient(circle at top left, rgba(244, 184, 96, 0.18), transparent 28%),
radial-gradient(circle at top right, rgba(29, 122, 99, 0.14), transparent 30%),
linear-gradient(180deg, #fbf7ef 0%, #f4ecde 100%)
""",
    "hero_background": """
linear-gradient(135deg, rgba(29, 122, 99, 0.08), rgba(255, 250, 241, 0.92)),
rgba(255, 252, 247, 0.88)
""",
    "chip_background": "rgba(29, 122, 99, 0.08)",
    "chip_border": "rgba(29, 122, 99, 0.12)",
    "button_border": "rgba(29, 122, 99, 0.14)",
    "button_surface": "#f8fbfa",
    "button_hover": "#eef7f2",
    "primary_button": "linear-gradient(135deg, #1d7a63, #245e74)",
    "sidebar_background": """
linear-gradient(180deg, rgba(255, 251, 245, 0.98), rgba(246, 238, 224, 0.96))
""",
    "expander_background": "rgba(255, 251, 245, 0.72)",
    "meta_background": "rgba(255, 251, 245, 0.72)",
    "divider": "linear-gradient(90deg, rgba(29, 122, 99, 0.25), transparent)",
    "meta_row_border": "rgba(22, 49, 44, 0.08)",
    "bottom_background": "#fbf7ef",
}
# Dark-mode design tokens. Uses the same keys as LIGHT_PALETTE so either
# dict can be passed to _css_vars_block().
DARK_PALETTE = {
    "ink": "#eef4ef",
    "muted": "#b8c6c0",
    "bg": "#0d1715",
    "panel": "rgba(20, 31, 29, 0.9)",
    "panel_strong": "#15211f",
    "line": "rgba(196, 223, 211, 0.14)",
    "accent": "#54c6a2",
    "accent_soft": "#17392f",
    "highlight": "#f0b564",
    "shadow": "0 22px 70px rgba(0, 0, 0, 0.32)",
    # Multi-layer backgrounds are kept as multi-line CSS values.
    "app_background": """
radial-gradient(circle at top left, rgba(240, 181, 100, 0.12), transparent 28%),
radial-gradient(circle at top right, rgba(84, 198, 162, 0.12), transparent 32%),
linear-gradient(180deg, #0f1b19 0%, #09110f 100%)
""",
    "hero_background": """
linear-gradient(135deg, rgba(84, 198, 162, 0.12), rgba(16, 28, 25, 0.92)),
rgba(20, 31, 29, 0.9)
""",
    "chip_background": "rgba(84, 198, 162, 0.12)",
    "chip_border": "rgba(84, 198, 162, 0.18)",
    "button_border": "rgba(84, 198, 162, 0.18)",
    "button_surface": "rgba(84, 198, 162, 0.14)",
    "button_hover": "rgba(84, 198, 162, 0.22)",
    "primary_button": "linear-gradient(135deg, #2ea483, #245e74)",
    "sidebar_background": """
linear-gradient(180deg, rgba(17, 28, 26, 0.98), rgba(12, 20, 18, 0.97))
""",
    "expander_background": "rgba(17, 28, 26, 0.84)",
    "meta_background": "rgba(19, 31, 28, 0.84)",
    "divider": "linear-gradient(90deg, rgba(84, 198, 162, 0.32), transparent)",
    "meta_row_border": "rgba(196, 223, 211, 0.1)",
    "bottom_background": "#09110f",
}
def get_theme_mode():
    """Return the stored appearance mode, seeding it with "system" if unset."""
    state = st.session_state
    if THEME_MODE_STATE_KEY in state:
        return state[THEME_MODE_STATE_KEY]
    # First access in this session: record the default before returning it.
    state[THEME_MODE_STATE_KEY] = "system"
    return state[THEME_MODE_STATE_KEY]
def sync_theme_mode():
    """Copy the theme selector's value into the persistent state key.

    The widget holds a capitalized label; the persisted mode is stored
    lowercase. Falls back to "System" when the widget key is absent.
    """
    selected_label = st.session_state.get(THEME_MODE_WIDGET_KEY, "System")
    st.session_state[THEME_MODE_STATE_KEY] = selected_label.lower()
def _css_vars_block(palette):
"""Return CSS custom property definitions for a palette."""
return "\n".join(
[
f" --plexi-ink: {palette['ink']};",
f" --plexi-muted: {palette['muted']};",
f" --plexi-bg: {palette['bg']};",
f" --plexi-panel: {palette['panel']};",
f" --plexi-panel-strong: {palette['panel_strong']};",
f" --plexi-line: {palette['line']};",
f" --plexi-accent: {palette['accent']};",
f" --plexi-accent-soft: {palette['accent_soft']};",
f" --plexi-highlight: {palette['highlight']};",
f" --plexi-shadow: {palette['shadow']};",
f" --plexi-app-background: {palette['app_background']};",
f" --plexi-hero-background: {palette['hero_background']};",
f" --plexi-chip-background: {palette['chip_background']};",
f" --plexi-chip-border: {palette['chip_border']};",
f" --plexi-button-border: {palette['button_border']};",
f" --plexi-button-surface: {palette['button_surface']};",
f" --plexi-button-hover: {palette['button_hover']};",
f" --plexi-primary-button: {palette['primary_button']};",
f" --plexi-sidebar-background: {palette['sidebar_background']};",
f" --plexi-expander-background: {palette['expander_background']};",
f" --plexi-meta-background: {palette['meta_background']};",
f" --plexi-divider: {palette['divider']};",
f" --plexi-meta-row-border: {palette['meta_row_border']};",
f" --plexi-bottom-background: {palette['bottom_background']};",
]
)
def inject_theme():
    """Inject the shared visual language for the Streamlit app.

    Reads the current mode from get_theme_mode(), renders the matching
    palette as CSS custom properties via _css_vars_block(), and emits a
    single <style> block through st.markdown. In "system" mode the light
    palette is the base and a prefers-color-scheme media query swaps in
    the dark palette.
    """
    theme_mode = get_theme_mode()
    # Any mode other than "dark" (including "system") starts from the light palette.
    palette = DARK_PALETTE if theme_mode == "dark" else LIGHT_PALETTE
    system_css = ""
    color_scheme = "dark" if theme_mode == "dark" else "light"
    if theme_mode == "system":
        # Doubled braces below are literal CSS braces inside the f-string.
        system_css = f"""
@media (prefers-color-scheme: dark) {{
:root {{
{_css_vars_block(DARK_PALETTE)}
}}
html {{
color-scheme: dark;
}}
}}
"""
    # string.Template keeps the CSS braces literal; only $palette_vars,
    # $color_scheme and $system_css are substituted.
    css = Template(
        """
<style>
@import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=Space+Grotesk:wght@400;500;700&display=swap');
:root {
$palette_vars
}
html, body, [class*="css"] {
font-family: "Space Grotesk", "Segoe UI", sans-serif;
}
html {
color-scheme: $color_scheme;
}
.stApp {
background: var(--plexi-app-background);
color: var(--plexi-ink);
}
header[data-testid="stHeader"] {
background: transparent !important;
}
div[data-testid="stToolbar"] {
background: transparent !important;
}
div[data-testid="stAppViewContainer"] {
background: transparent;
}
.block-container {
padding-top: 2.2rem;
padding-bottom: 3rem;
}
h1, h2, h3 {
color: var(--plexi-ink);
}
h1, .plexi-title {
font-family: "DM Serif Display", Georgia, serif;
letter-spacing: -0.03em;
}
p, li, .stMarkdown, .stCaption, .stChatMessage {
color: var(--plexi-ink);
}
.plexi-hero,
.plexi-panel,
.plexi-stat,
.plexi-sidecard,
.plexi-callout {
background: var(--plexi-panel);
border: 1px solid var(--plexi-line);
border-radius: 24px;
box-shadow: var(--plexi-shadow);
}
.plexi-hero {
padding: 1.8rem 1.9rem;
margin-bottom: 1.1rem;
background: var(--plexi-hero-background);
}
.plexi-kicker {
text-transform: uppercase;
letter-spacing: 0.16em;
font-size: 0.72rem;
font-weight: 700;
color: var(--plexi-accent);
margin-bottom: 0.65rem;
}
.plexi-title {
font-size: clamp(2.2rem, 5vw, 4.2rem);
margin: 0;
line-height: 0.95;
}
.plexi-subtitle {
max-width: 48rem;
margin: 0.8rem 0 0;
color: var(--plexi-muted);
font-size: 1rem;
line-height: 1.65;
}
.plexi-chip-row {
display: flex;
gap: 0.55rem;
flex-wrap: wrap;
margin-top: 1rem;
}
.plexi-chip {
display: inline-flex;
align-items: center;
gap: 0.35rem;
padding: 0.45rem 0.8rem;
border-radius: 999px;
background: var(--plexi-chip-background);
border: 1px solid var(--plexi-chip-border);
font-size: 0.82rem;
color: var(--plexi-ink);
}
.plexi-panel,
.plexi-callout,
.plexi-sidecard {
padding: 1.15rem 1.2rem;
margin-bottom: 1rem;
}
.plexi-stat {
padding: 1rem 1.15rem;
min-height: 8.5rem;
overflow: hidden;
}
.plexi-stat-label {
color: var(--plexi-muted);
font-size: 0.82rem;
text-transform: uppercase;
letter-spacing: 0.08em;
}
.plexi-stat-value {
font-family: "DM Serif Display", Georgia, serif;
font-size: clamp(1.5rem, 2.1vw, 2.1rem);
line-height: 1.08;
margin: 0.35rem 0 0.4rem;
overflow-wrap: anywhere;
}
.plexi-stat-note,
.plexi-muted {
color: var(--plexi-muted);
font-size: 0.92rem;
line-height: 1.55;
}
.plexi-section-label {
margin: 1.6rem 0 0.8rem;
text-transform: uppercase;
letter-spacing: 0.12em;
color: var(--plexi-accent);
font-size: 0.74rem;
font-weight: 700;
}
.plexi-list {
margin: 0;
padding-left: 1rem;
color: var(--plexi-muted);
line-height: 1.7;
}
.plexi-cta-grid {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 1rem;
margin: 1rem 0 1.4rem;
align-items: stretch;
}
.plexi-cta-button {
display: flex;
align-items: center;
justify-content: center;
width: 100%;
min-height: 3.6rem;
padding: 0.9rem 1.2rem;
border-radius: 999px;
text-decoration: none !important;
font-weight: 700;
font-size: 1.02rem;
color: #ffffff !important;
background: linear-gradient(135deg, #3bb192, #2b728a);
box-shadow: 0 16px 40px rgba(38, 109, 107, 0.22);
border: none;
transition: transform 120ms ease, box-shadow 120ms ease, opacity 120ms ease;
}
.plexi-cta-button:hover {
color: #ffffff !important;
transform: translateY(-1px);
box-shadow: 0 18px 44px rgba(38, 109, 107, 0.28);
}
.plexi-prompt button,
.stButton > button,
.stDownloadButton > button,
.stLinkButton > a {
border-radius: 999px !important;
}
.stButton > button,
.stDownloadButton > button,
.stLinkButton > a {
border: 1px solid var(--plexi-button-border);
min-height: 2.85rem;
background: var(--plexi-button-surface);
color: var(--plexi-ink) !important;
box-shadow: none !important;
}
.stButton > button[kind="primary"],
.stDownloadButton > button[kind="primary"] {
background: var(--plexi-primary-button);
color: white;
border: none;
}
.stLinkButton > a {
display: flex;
align-items: center;
justify-content: center;
text-decoration: none !important;
}
.stLinkButton > a:hover,
.stButton > button:hover,
.stDownloadButton > button:hover {
border-color: var(--plexi-accent);
background: var(--plexi-button-hover);
color: var(--plexi-ink) !important;
}
.stButton > button:disabled,
.stDownloadButton > button:disabled,
.stLinkButton > a[disabled] {
opacity: 0.55;
color: var(--plexi-muted) !important;
}
.stTextInput input,
.stSelectbox [data-baseweb="select"] > div,
.stTextArea textarea,
.stChatInput textarea {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-ink) !important;
border-color: var(--plexi-line) !important;
}
.stSelectbox [data-baseweb="select"] *,
.stTextInput input::placeholder,
.stTextArea textarea::placeholder,
.stChatInput textarea::placeholder {
color: var(--plexi-muted) !important;
}
div[data-baseweb="select"] svg,
div[data-baseweb="select"] path {
color: var(--plexi-accent) !important;
fill: var(--plexi-accent) !important;
}
.stChatInputContainer,
div[data-testid="stChatMessage"] {
border-radius: 22px;
}
div[data-testid="stBottomBlockContainer"],
div[data-testid="stBottomBlockContainer"] > div,
div[data-testid="stBottomBlockContainer"] > div > div,
div[data-testid="stChatInput"],
div[data-testid="stChatInput"] > div,
div[data-testid="stChatInput"] form,
div[data-testid="stChatInput"] form > div,
.stChatInputContainer {
background: var(--plexi-bottom-background) !important;
}
div[data-testid="stChatInput"] {
border-top: none !important;
padding-top: 0.5rem;
}
div[data-testid="stChatInput"] textarea,
div[data-testid="stChatInput"] section,
div[data-testid="stChatInput"] [data-baseweb="textarea"] {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-ink) !important;
border-color: var(--plexi-line) !important;
}
div[data-testid="stChatInput"] button {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-accent) !important;
border: 1px solid var(--plexi-button-border) !important;
}
div[data-testid="stChatInput"] button svg,
div[data-testid="stChatInput"] button path {
fill: currentColor !important;
}
div[data-testid="stSidebar"],
div[data-testid="stSidebar"] > div,
section[data-testid="stSidebar"] {
background: var(--plexi-sidebar-background);
border-right: 1px solid var(--plexi-line);
}
div[data-testid="stSidebar"] .block-container {
padding-top: 1.2rem;
}
div[data-testid="stSidebarNav"],
div[data-testid="stSidebarNav"] ul,
div[data-testid="stSidebarNav"] li,
div[data-testid="stSidebarUserContent"] {
background: transparent !important;
}
div[data-testid="stSidebarNav"] a,
div[data-testid="stSidebarNav"] span,
div[data-testid="stSidebarNav"] button {
color: var(--plexi-ink) !important;
}
div[data-testid="stSidebarNav"] a:hover {
background: rgba(255, 255, 255, 0.04);
}
div[data-testid="stExpander"] {
border-radius: 18px;
border-color: var(--plexi-line);
background: var(--plexi-expander-background);
}
div[data-baseweb="popover"],
div[data-baseweb="popover"] > div,
div[data-baseweb="popover"] > div > div,
div[data-baseweb="popover"] > div > div > div,
div[data-baseweb="menu"],
div[data-baseweb="menu"] > div,
div[role="listbox"],
ul[role="listbox"] {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-ink) !important;
border: 1px solid var(--plexi-line) !important;
box-shadow: var(--plexi-shadow) !important;
}
div[data-baseweb="popover"] *,
div[data-baseweb="menu"] *,
div[role="listbox"] *,
ul[role="listbox"] * {
color: var(--plexi-ink) !important;
}
div[data-baseweb="popover"] ul,
div[data-baseweb="popover"] li,
div[data-baseweb="popover"] li > div,
div[data-baseweb="menu"] ul,
div[data-baseweb="menu"] li,
div[data-baseweb="menu"] li > div {
background: var(--plexi-panel-strong) !important;
}
li[role="option"],
li[role="option"] > div,
li[role="option"] * {
background: transparent !important;
color: var(--plexi-ink) !important;
}
li[role="option"]:hover,
li[role="option"]:hover > div,
li[role="option"]:hover *,
li[role="option"][aria-selected="true"] {
background: var(--plexi-accent-soft) !important;
color: var(--plexi-ink) !important;
}
li[role="option"][aria-selected="true"] > div,
li[role="option"][aria-selected="true"] * {
background: var(--plexi-accent-soft) !important;
color: var(--plexi-ink) !important;
}
.plexi-sidecard-title {
font-family: "DM Serif Display", Georgia, serif;
font-size: 1.25rem;
margin-bottom: 0.35rem;
}
.plexi-meta {
background: var(--plexi-meta-background);
border: 1px solid var(--plexi-line);
border-radius: 18px;
padding: 0.85rem 1rem;
margin-bottom: 1rem;
}
.plexi-meta-row {
display: flex;
justify-content: space-between;
gap: 1rem;
align-items: flex-start;
padding: 0.65rem 0;
border-bottom: 1px solid var(--plexi-meta-row-border);
}
.plexi-meta-row:last-child {
border-bottom: none;
padding-bottom: 0;
}
.plexi-meta-row:first-child {
padding-top: 0;
}
.plexi-meta-key {
color: var(--plexi-muted);
font-size: 0.78rem;
text-transform: uppercase;
letter-spacing: 0.08em;
flex: 0 0 38%;
}
.plexi-meta-value {
text-align: right;
color: var(--plexi-ink);
font-size: 0.96rem;
line-height: 1.5;
overflow-wrap: anywhere;
}
.plexi-filelist {
margin: 0;
padding-left: 1.1rem;
color: var(--plexi-muted);
line-height: 1.7;
}
.plexi-filelist li.current {
color: var(--plexi-ink);
font-weight: 600;
}
.plexi-divider {
height: 1px;
background: var(--plexi-divider);
margin: 1rem 0 1.1rem;
}
$system_css
@media (max-width: 900px) {
.block-container {
padding-top: 1.2rem;
}
.plexi-hero {
padding: 1.35rem 1.2rem;
border-radius: 20px;
}
.plexi-stat {
min-height: 0;
}
.plexi-meta-row {
display: block;
}
.plexi-meta-value {
text-align: left;
margin-top: 0.2rem;
}
.plexi-cta-grid {
grid-template-columns: 1fr;
}
}
</style>
"""
    ).substitute(
        {
            "palette_vars": _css_vars_block(palette),
            "color_scheme": color_scheme,
            "system_css": system_css,
        }
    )
    # unsafe_allow_html is required for the raw <style> tag to be emitted.
    st.markdown(css, unsafe_allow_html=True)
def summarize_manifest(manifest):
    """Compute catalog-wide totals from the nested manifest mapping.

    The manifest is walked as semester -> subject -> material type -> files.
    Returns a dict with the semester, subject and file counts plus the
    sorted list of distinct material types.
    """
    subject_count = 0
    file_count = 0
    seen_types = set()
    for subjects in manifest.values():
        subject_count += len(subjects)
        for types in subjects.values():
            seen_types.update(types.keys())
            for files in types.values():
                file_count += len(files)
    return {
        "semester_count": len(manifest),
        "subject_count": subject_count,
        "file_count": file_count,
        "material_types": sorted(seen_types),
    }
def summarize_subject_catalog(subject_data):
    """Summarize one subject's mapping of material type -> file list.

    Returns the number of types, the total number of files and the
    sorted type names.
    """
    total_files = 0
    for files in subject_data.values():
        total_files += len(files)
    type_names = sorted(subject_data)
    return {
        "type_count": len(subject_data),
        "file_count": total_files,
        "types": type_names,
    }
def render_page_header(kicker, title, subtitle, badges=None):
    """Render a shared hero block for each page.

    Args:
        kicker: Short label shown above the title.
        title: Main heading text.
        subtitle: Supporting paragraph below the title.
        badges: Optional iterable of chip labels; falsy entries are skipped.

    All values are coerced with ``str()`` before HTML-escaping, matching
    render_stat_cards, so non-string inputs no longer crash ``escape``.
    The chip row is emitted only when at least one badge survives the
    filter, avoiding an empty spacer div.
    """
    chips = "".join(
        f'<span class="plexi-chip">{escape(str(badge))}</span>'
        for badge in (badges or ())
        if badge
    )
    badge_html = f'<div class="plexi-chip-row">{chips}</div>' if chips else ""
    st.markdown(
        f"""
<section class="plexi-hero">
<div class="plexi-kicker">{escape(str(kicker))}</div>
<h1 class="plexi-title">{escape(str(title))}</h1>
<p class="plexi-subtitle">{escape(str(subtitle))}</p>
{badge_html}
</section>
""",
        unsafe_allow_html=True,
    )
def render_stat_cards(cards):
    """Render one metric card per entry, each in its own Streamlit column.

    Every card is a dict read via ``.get`` for "label", "value" and "note";
    missing fields render as empty strings. Nothing is rendered for an
    empty or missing list.
    """
    if not cards:
        return
    columns = st.columns(len(cards))
    for column, card in zip(columns, cards):
        # Coerce to str and escape before interpolating into raw HTML.
        label, value, note = (
            escape(str(card.get(field, ""))) for field in ("label", "value", "note")
        )
        markup = f"""
<div class="plexi-stat">
<div class="plexi-stat-label">{label}</div>
<div class="plexi-stat-value">{value}</div>
<div class="plexi-stat-note">{note}</div>
</div>
"""
        with column:
            st.markdown(markup, unsafe_allow_html=True)
def render_panel(title, body, tone="default"):
    """Render a simple informational panel.

    Args:
        title: Panel heading.
        body: Panel text.
        tone: "callout" selects the ``plexi-callout`` class; any other
            value uses ``plexi-panel``.

    Title and body are coerced with ``str()`` before HTML-escaping, matching
    render_stat_cards, so passing e.g. an exception object as the body does
    not crash ``escape``.
    """
    panel_class = "plexi-callout" if tone == "callout" else "plexi-panel"
    st.markdown(
        f"""
<section class="{panel_class}">
<div class="plexi-sidecard-title">{escape(str(title))}</div>
<div class="plexi-muted">{escape(str(body))}</div>
</section>
""",
        unsafe_allow_html=True,
    )
def _manifest_url():
    """Build the raw.githubusercontent.com address of manifest.json."""
    raw_root = f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}"
    return f"{raw_root}/manifest.json"
@st.cache_data(ttl=300, show_spinner=False)
def get_manifest():
    """Download and parse manifest.json; results are cached for 300 seconds.

    Raises whatever ``requests`` raises on a network failure or a non-2xx
    response (via ``raise_for_status``).
    """
    response = requests.get(_manifest_url(), timeout=15)
    response.raise_for_status()
    manifest = response.json()
    return manifest
def download_github_file(download_url, max_retries=3):
    """Fetch ``download_url`` and return the response body as bytes.

    Retries immediately on ``requests.RequestException``, logging each
    failure; the exception from the final attempt is re-raised. Returns
    ``None`` only when ``max_retries`` allows no attempts at all.
    """
    for attempt_number in range(1, max_retries + 1):
        try:
            response = requests.get(download_url, timeout=60)
            response.raise_for_status()
            return response.content
        except requests.RequestException as err:
            print(f"Download error (attempt {attempt_number}): {err}")
            out_of_attempts = attempt_number == max_retries
            if out_of_attempts:
                raise
    return None
def get_mime_type(filename):
    """Return the MIME type implied by the filename's extension.

    Falls back to "application/octet-stream" when the type is unknown.
    """
    guessed = mimetypes.guess_type(filename)[0]
    if not guessed:
        return "application/octet-stream"
    return guessed
def render_sidebar():
    """Draw the shared sidebar: branding card, theme selector and links.

    The selectbox's widget key is aligned with the persisted theme mode
    before the widget is created, so the selector reflects the stored mode.
    """
    # (label, url) pairs rendered left to right as link buttons.
    outbound_links = (
        ("Web", "https://lazyhideout.tech"),
        ("GitHub", "https://github.com/kunalgupta25"),
        ("Ko-fi", "https://ko-fi.com/lazy_human"),
    )
    branding_html = """
<section class="plexi-sidecard">
<div class="plexi-kicker">Plexi</div>
<div class="plexi-sidecard-title">Grounded study assistant</div>
<div class="plexi-muted">
Browse materials, preview files, and ask questions backed by the
currently loaded course content.
</div>
</section>
"""
    with st.sidebar:
        # The stored mode is lowercase; the widget options are capitalized.
        expected_label = get_theme_mode().capitalize()
        if st.session_state.get(THEME_MODE_WIDGET_KEY) != expected_label:
            st.session_state[THEME_MODE_WIDGET_KEY] = expected_label
        st.markdown(branding_html, unsafe_allow_html=True)
        st.markdown(
            '<div class="plexi-section-label">Appearance</div>',
            unsafe_allow_html=True,
        )
        st.selectbox(
            "Theme",
            ["System", "Light", "Dark"],
            key=THEME_MODE_WIDGET_KEY,
            on_change=sync_theme_mode,
            help="System follows your device preference unless you override it here.",
        )
        st.caption("Built by **Kunal Gupta** (LazyHuman)")
        link_columns = st.columns(3)
        for link_column, (link_label, link_url) in zip(link_columns, outbound_links):
            with link_column:
                st.link_button(link_label, link_url, use_container_width=True)
        st.markdown('<div class="plexi-divider"></div>', unsafe_allow_html=True)
def read_pdf_text(pdf_bytes):
    """Return the text of a PDF given as bytes, one page per line group.

    Pages whose extraction fails are skipped. If the document cannot be
    read at all, the raw bytes are decoded as UTF-8 (ignoring errors), or
    an empty string is returned for empty input.
    """
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
        page_texts = []
        for page in reader.pages:
            try:
                raw_text = page.extract_text()
                if not raw_text:
                    continue
                # Round-trip through UTF-16 to drop lone surrogates.
                encoded = raw_text.encode("utf-16", "surrogatepass")
                page_texts.append(encoded.decode("utf-16", "ignore"))
            except Exception:
                # Best effort: an unreadable page must not discard the rest.
                continue
        return "\n".join(page_texts)
    except Exception:
        if pdf_bytes:
            return pdf_bytes.decode("utf-8", errors="ignore")
        return ""
def load_subject_context(manifest, semester, subject):
    """Download every readable file of one subject and build prompt context.

    Only files whose guessed MIME type is text/* or application/pdf are
    fetched. Files that fail to download or yield no text are skipped;
    errors are printed rather than raised.

    Returns (context_string, source_list) where:
    - context_string: numbered source blocks for the system prompt
    - source_list: list of dicts with 'id', 'name', 'type' for citation display
    """
    catalog = manifest.get(semester, {}).get(subject, {})
    blocks = []
    citations = []
    for material_type, entries in catalog.items():
        for entry in entries:
            filename = entry["name"]
            mime_type = get_mime_type(filename)
            is_pdf = mime_type == "application/pdf"
            if not mime_type.startswith("text/") and not is_pdf:
                continue
            try:
                payload = download_github_file(entry["download_url"])
                if not payload:
                    continue
                if is_pdf:
                    extracted = read_pdf_text(payload)
                else:
                    extracted = payload.decode("utf-8", errors="ignore")
                if not extracted.strip():
                    continue
                # Source ids are 1-based and count only files that produced text.
                number = len(citations) + 1
                citations.append({"id": number, "name": filename, "type": material_type})
                blocks.append(
                    f"[Source {number}: {filename} ({material_type})]\n{extracted}\n[End Source {number}]"
                )
            except Exception as err:
                print(f"Error loading {filename}: {err}")
    return "\n\n".join(blocks), citations
# RAG index loading from GitHub
# The index is pre-built by GitHub Actions (build_index.py) and
# committed to the materials repo. We just download and load it.
# Embedding model name passed to HuggingFaceEmbedding in fetch_rag_index().
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2" # must match build_index.py
# Files downloaded from the repo's index/ directory; a failure on any one
# of them aborts the index load.
INDEX_FILES = [
    "default__vector_store.json",
    "docstore.json",
    "graph_store.json",
    "image__vector_store.json",
    "index_store.json",
]
@st.cache_resource(show_spinner="Loading RAG index...")
def fetch_rag_index():
    """
    Download the pre-built LlamaIndex from the materials repo and return
    a ready-to-use index. The result is cached by st.cache_resource, which
    per the Streamlit docs is shared across sessions and reruns.

    The index files are staged in a temporary directory that is removed
    once loading finishes or fails, so no directory is left behind.

    Returns (index, error_msg) - index is None if loading failed.

    NOTE(review): the (None, error_msg) tuple is a normal return value, so
    it is cached as well; a transient failure persists until the cache is
    cleared or the app restarts.
    """
    if not LLAMA_INDEX_AVAILABLE:
        return (
            None,
            "LlamaIndex not installed - install llama-index-core and dependencies.",
        )
    index_base_url = (
        f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/index"
    )
    # The context manager deletes the staging directory on every exit path.
    # NOTE(review): assumes the default LlamaIndex stores read the persisted
    # JSON eagerly into memory, so the files are not needed after
    # load_index_from_storage returns - confirm against the LlamaIndex docs.
    with tempfile.TemporaryDirectory(prefix="plexi_index_") as index_dir:
        try:
            for filename in INDEX_FILES:
                url = f"{index_base_url}/{filename}"
                resp = requests.get(url, timeout=30)
                resp.raise_for_status()
                with open(os.path.join(index_dir, filename), "wb") as file_handle:
                    file_handle.write(resp.content)
        except Exception as err:
            return None, f"Failed to download index files: {err}"
        try:
            embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_ID)
            Settings.embed_model = embed_model
            # Only retrieval is needed here, so no LLM is configured.
            Settings.llm = None
            storage_context = StorageContext.from_defaults(persist_dir=index_dir)
            index = load_index_from_storage(storage_context)
            return index, None
        except Exception as err:
            return None, f"Failed to load index: {err}"