import re
from html import escape
from pathlib import Path
from urllib.parse import quote

import streamlit as st
from streamlit_pdf_viewer import pdf_viewer

from utils import (
    APP_ICON_PATH,
    convert_office_to_pdf,
    download_github_file,
    get_manifest,
    get_mime_type,
    inject_theme,
    render_page_header,
    render_sidebar,
    render_stat_cards,
    summarize_manifest,
    summarize_subject_catalog,
)

st.set_page_config(
    page_title="Study Materials Hub", page_icon=APP_ICON_PATH, layout="wide"
)
inject_theme()

# Preview routing below checks the file extension first, so a strict
# MIME-type mapping table is no longer needed in this module.


def format_file_label(filename):
    """Return a cleaner display label for a stored filename.

    The extension is dropped and every run of ".", "_" or "-" in the stem is
    replaced by a single space. If nothing is left after stripping, the
    original ``filename`` is returned unchanged.
    """
    stem = Path(filename).stem
    return re.sub(r"[._-]+", " ", stem).strip() or filename


def get_file_type_label(filename, file_mime):
    """Return a short human-readable file type label.

    The lowercase extension (without the dot) wins when the filename has one.
    Otherwise the label is derived from ``file_mime``: "text" for any
    ``text/*`` type, "pdf" for ``application/pdf``, else the MIME subtype.
    """
    suffix = Path(filename).suffix.lower().lstrip(".")
    if suffix:
        return suffix
    if file_mime.startswith("text/"):
        return "text"
    if file_mime == "application/pdf":
        return "pdf"
    return file_mime.rsplit("/", 1)[-1]


def display_pdf(file_content):
    """Display PDF using streamlit-pdf-viewer."""
    pdf_viewer(file_content, width="100%", height=700)


def display_office_document(file_content, download_url, filename):
    """Display Word / PowerPoint files by converting to PDF server-side.

    Conversion is delegated to ``convert_office_to_pdf``. When it returns a
    falsy result, the function falls back to a link button that opens the
    file in Microsoft Office Web Viewer, pointing the viewer at
    ``download_url``.
    """
    suffix = Path(filename).suffix.lower().lstrip(".")
    type_label = "presentation" if suffix in ("ppt", "pptx") else "document"

    with st.spinner(f"Converting {type_label} to PDF for preview…"):
        pdf_bytes = convert_office_to_pdf(file_content, filename)

    if pdf_bytes:
        pdf_viewer(pdf_bytes, width="100%", height=700)
        st.caption(
            f"Inline preview of `{format_file_label(filename)}` "
            "(converted to PDF on the server)."
        )
    else:
        # Fallback – open in Office Web Viewer in a new tab.
        # The whole download URL is percent-encoded (safe="") because it is
        # passed as a single query-string value.
        encoded_url = quote(download_url, safe="")
        preview_url = (
            f"https://view.officeapps.live.com/op/view.aspx?src={encoded_url}"
        )
        # The label is escaped because this markdown is rendered with
        # unsafe_allow_html=True and the filename comes from the catalog.
        st.markdown(
            f"""
📄
{escape(format_file_label(filename))}
Server-side conversion is not available right now. Open the {type_label} in Microsoft Office Web Viewer instead.
""",
            unsafe_allow_html=True,
        )
        st.link_button(
            f"🔗 Open {type_label.capitalize()} in Office Viewer",
            preview_url,
            use_container_width=True,
            type="primary",
        )
        st.caption(
            "Powered by Microsoft Office Web Viewer. "
            "You can also download the file directly using the button on the right."
        )


# --- Catalog loading -------------------------------------------------------

try:
    manifest = get_manifest()
except Exception as err:  # top-level UI boundary: report and halt the page
    st.error(f"Failed to load materials catalog: {err}")
    st.stop()

semester_names = sorted(manifest.keys()) if manifest else []
catalog_summary = summarize_manifest(manifest) if manifest else None

if not semester_names:
    st.info("No study materials are available yet. Check back later.")
    st.stop()

selected_semester = None
selected_subject = None
selected_type = None
selected_file_name = None

# --- Header and filters ----------------------------------------------------
# NOTE(review): the original indentation was lost, so the exact extent of this
# container (and of the column blocks below) is assumed; element order is
# unchanged either way.
with st.container():
    render_page_header(
        "Material hub",
        "Browse the catalog without losing context",
        (
            "Move from semester to file in a single flow, preview supported files in "
            "place, and download the exact asset you want from the shared materials "
            "repository."
        ),
        badges=[
            f"{catalog_summary['semester_count']} semesters"
            if catalog_summary
            else None,
            f"{catalog_summary['file_count']} files" if catalog_summary else None,
            "Inline document preview",
        ],
    )

    st.markdown(
        """
Refine Your Selection
""",
        unsafe_allow_html=True,
    )
    st.markdown(
        """
Catalog filters
Narrow the collection by semester, then drill into one subject and file.
""",
        unsafe_allow_html=True,
    )

    filter_cols = st.columns(4, gap="medium")

    with filter_cols[0]:
        selected_semester = st.selectbox(
            "Semester", semester_names, key="hub_semester"
        )
    subjects = sorted(manifest[selected_semester].keys())

    with filter_cols[1]:
        selected_subject = st.selectbox("Subject", subjects, key="hub_subject")
    # With no subjects there is nothing to select; indexing the manifest with
    # None below would raise KeyError, so stop with the existing empty-state
    # message instead.
    if selected_subject is None:
        st.info("No files were found for this combination yet.")
        st.stop()

    subject_data = manifest[selected_semester][selected_subject]
    subject_summary = summarize_subject_catalog(subject_data)
    types = subject_summary["types"]

    with filter_cols[2]:
        selected_type = st.selectbox("Material Type", types, key="hub_type")
    # Same guard for a subject that has no material types.
    if selected_type is None:
        st.info("No files were found for this combination yet.")
        st.stop()

    files_list = subject_data[selected_type]
    file_names = [file_entry["name"] for file_entry in files_list]

    with filter_cols[3]:
        selected_file_name = (
            st.selectbox(
                "File",
                file_names,
                key="hub_file",
                format_func=format_file_label,
            )
            if file_names
            else None
        )

# Resolve the chosen filename back to its manifest entry (None if no match).
selected_file_obj = (
    next((item for item in files_list if item["name"] == selected_file_name), None)
    if selected_file_name
    else None
)

render_stat_cards(
    [
        {
            "label": "Current Subject",
            "value": selected_subject,
            "note": f"{selected_semester} collection currently in focus.",
        },
        {
            "label": "Available Files",
            "value": subject_summary["file_count"],
            "note": "All assets available for this subject across material types.",
        },
        {
            "label": "Material Types",
            "value": subject_summary["type_count"],
            "note": ", ".join(subject_summary["types"]),
        },
        {
            "label": "Current Bucket",
            "value": len(files_list),
            "note": f"Files available inside {selected_type}.",
        },
    ]
)

render_sidebar()

if not selected_file_obj:
    st.info("No files were found for this combination yet.")
    st.stop()

# --- Download the selected file --------------------------------------------

try:
    file_content = download_github_file(selected_file_obj["download_url"])
    file_mime = get_mime_type(selected_file_obj["name"])
except Exception as err:  # top-level UI boundary: report and halt the page
    st.error(f"Error loading file: {err}")
    st.stop()

if not file_content:
    st.error("The selected file could not be downloaded.")
    st.stop()

# --- Preview and download ---------------------------------------------------

st.markdown(
    """
Preview And Download
""",
    unsafe_allow_html=True,
)

preview_col, info_col = st.columns([1.7, 0.95], gap="large")

with preview_col:
    # Catalog-derived names are escaped here for the same reason they are
    # escaped in the "File details" panel: the markdown is rendered with
    # unsafe_allow_html=True.
    st.markdown(
        f"""
{escape(selected_semester)}
{escape(format_file_label(selected_file_obj["name"]))}
{escape(selected_subject)} / {escape(selected_type)}
""",
        unsafe_allow_html=True,
    )

    # Choose the preview by extension first; only plain text falls back to
    # the MIME type.
    ext = Path(selected_file_obj["name"]).suffix.lower()
    if ext == ".pdf":
        display_pdf(file_content)
    elif ext in (".ppt", ".pptx", ".doc", ".docx"):
        display_office_document(
            file_content, selected_file_obj["download_url"], selected_file_obj["name"]
        )
    elif file_mime.startswith("text/"):
        # Basic text preview; undecodable bytes are replaced, not raised.
        st.code(file_content.decode("utf-8", errors="replace"))
    else:
        st.info(
            "Preview is not available for this file type. Download it to inspect the content."
        )

with info_col:
    st.markdown(
        """
Selected file
Download the current file or switch to another asset in the same bucket.
""",
        unsafe_allow_html=True,
    )
    st.download_button(
        label="Download File",
        data=file_content,
        file_name=selected_file_obj["name"],
        mime=file_mime,
        use_container_width=True,
        type="primary",
    )

    st.markdown(
        """
File details
""",
        unsafe_allow_html=True,
    )
    st.markdown(
        f"""
Semester
{escape(selected_semester)}
Subject
{escape(selected_subject)}
Material Type
{escape(selected_type)}
Format
{escape(get_file_type_label(selected_file_obj["name"], file_mime).upper())}
""",
        unsafe_allow_html=True,
    )

    st.markdown(
        """
More in this bucket
""",
        unsafe_allow_html=True,
    )

    bucket_items = []
    for file_name in file_names:
        is_current = file_name == selected_file_obj["name"]
        # NOTE(review): item_class is not referenced by the string below;
        # presumably it fed a class attribute in markup that is no longer
        # present - confirm against the intended template.
        item_class = "current" if is_current else ""
        label = "Current" if is_current else "Available"
        bucket_items.append(
            f"\n  • {escape(label)}: {escape(format_file_label(file_name))}\n  • "
        )

    # NOTE(review): bucket_items is built above but this call does not
    # reference it, so the list is never rendered as written - confirm the
    # intended wrapper markup before wiring it in.
    st.markdown(
        "\n    ",
        unsafe_allow_html=True,
    )