"""Streamlit app: abstractive text summarization.

Accepts pasted text, a URL, or an uploaded .txt/.pdf/.docx file, and
produces an abstractive summary with a Hugging Face Transformers
summarization pipeline (BART large, CNN-fine-tuned).
"""

import nltk
import streamlit as st
import validators
from transformers import AutoTokenizer, pipeline

from utils import (
    clean_text,
    fetch_article_text,
    preprocess_text_for_abstractive_summarization,
    read_text_from_file,
)

# Hugging Face checkpoint used for both the tokenizer and the model.
ABS_TOKENIZER_NAME = "facebook/bart-large-cnn"
ABS_MODEL_NAME = "facebook/bart-large-cnn"
# Per-chunk summary length bounds (in tokens) passed to the pipeline.
ABS_MIN_LENGTH = 30
ABS_MAX_LENGTH = 130


@st.cache_resource
def load_tokenizer():
    """Load and cache the tokenizer (shared across reruns/sessions)."""
    return AutoTokenizer.from_pretrained(ABS_TOKENIZER_NAME)


@st.cache_resource
def load_summarizer():
    """Load and cache the summarization pipeline (shared across reruns)."""
    return pipeline(
        "summarization",
        model=ABS_MODEL_NAME,
        tokenizer=ABS_TOKENIZER_NAME,
    )


def normalize_input_text(inp_text, uploaded_file):
    """Resolve the user's input (raw text, URL, or uploaded file) to clean text.

    Precedence: a valid URL in ``inp_text`` wins, then an uploaded file,
    then ``inp_text`` treated as raw text.

    Args:
        inp_text: Contents of the text input widget (may be "" or a URL).
        uploaded_file: Streamlit UploadedFile or None.

    Returns:
        Tuple ``(is_url, clean_txt)`` where ``is_url`` is True when
        ``inp_text`` is a valid URL, and ``clean_txt`` is the cleaned text
        (``fetch_article_text`` may return a list of chunks — callers must
        handle both str and list).
    """
    is_url = bool(inp_text and validators.url(inp_text))
    if is_url:
        # fetch_article_text returns (raw, cleaned); only the cleaned part is needed.
        _, clean_txt = fetch_article_text(url=inp_text)
    elif uploaded_file:
        clean_txt = read_text_from_file(uploaded_file)
        clean_txt = clean_text(clean_txt)
    else:
        clean_txt = clean_text(inp_text)
    return is_url, clean_txt


if __name__ == "__main__":
    st.set_page_config(page_title="Text Summarization Tool", page_icon="📝")
    st.title("Text Summarization Tool 📝")
    st.markdown("---")
    st.markdown(
        """ This app creates **abstractive summaries** using a Hugging Face Transformers summarization pipeline. - Paste text - Enter a URL - Or upload a `.txt`, `.pdf`, or `.docx` file """
    )

    # Sentence tokenizer data used by the preprocessing helpers.
    nltk.download("punkt", quiet=True)

    abs_tokenizer = load_tokenizer()
    abs_summarizer = load_summarizer()

    inp_text = st.text_input("Enter text or a URL here")
    # NOTE(review): the original HTML payload of this divider was lost in the
    # source; reconstructed as a centered "OR" heading, consistent with
    # unsafe_allow_html=True — confirm against the original layout.
    st.markdown(
        "<h3 style='text-align: center;'>OR</h3>",
        unsafe_allow_html=True,
    )
    uploaded_file = st.file_uploader(
        "Upload a .txt, .pdf, .docx file for summarization"
    )

    is_url, clean_txt = normalize_input_text(inp_text, uploaded_file)

    with st.expander("View Input Text"):
        # URL fetching may return a list of chunks; join for display.
        if isinstance(clean_txt, list):
            st.write(" ".join(clean_txt))
        else:
            st.write(clean_txt)

    summarize = st.button("Summarize")

    if summarize:
        if not clean_txt:
            st.warning("Please enter text, a URL, or upload a file.")
            st.stop()

        with st.spinner("Creating summary. This might take a few seconds..."):
            if is_url:
                # URL text is already chunked by fetch_article_text.
                text_chunks = clean_txt if isinstance(clean_txt, list) else [clean_txt]
            else:
                if isinstance(clean_txt, list):
                    text_chunks = clean_txt
                else:
                    # Split long text into model-sized chunks using the tokenizer.
                    text_chunks = preprocess_text_for_abstractive_summarization(
                        tokenizer=abs_tokenizer,
                        text=clean_txt,
                    )
                if isinstance(text_chunks, str):
                    text_chunks = [text_chunks]

            summaries = []
            for chunk in text_chunks:
                # Skip empty/whitespace-only chunks — the pipeline would choke on them.
                if not chunk or not chunk.strip():
                    continue
                result = abs_summarizer(
                    chunk,
                    max_length=ABS_MAX_LENGTH,
                    min_length=ABS_MIN_LENGTH,
                    do_sample=False,
                )
                summaries.append(result[0]["summary_text"])
            summarized_text = " ".join(summaries)

        st.subheader("Summarized text")
        st.info(summarized_text)