"""Streamlit app: abstractive text summarization with Hugging Face Transformers.

Accepts pasted text, a URL, or an uploaded ``.txt``/``.pdf``/``.docx`` file
and summarizes it with ``facebook/bart-large-cnn``.
"""

import nltk
import streamlit as st
import validators
from transformers import AutoTokenizer, pipeline

from utils import (
    clean_text,
    fetch_article_text,
    preprocess_text_for_abstractive_summarization,
    read_text_from_file,
)

# Model/tokenizer identifiers and summary length bounds for the abstractive
# summarizer. NOTE(review): ABS_MIN_LENGTH / ABS_MAX_LENGTH and the
# preprocess_text_for_abstractive_summarization import are not referenced in
# the visible portion of this file — presumably used in the truncated
# remainder; confirm before removing.
ABS_TOKENIZER_NAME = "facebook/bart-large-cnn"
ABS_MODEL_NAME = "facebook/bart-large-cnn"
ABS_MIN_LENGTH = 30
ABS_MAX_LENGTH = 130


@st.cache_resource
def load_tokenizer():
    """Load and cache the BART tokenizer (one instance per server process)."""
    return AutoTokenizer.from_pretrained(ABS_TOKENIZER_NAME)


@st.cache_resource
def load_summarizer():
    """Load and cache the Hugging Face summarization pipeline."""
    return pipeline(
        "summarization",
        model=ABS_MODEL_NAME,
        tokenizer=ABS_TOKENIZER_NAME,
    )


def normalize_input_text(inp_text, uploaded_file):
    """Resolve user input into cleaned text ready for summarization.

    Precedence: a valid URL in ``inp_text`` wins; otherwise an uploaded
    file; otherwise ``inp_text`` itself is treated as raw text.

    Args:
        inp_text: Text-box contents — either raw text or a URL (may be empty).
        uploaded_file: Streamlit uploaded-file object, or a falsy value.

    Returns:
        Tuple ``(is_url, clean_txt)``: whether the input was a URL, and the
        cleaned article/document text.
    """
    is_url = bool(inp_text and validators.url(inp_text))
    if is_url:
        # fetch_article_text returns a pair; only the cleaned text is needed.
        _, clean_txt = fetch_article_text(url=inp_text)
    elif uploaded_file:
        clean_txt = clean_text(read_text_from_file(uploaded_file))
    else:
        clean_txt = clean_text(inp_text)
    return is_url, clean_txt


if __name__ == "__main__":
    st.set_page_config(page_title="Text Summarization Tool", page_icon="📝")
    st.title("Text Summarization Tool 📝")
    st.markdown("---")
    # Restored line structure: in the mangled source the bullets were
    # collapsed onto one line, which would render as a single paragraph
    # instead of a markdown list.
    st.markdown(
        """
        This app creates **abstractive summaries** using a Hugging Face
        Transformers summarization pipeline.

        - Paste text
        - Enter a URL
        - Or upload a `.txt`, `.pdf`, or `.docx` file
        """
    )

    # Quietly ensure the sentence tokenizer data is present.
    nltk.download("punkt", quiet=True)

    abs_tokenizer = load_tokenizer()
    abs_summarizer = load_summarizer()

    inp_text = st.text_input("Enter text or a URL here")
    # NOTE(review): the source view ends mid-statement here (`st.markdown("`).
    # Completed as the file's established "---" divider pattern to keep the
    # module importable — confirm against the full original; the rest of the
    # script (uploader widget, summarize action, output) is not visible.
    st.markdown("---")