"""Streamlit app: abstractive text summarization.

Accepts pasted text, a URL, or an uploaded .txt/.pdf/.docx file, and
produces an abstractive summary with a Hugging Face Transformers
summarization pipeline (BART large, CNN-fine-tuned).
"""

import nltk
import streamlit as st
import validators
from transformers import AutoTokenizer, pipeline

from utils import (
    clean_text,
    fetch_article_text,
    preprocess_text_for_abstractive_summarization,
    read_text_from_file,
)

# Hugging Face checkpoint used for both the tokenizer and the model.
ABS_TOKENIZER_NAME = "facebook/bart-large-cnn"
ABS_MODEL_NAME = "facebook/bart-large-cnn"
# Per-chunk summary length bounds (in tokens) passed to the pipeline.
ABS_MIN_LENGTH = 30
ABS_MAX_LENGTH = 130


@st.cache_resource
def load_tokenizer():
    """Load and cache the tokenizer (shared across reruns/sessions)."""
    return AutoTokenizer.from_pretrained(ABS_TOKENIZER_NAME)


@st.cache_resource
def load_summarizer():
    """Load and cache the summarization pipeline (shared across reruns)."""
    return pipeline(
        "summarization",
        model=ABS_MODEL_NAME,
        tokenizer=ABS_TOKENIZER_NAME,
    )


def normalize_input_text(inp_text, uploaded_file):
    """Resolve the user's input (raw text, URL, or uploaded file) to clean text.

    Precedence: a valid URL in ``inp_text`` wins, then an uploaded file,
    then ``inp_text`` treated as raw text.

    Args:
        inp_text: Contents of the text input widget (may be "" or a URL).
        uploaded_file: Streamlit UploadedFile or None.

    Returns:
        Tuple ``(is_url, clean_txt)`` where ``is_url`` is True when
        ``inp_text`` is a valid URL, and ``clean_txt`` is the cleaned text
        (``fetch_article_text`` may return a list of chunks — callers must
        handle both str and list).
    """
    is_url = bool(inp_text and validators.url(inp_text))
    if is_url:
        # fetch_article_text returns (raw, cleaned); only the cleaned part is needed.
        _, clean_txt = fetch_article_text(url=inp_text)
    elif uploaded_file:
        clean_txt = read_text_from_file(uploaded_file)
        clean_txt = clean_text(clean_txt)
    else:
        clean_txt = clean_text(inp_text)
    return is_url, clean_txt


if __name__ == "__main__":
    st.set_page_config(page_title="Text Summarization Tool", page_icon="📝")
    st.title("Text Summarization Tool 📝")
    st.markdown("---")
    st.markdown(
        """ This app creates **abstractive summaries** using a Hugging Face Transformers summarization pipeline. - Paste text - Enter a URL - Or upload a `.txt`, `.pdf`, or `.docx` file """
    )

    # Sentence tokenizer data used by the preprocessing helpers.
    nltk.download("punkt", quiet=True)

    abs_tokenizer = load_tokenizer()
    abs_summarizer = load_summarizer()

    inp_text = st.text_input("Enter text or a URL here")
    # NOTE(review): the original HTML payload of this divider was lost in the
    # source; reconstructed as a centered "OR" heading, consistent with
    # unsafe_allow_html=True — confirm against the original layout.
    st.markdown(
        "<h3 style='text-align: center;'>OR</h3>",
        unsafe_allow_html=True,
    )
    uploaded_file = st.file_uploader(
        "Upload a .txt, .pdf, .docx file for summarization"
    )

    is_url, clean_txt = normalize_input_text(inp_text, uploaded_file)

    with st.expander("View Input Text"):
        # URL fetching may return a list of chunks; join for display.
        if isinstance(clean_txt, list):
            st.write(" ".join(clean_txt))
        else:
            st.write(clean_txt)

    summarize = st.button("Summarize")

    if summarize:
        if not clean_txt:
            st.warning("Please enter text, a URL, or upload a file.")
            st.stop()

        with st.spinner("Creating summary. This might take a few seconds..."):
            if is_url:
                # URL text is already chunked by fetch_article_text.
                text_chunks = clean_txt if isinstance(clean_txt, list) else [clean_txt]
            else:
                if isinstance(clean_txt, list):
                    text_chunks = clean_txt
                else:
                    # Split long text into model-sized chunks using the tokenizer.
                    text_chunks = preprocess_text_for_abstractive_summarization(
                        tokenizer=abs_tokenizer,
                        text=clean_txt,
                    )
                if isinstance(text_chunks, str):
                    text_chunks = [text_chunks]

            summaries = []
            for chunk in text_chunks:
                # Skip empty/whitespace-only chunks — the pipeline would choke on them.
                if not chunk or not chunk.strip():
                    continue
                result = abs_summarizer(
                    chunk,
                    max_length=ABS_MAX_LENGTH,
                    min_length=ABS_MIN_LENGTH,
                    do_sample=False,
                )
                summaries.append(result[0]["summary_text"])
            summarized_text = " ".join(summaries)

        st.subheader("Summarized text")
        st.info(summarized_text)