Spaces:
Running
Running
File size: 1,071 Bytes
e078b1d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | from __future__ import annotations
import re
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.text_rank import TextRankSummarizer
SENTENCE_SPLIT_REGEX = re.compile(r"(?<=[.!?])\s+")
def lead1_summary(text: str) -> str:
text = " ".join(str(text).split())
if not text:
return ""
return re.split(SENTENCE_SPLIT_REGEX, text)[0].strip()
def textrank_summary(text: str, sentence_count: int = 1) -> str:
text = " ".join(str(text).split())
if not text:
return ""
parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = TextRankSummarizer()
summary = " ".join(str(sentence) for sentence in summarizer(parser.document, sentence_count)).strip()
return summary or lead1_summary(text)
def run_baseline(model_name: str, text: str) -> str:
if model_name == "lead1":
return lead1_summary(text)
if model_name == "textrank":
return textrank_summary(text)
raise ValueError(f"Unsupported baseline model: {model_name}")
|