Spaces:
Running
Running
| from __future__ import annotations | |
| import re | |
| from sumy.nlp.tokenizers import Tokenizer | |
| from sumy.parsers.plaintext import PlaintextParser | |
| from sumy.summarizers.text_rank import TextRankSummarizer | |
| SENTENCE_SPLIT_REGEX = re.compile(r"(?<=[.!?])\s+") | |
| def lead1_summary(text: str) -> str: | |
| text = " ".join(str(text).split()) | |
| if not text: | |
| return "" | |
| return re.split(SENTENCE_SPLIT_REGEX, text)[0].strip() | |
| def textrank_summary(text: str, sentence_count: int = 1) -> str: | |
| text = " ".join(str(text).split()) | |
| if not text: | |
| return "" | |
| parser = PlaintextParser.from_string(text, Tokenizer("english")) | |
| summarizer = TextRankSummarizer() | |
| summary = " ".join(str(sentence) for sentence in summarizer(parser.document, sentence_count)).strip() | |
| return summary or lead1_summary(text) | |
| def run_baseline(model_name: str, text: str) -> str: | |
| if model_name == "lead1": | |
| return lead1_summary(text) | |
| if model_name == "textrank": | |
| return textrank_summary(text) | |
| raise ValueError(f"Unsupported baseline model: {model_name}") | |