import spacy
from transformers import pipeline
def _load_spacy_model(model_name):
    """Load a spaCy pipeline, downloading the model on first use if missing."""
    try:
        return spacy.load(model_name)
    except OSError:
        # Model package not installed (e.g. fresh environment): fetch, then retry.
        import spacy.cli
        spacy.cli.download(model_name)
        return spacy.load(model_name)


nlp = _load_spacy_model("en_core_web_sm")

# Hugging Face pipelines: distilled BART for summarization, fine-tuned
# DistilBERT (SST-2) for sentiment classification.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
sentiment_task = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
def run_analysis(text):
    """Run summarization, entity extraction, and sentiment analysis on *text*.

    Character budgets: the summarizer reads up to 2,000 chars, NER reads up
    to 3,000, and the sentiment model sees at most 512 (its input limit).

    Parameters
    ----------
    text : str or None
        Raw document text; may be empty.

    Returns
    -------
    dict
        {"summary": str, "key_points": list[str],
         "entities": {"names": [...], "dates": [...],
                      "organizations": [...], "amounts": [...]},
         "sentiment": "Positive" | "Negative" | "Neutral"}
    """
    # Guard clause: nothing meaningful to analyze.
    if not text or len(text.strip()) < 10:
        return {
            "summary": "Document contains insufficient text for analysis.",
            "key_points": [],
            "entities": {"names": [], "dates": [], "organizations": [], "amounts": []},
            "sentiment": "Neutral",
        }
    # Increased from 1500 to 2000 to read more of the document
    clean_text = text[:2000]
    summary, key_points = _summarize(clean_text)
    entities = _extract_entities(text[:3000])
    sentiment = _classify_sentiment(clean_text[:512])
    return {
        "summary": summary,
        "key_points": key_points,
        "entities": entities,
        "sentiment": sentiment,
    }


def _summarize(clean_text):
    """Return (summary, key_points); best-effort, degrades on model errors."""
    try:
        # Increased max_length and min_length for richer content
        summary_result = summarizer(clean_text, max_length=200, min_length=60, do_sample=False)
        summary = summary_result[0]['summary_text']
        # PRO FEATURE: split the generated summary into bullet-point sentences;
        # very short fragments (<= 15 chars) are dropped as noise.
        doc_summary = nlp(summary)
        key_points = [sent.text.strip() for sent in doc_summary.sents if len(sent.text.strip()) > 15]
        return summary, key_points
    except Exception:
        # The model can fail on unusual input; degrade gracefully, don't crash.
        return "Summary generation failed due to text complexity.", []


def _extract_entities(snippet):
    """Extract deduplicated named entities from *snippet* via spaCy NER."""
    doc = nlp(snippet)
    entities = {"names": [], "dates": [], "organizations": [], "amounts": []}
    for ent in doc.ents:
        text_val = ent.text.strip()
        if len(text_val) < 2:
            continue  # skip single-character noise
        if ent.label_ == "PERSON":
            entities["names"].append(text_val)
        elif ent.label_ in ("DATE", "TIME"):
            entities["dates"].append(text_val)
        elif ent.label_ == "ORG":
            # Filter acronyms the model commonly mislabels as organizations.
            if text_val.upper() not in ("AI", "PDF", "IDP"):
                entities["organizations"].append(text_val)
        elif ent.label_ in ("MONEY", "PERCENT", "QUANTITY"):
            entities["amounts"].append(text_val)
    # Deduplicate each category while preserving first-seen order.
    return {key: list(dict.fromkeys(vals)) for key, vals in entities.items()}


def _classify_sentiment(snippet):
    """Map the classifier's POSITIVE/NEGATIVE label to a display string."""
    try:
        label = sentiment_task(snippet)[0]['label']
        sentiment_map = {"POSITIVE": "Positive", "NEGATIVE": "Negative"}
        return sentiment_map.get(label, "Neutral")
    except Exception:
        # Best-effort: never let sentiment failure break the whole response.
        return "Neutral"