"""Document analysis: summary, key points, named entities and sentiment."""

import logging

import spacy
from transformers import pipeline

_logger = logging.getLogger(__name__)

# Load the small English spaCy model; download it once if it is not installed.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    import spacy.cli

    spacy.cli.download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")

summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
sentiment_task = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Character budgets applied to the (stripped) input before each stage.
_MIN_TEXT_CHARS = 10
_SUMMARY_CHARS = 2000
_ENTITY_CHARS = 3000
_SENTIMENT_CHARS = 512

# Summary sentences at or below this length are not reported as key points.
_MIN_KEY_POINT_CHARS = 15

# Generation bounds handed to the summarization pipeline.
_SUMMARY_MAX_LENGTH = 200
_SUMMARY_MIN_LENGTH = 60

# spaCy entity label -> key in the returned "entities" mapping.
_LABEL_TO_BUCKET = {
    "PERSON": "names",
    "DATE": "dates",
    "TIME": "dates",
    "ORG": "organizations",
    "MONEY": "amounts",
    "PERCENT": "amounts",
    "QUANTITY": "amounts",
}

# Upper-cased ORG strings that are dropped from the organizations list.
_IGNORED_ORGS = frozenset({"AI", "PDF", "IDP"})

_SENTIMENT_LABELS = {"POSITIVE": "Positive", "NEGATIVE": "Negative"}


def _summarize(text: str) -> tuple:
    """Return ``(summary, key_points)`` for *text*, or a fallback on failure."""
    try:
        result = summarizer(
            text,
            max_length=_SUMMARY_MAX_LENGTH,
            min_length=_SUMMARY_MIN_LENGTH,
            do_sample=False,
            # The character cap does not bound the token count, so let the
            # tokenizer clip the input instead of failing on long sequences.
            truncation=True,
        )
        summary = result[0]["summary_text"]
        # Each sufficiently long sentence of the summary becomes a key point.
        sentences = (sent.text.strip() for sent in nlp(summary).sents)
        key_points = [s for s in sentences if len(s) > _MIN_KEY_POINT_CHARS]
    except Exception:
        # Best-effort stage: record the cause, then degrade gracefully.
        _logger.exception("Summarization failed")
        return "Summary generation failed due to text complexity.", []
    return summary, key_points


def _extract_entities(text: str) -> dict:
    """Group the spaCy entities found in *text* into de-duplicated lists."""
    entities = {"names": [], "dates": [], "organizations": [], "amounts": []}
    for ent in nlp(text).ents:
        value = ent.text.strip()
        if len(value) < 2:
            continue
        bucket = _LABEL_TO_BUCKET.get(ent.label_)
        if bucket is None:
            continue
        if bucket == "organizations" and value.upper() in _IGNORED_ORGS:
            continue
        entities[bucket].append(value)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return {key: list(dict.fromkeys(values)) for key, values in entities.items()}


def _classify_sentiment(text: str) -> str:
    """Return "Positive", "Negative", or "Neutral" when unknown or on failure."""
    try:
        label = sentiment_task(text, truncation=True)[0]["label"]
    except Exception:
        # Best-effort stage: record the cause, then degrade gracefully.
        _logger.exception("Sentiment analysis failed")
        return "Neutral"
    return _SENTIMENT_LABELS.get(label, "Neutral")


def run_analysis(text) -> dict:
    """Analyse a document's text.

    Args:
        text: Raw document text. ``None``, empty, or fewer than 10
            non-whitespace-trimmed characters yields a placeholder result.

    Returns:
        A dict with the keys ``"summary"`` (str), ``"key_points"`` (list of
        str), ``"entities"`` (dict with ``"names"``, ``"dates"``,
        ``"organizations"`` and ``"amounts"`` lists) and ``"sentiment"``
        (``"Positive"``, ``"Negative"`` or ``"Neutral"``).
    """
    stripped = text.strip() if text else ""
    if len(stripped) < _MIN_TEXT_CHARS:
        return {
            "summary": "Document contains insufficient text for analysis.",
            "key_points": [],
            "entities": {"names": [], "dates": [], "organizations": [], "amounts": []},
            "sentiment": "Neutral",
        }

    # Slice the stripped text so leading whitespace cannot use up the budgets.
    summary, key_points = _summarize(stripped[:_SUMMARY_CHARS])
    entities = _extract_entities(stripped[:_ENTITY_CHARS])
    sentiment = _classify_sentiment(stripped[:_SENTIMENT_CHARS])

    return {
        "summary": summary,
        "key_points": key_points,
        "entities": entities,
        "sentiment": sentiment,
    }