Spaces:
Sleeping
Sleeping
import logging

import spacy
from transformers import pipeline
_SPACY_MODEL = "en_core_web_sm"


def _load_spacy_model(model_name):
    """Load the spaCy pipeline *model_name*, downloading it once if loading fails.

    An ``OSError`` from ``spacy.load`` is treated as "model not installed":
    the model is downloaded and the load is retried a single time. Any error
    from the download or from the second load propagates to the caller.
    """
    try:
        return spacy.load(model_name)
    except OSError:
        # Import the function directly: ``import spacy.cli`` here would bind
        # ``spacy`` as a local name and break the ``spacy.load`` call above.
        from spacy.cli import download as download_model

        download_model(model_name)
        return spacy.load(model_name)


# Shared, module-level model handles; they are created once at import time.
nlp = _load_spacy_model(_SPACY_MODEL)
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
sentiment_task = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)
_logger = logging.getLogger(__name__)

# Size limits, all measured in characters of the raw input text.
_MIN_TEXT_CHARS = 10          # shorter (after stripping) is "insufficient text"
_SUMMARY_INPUT_CHARS = 2000   # prefix handed to the summarizer
_ENTITY_INPUT_CHARS = 3000    # prefix handed to spaCy for entity extraction
_SENTIMENT_INPUT_CHARS = 512  # prefix (of the summary input) used for sentiment
_MIN_KEY_POINT_CHARS = 15     # summary sentences must be strictly longer than this
_MIN_ENTITY_CHARS = 2         # entities shorter than this are dropped

# spaCy entity label -> key in the returned ``entities`` dict.
_ENTITY_BUCKETS = {
    "PERSON": "names",
    "DATE": "dates",
    "TIME": "dates",
    "ORG": "organizations",
    "MONEY": "amounts",
    "PERCENT": "amounts",
    "QUANTITY": "amounts",
}
# Upper-cased strings that are never reported as organizations.
_IGNORED_ORGS = frozenset({"AI", "PDF", "IDP"})
_SENTIMENT_LABELS = {"POSITIVE": "Positive", "NEGATIVE": "Negative"}

_SUMMARY_FAILED = "Summary generation failed due to text complexity."
_INSUFFICIENT_TEXT = "Document contains insufficient text for analysis."


def _empty_entities():
    """Return a fresh entities dict so callers never share the inner lists."""
    return {"names": [], "dates": [], "organizations": [], "amounts": []}


def _summarize(text):
    """Return ``(summary, key_points)`` for *text*, or a fixed fallback on any failure."""
    try:
        # truncation=True keeps token-dense inputs within the model's limit
        # instead of failing the whole call.
        # NOTE(review): min_length=60 is applied even when the input itself is
        # shorter than that -- confirm this is intended for very short documents.
        result = summarizer(
            text,
            max_length=200,
            min_length=60,
            do_sample=False,
            truncation=True,
        )
        summary = result[0]["summary_text"]
        # Each sufficiently long sentence of the summary becomes a bullet point.
        key_points = [
            stripped
            for stripped in (sent.text.strip() for sent in nlp(summary).sents)
            if len(stripped) > _MIN_KEY_POINT_CHARS
        ]
    except Exception:
        # Best-effort stage: keep the analysis going, but record what went wrong.
        _logger.exception("Summarization failed")
        return _SUMMARY_FAILED, []
    return summary, key_points


def _extract_entities(text):
    """Group spaCy entities found in *text* into de-duplicated, order-preserving lists."""
    entities = _empty_entities()
    for ent in nlp(text).ents:
        value = ent.text.strip()
        bucket = _ENTITY_BUCKETS.get(ent.label_)
        if bucket is None or len(value) < _MIN_ENTITY_CHARS:
            continue
        if bucket == "organizations" and value.upper() in _IGNORED_ORGS:
            continue
        entities[bucket].append(value)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return {key: list(dict.fromkeys(values)) for key, values in entities.items()}


def _classify_sentiment(text):
    """Return "Positive", "Negative" or "Neutral" (unknown label or failure) for *text*."""
    try:
        # truncation=True guards against snippets whose characters expand to
        # more tokens than the classifier accepts.
        label = sentiment_task(text, truncation=True)[0]["label"]
    except Exception:
        _logger.exception("Sentiment analysis failed")
        return "Neutral"
    return _SENTIMENT_LABELS.get(label, "Neutral")


def run_analysis(text):
    """Summarize *text*, extract its entities and classify its sentiment.

    Args:
        text: Document text. Falsy values (``None``, ``""``) and text shorter
            than 10 characters after stripping are treated as insufficient.

    Returns:
        A dict with the keys:
            ``summary``: summary string, or a fixed message when the text is
                insufficient or summarization fails.
            ``key_points``: list of summary sentences longer than 15 characters.
            ``entities``: dict with ``names``, ``dates``, ``organizations`` and
                ``amounts`` lists, de-duplicated in first-seen order.
            ``sentiment``: ``"Positive"``, ``"Negative"`` or ``"Neutral"``.

    Summarization and sentiment failures are logged and replaced by fallback
    values. Errors raised during entity extraction propagate to the caller.
    """
    if not text or len(text.strip()) < _MIN_TEXT_CHARS:
        return {
            "summary": _INSUFFICIENT_TEXT,
            "key_points": [],
            "entities": _empty_entities(),
            "sentiment": "Neutral",
        }

    clean_text = text[:_SUMMARY_INPUT_CHARS]

    summary, key_points = _summarize(clean_text)
    entities = _extract_entities(text[:_ENTITY_INPUT_CHARS])
    sentiment = _classify_sentiment(clean_text[:_SENTIMENT_INPUT_CHARS])

    return {
        "summary": summary,
        "key_points": key_points,
        "entities": entities,
        "sentiment": sentiment,
    }