Spaces:

AmeenAktharT
/

idp-system

Sleeping

App Files Files Community

idp-system / src /analyzer.py

AmeenAktharT

Update src/analyzer.py

74b9517 verified about 1 month ago

raw

history blame contribute delete

2.57 kB

	import spacy
	from transformers import pipeline

	def _load_spacy_model(model_name):
	    """Return the spaCy pipeline ``model_name``, downloading it on demand."""
	    try:
	        return spacy.load(model_name)
	    except OSError:
	        # spacy.load raised OSError (presumably the model package is not
	        # installed): fetch it once, then retry the load.
	        from spacy.cli import download

	        download(model_name)
	        return spacy.load(model_name)


	# Shared NLP objects, created once at import time and reused by every call.
	nlp = _load_spacy_model("en_core_web_sm")

	summarizer = pipeline(
	    task="summarization",
	    model="sshleifer/distilbart-cnn-12-6",
	)
	sentiment_task = pipeline(
	    task="sentiment-analysis",
	    model="distilbert-base-uncased-finetuned-sst-2-english",
	)

	def run_analysis(text):
	    """Summarize a document and extract its entities and overall sentiment.

	    Uses the module-level ``nlp`` (spaCy) object and the ``summarizer`` and
	    ``sentiment_task`` (Hugging Face) pipelines.

	    Args:
	        text: Raw document text. ``None``, an empty string, or text with
	            fewer than 10 characters after stripping whitespace is treated
	            as insufficient and yields a fixed placeholder result.

	    Returns:
	        A dict with the keys:

	        * ``summary``: generated summary, or a fallback message when
	          summarization fails.
	        * ``key_points``: sentences of the summary longer than 15
	          characters (empty when summarization fails).
	        * ``entities``: dict with ``names``, ``dates``, ``organizations``
	          and ``amounts`` lists, de-duplicated in first-seen order.
	        * ``sentiment``: ``"Positive"``, ``"Negative"`` or ``"Neutral"``
	          (``"Neutral"`` is also the fallback when classification fails).
	    """
	    import logging

	    logger = logging.getLogger(__name__)

	    if not text or len(text.strip()) < 10:
	        return {
	            "summary": "Document contains insufficient text for analysis.",
	            "key_points": [],
	            "entities": {"names": [], "dates": [], "organizations": [], "amounts": []},
	            "sentiment": "Neutral",
	        }

	    # Only the leading 2000 characters are fed to the transformer models.
	    clean_text = text[:2000]

	    # --- 1. AI Summarization & Key Points ---
	    try:
	        # A character cap does not bound the token count (dense or non-Latin
	        # text can tokenize to far more tokens than characters), so also ask
	        # the tokenizer to truncate to the model's maximum length.
	        summary_result = summarizer(
	            clean_text,
	            max_length=200,
	            min_length=60,
	            do_sample=False,
	            truncation=True,
	        )
	        summary = summary_result[0]["summary_text"]

	        # Each sufficiently long sentence of the summary becomes a bullet point.
	        stripped_sentences = (sent.text.strip() for sent in nlp(summary).sents)
	        key_points = [sentence for sentence in stripped_sentences if len(sentence) > 15]
	    except Exception:
	        # Best-effort: keep returning a result, but record why it failed.
	        logger.exception("Summarization failed; returning fallback summary")
	        summary = "Summary generation failed due to text complexity."
	        key_points = []

	    # --- 2. Entity Extraction ---
	    # Maps spaCy entity labels to the result bucket they are reported under.
	    label_to_bucket = {
	        "PERSON": "names",
	        "DATE": "dates",
	        "TIME": "dates",
	        "ORG": "organizations",
	        "MONEY": "amounts",
	        "PERCENT": "amounts",
	        "QUANTITY": "amounts",
	    }
	    # Acronyms excluded from the organizations list (compared upper-cased).
	    ignored_orgs = {"AI", "PDF", "IDP"}

	    entities = {"names": [], "dates": [], "organizations": [], "amounts": []}
	    for ent in nlp(text[:3000]).ents:
	        value = ent.text.strip()
	        if len(value) < 2:
	            continue
	        bucket = label_to_bucket.get(ent.label_)
	        if bucket is None:
	            continue
	        if bucket == "organizations" and value.upper() in ignored_orgs:
	            continue
	        entities[bucket].append(value)

	    # dict.fromkeys drops duplicates while preserving first-seen order.
	    entities = {key: list(dict.fromkeys(values)) for key, values in entities.items()}

	    # --- 3. Sentiment Analysis ---
	    try:
	        sent_res = sentiment_task(clean_text[:512], truncation=True)[0]
	        sentiment_map = {"POSITIVE": "Positive", "NEGATIVE": "Negative"}
	        sentiment = sentiment_map.get(sent_res["label"], "Neutral")
	    except Exception:
	        logger.exception("Sentiment analysis failed; defaulting to Neutral")
	        sentiment = "Neutral"

	    return {
	        "summary": summary,
	        "key_points": key_points,
	        "entities": entities,
	        "sentiment": sentiment,
	    }