"""
Text Sentiment Analyzer
-----------------------
A Gradio Space that analyzes the sentiment of any block of text (book review,
student essay, social media post, etc.) and surfaces the five most emotionally
charged sentences. Designed for a free CPU Hugging Face Space.
"""

import html
import logging
import re
from collections import Counter
from typing import Any, Dict, List

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from transformers import pipeline

# === Setup Logging ===
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# === Load model once at startup ===
# DistilBERT SST-2 is small (~250MB), fast on CPU, and gives a clean
# POSITIVE / NEGATIVE label with a confidence score we can use as an
# "emotional intensity" signal.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

logger.info("Loading sentiment model: %s", MODEL_NAME)
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=MODEL_NAME,
    truncation=True,
)
logger.info("Model loaded.")

# One per-sentence record: keys "sentence", "label", "confidence",
# "signed_score" (see analyze_sentences).
SentenceResult = Dict[str, Any]

# Sentence boundary: whitespace that follows '.', '!' or '?'. Compiled once
# because split_sentences runs on every request.
_SENTENCE_BOUNDARY = re.compile(r"(?<=[.!?])\s+")

# Column order of the "top charged sentences" table. Declared explicitly so an
# empty result still yields a DataFrame with the expected header.
_TABLE_COLUMNS = ["Rank", "Polarity", "Confidence", "Sentence"]


# ---------------------------------------------------------------------------
# Core helpers
# ---------------------------------------------------------------------------
def split_sentences(text: str) -> List[str]:
    """Split *text* into sentences without extra dependencies.

    The text is split on whitespace that follows ``.``, ``!`` or ``?``.

    Args:
        text: Raw input text.

    Returns:
        The non-empty, stripped sentences in document order; an empty list
        when *text* is empty or whitespace only.
    """
    text = text.strip()
    if not text:
        return []
    pieces = (piece.strip() for piece in _SENTENCE_BOUNDARY.split(text))
    return [piece for piece in pieces if piece]


def analyze_sentences(sentences: List[str]) -> List[SentenceResult]:
    """Run the sentiment model on each sentence.

    Args:
        sentences: Sentences to classify.

    Returns:
        One dict per sentence, in input order, with keys ``sentence``,
        ``label`` (upper-cased model label), ``confidence`` (model score as a
        float) and ``signed_score`` (``+confidence`` for ``POSITIVE``,
        ``-confidence`` otherwise). Empty list for empty input.
    """
    if not sentences:
        return []

    predictions = sentiment_pipe(sentences)
    out = []
    for sent, res in zip(sentences, predictions):
        label = res["label"].upper()
        score = float(res["score"])
        # Signed intensity: + for positive, - for negative.
        signed = score if label == "POSITIVE" else -score
        out.append({
            "sentence": sent,
            "label": label,
            "confidence": score,
            "signed_score": signed,
        })
    return out


def overall_summary(sentence_results: List[SentenceResult]) -> str:
    """Build a plain-language summary of the document's overall sentiment.

    The verdict is POSITIVE when the mean signed score exceeds 0.25,
    NEGATIVE when it is below -0.25, and MIXED / NEUTRAL otherwise.

    Args:
        sentence_results: Output of :func:`analyze_sentences`.

    Returns:
        A multi-line summary string, or ``"No text to analyze."`` when there
        are no results.
    """
    if not sentence_results:
        return "No text to analyze."

    counts = Counter(r["label"] for r in sentence_results)
    total = len(sentence_results)
    pos = counts.get("POSITIVE", 0)
    neg = counts.get("NEGATIVE", 0)
    avg_signed = sum(r["signed_score"] for r in sentence_results) / total

    if avg_signed > 0.25:
        verdict = "Overall tone: POSITIVE"
    elif avg_signed < -0.25:
        verdict = "Overall tone: NEGATIVE"
    else:
        verdict = "Overall tone: MIXED / NEUTRAL"

    return (
        f"{verdict}\n"
        f"Sentences analyzed: {total}\n"
        f"Positive: {pos} | Negative: {neg}\n"
        f"Average signed sentiment: {avg_signed:+.2f} (range -1.0 to +1.0)"
    )


def plot_pie_chart(sentence_results: List[SentenceResult]):
    """Draw a pie chart of positive vs negative sentence counts.

    Args:
        sentence_results: Output of :func:`analyze_sentences`.

    Returns:
        A matplotlib figure. When there are no POSITIVE or NEGATIVE sentences
        the figure only shows the text "No data".
    """
    counts = Counter(r["label"] for r in sentence_results)
    pos = counts.get("POSITIVE", 0)
    neg = counts.get("NEGATIVE", 0)

    fig, ax = plt.subplots(figsize=(4, 4))

    if pos == 0 and neg == 0:
        ax.text(0.5, 0.5, "No data", ha="center", va="center")
        ax.axis("off")
    else:
        labels, sizes, colors = [], [], []
        if pos:
            labels.append("Positive")
            sizes.append(pos)
            colors.append("#4CAF50")
        if neg:
            labels.append("Negative")
            sizes.append(neg)
            colors.append("#E53935")

        ax.pie(
            sizes,
            labels=labels,
            colors=colors,
            autopct="%1.1f%%",
            startangle=90,
            wedgeprops={"edgecolor": "white", "linewidth": 2},
        )
        ax.set_title("Sentence-Level Sentiment Distribution")

    # pyplot keeps every figure it creates alive until it is closed; in a
    # long-running app that leaks one figure per request. Closing only
    # unregisters the figure from pyplot -- the returned object can still be
    # rendered by the caller.
    plt.close(fig)
    return fig


def _top_k_indices(sentence_results: List[SentenceResult], k: int) -> List[int]:
    """Return indices of the *k* highest-confidence results, best first.

    Ties keep document order (the sort is stable). A non-positive *k* yields
    an empty list.
    """
    ranked = sorted(
        range(len(sentence_results)),
        key=lambda i: sentence_results[i]["confidence"],
        reverse=True,
    )
    return ranked[:max(k, 0)]


def top_charged_sentences(
    sentence_results: List[SentenceResult], k: int = 5
) -> pd.DataFrame:
    """Return the k sentences with the highest sentiment confidence.

    Args:
        sentence_results: Output of :func:`analyze_sentences`.
        k: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``Rank``, ``Polarity``, ``Confidence``
        (formatted to three decimals) and ``Sentence``. The columns are
        present even when there are no rows.
    """
    rows = []
    for rank, idx in enumerate(_top_k_indices(sentence_results, k), start=1):
        r = sentence_results[idx]
        marker = "🟢 POSITIVE" if r["label"] == "POSITIVE" else "🔴 NEGATIVE"
        rows.append({
            "Rank": rank,
            "Polarity": marker,
            "Confidence": f"{r['confidence']:.3f}",
            "Sentence": r["sentence"],
        })
    return pd.DataFrame(rows, columns=_TABLE_COLUMNS)


def render_highlighted(sentence_results: List[SentenceResult], k: int = 5) -> str:
    """Return HTML where the top-k charged sentences are color-highlighted.

    Sentence text is user input, so it is HTML-escaped before being embedded.

    Args:
        sentence_results: Output of :func:`analyze_sentences`.
        k: Number of highest-confidence sentences to highlight.

    Returns:
        An HTML fragment containing every sentence in document order.
    """
    # NOTE(review): the exact tags/styles of the original markup were lost;
    # the markup below is a reconstruction -- confirm against the intended
    # design.
    if not sentence_results:
        return "<p>No text to display.</p>"

    # Identify which sentences are in the top-k by confidence.
    top_indices = set(_top_k_indices(sentence_results, k))

    parts = ["<div style='line-height: 1.8;'>"]
    for idx, r in enumerate(sentence_results):
        text = html.escape(r["sentence"])
        if idx in top_indices:
            color = "#C8E6C9" if r["label"] == "POSITIVE" else "#FFCDD2"
            border = "#2E7D32" if r["label"] == "POSITIVE" else "#B71C1C"
            parts.append(
                f"<span style='background-color: {color}; "
                f"border-bottom: 2px solid {border}; "
                f"padding: 2px 4px; border-radius: 3px;'>{text}</span> "
            )
        else:
            parts.append(f"<span>{text}</span> ")
    parts.append("</div>")
    return "".join(parts)


# ---------------------------------------------------------------------------
# Gradio entry point
# ---------------------------------------------------------------------------
def analyze_text(text: str):
    """Analyze *text* and produce the four outputs shown in the UI.

    Args:
        text: Text pasted by the user.

    Returns:
        A tuple ``(summary, chart, table, highlighted_html)``. When the input
        is empty, no sentences are found, or an unexpected error occurs, the
        first element is a message and the rest are ``None, None, ""``.
    """
    # Top-level UI boundary: any failure is logged and reported to the user
    # instead of crashing the request.
    try:
        if not text or not text.strip():
            return "Please paste some text to analyze.", None, None, ""

        sentences = split_sentences(text)
        if not sentences:
            return "No sentences detected.", None, None, ""

        results = analyze_sentences(sentences)
        summary = overall_summary(results)
        chart = plot_pie_chart(results)
        table = top_charged_sentences(results, k=5)
        highlighted = render_highlighted(results, k=5)
        return summary, chart, table, highlighted

    except Exception as e:
        logger.exception("Unexpected error: %s", e)
        return f"Unexpected error: {e}", None, None, ""


EXAMPLE_TEXTS = [
    [
        "I picked up this novel expecting another forgettable thriller, "
        "but I was completely wrong. The prose is luminous and the "
        "characters feel painfully real. By the final chapter I was in "
        "tears. There are a few slow stretches in the middle, and one "
        "subplot never quite pays off, but those are minor complaints. "
        "This is easily the best book I have read all year."
    ],
    [
        "The student demonstrates a solid grasp of the source material "
        "and writes with genuine enthusiasm. However, the argument loses "
        "focus in the third section, and several claims go unsupported. "
        "The conclusion is rushed and underwhelming. With more careful "
        "revision, this could become a strong essay."
    ],
    [
        "Honestly, the new update is a disaster. Everything that used to "
        "work is now broken, the interface is hideous, and customer "
        "support has been useless. I cannot believe they shipped this. "
        "On the bright side, the dark mode looks nice."
    ],
]


# NOTE(review): the heading/paragraph tags in the gr.HTML calls below are
# reconstructed; only their text content survived -- confirm the intended
# markup.
with gr.Blocks(title="Text Sentiment Analyzer") as demo:
    gr.HTML(
        "<h1 style='text-align: center;'>📝 Text Sentiment Analyzer</h1>"
        "<p style='text-align: center;'>Paste any block of text — a book "
        "review, a student essay, a social media post — and get an overall "
        "sentiment read plus the five most emotionally charged sentences.</p>"
    )

    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                label="Paste your text here",
                lines=12,
                placeholder="Paste a review, essay, post, or any prose…",
            )
            submit_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=EXAMPLE_TEXTS,
                inputs=text_in,
                label="Try an example",
            )

        with gr.Column():
            summary_out = gr.Textbox(label="Overall Sentiment Summary", lines=5)
            chart_out = gr.Plot(label="Sentiment Distribution")

    gr.HTML("<h3>🔥 Five Most Emotionally Charged Sentences</h3>")
    table_out = gr.Dataframe(
        label="Top Charged Sentences",
        wrap=True,
    )

    gr.HTML("<h3>🖍 Highlighted Text</h3>")
    highlighted_out = gr.HTML()

    submit_btn.click(
        analyze_text,
        inputs=[text_in],
        outputs=[summary_out, chart_out, table_out, highlighted_out],
    )


if __name__ == "__main__":
    demo.launch()