Spaces:

profplate
/

youtube-comments

Sleeping

App Files Community

profplate committed 14 days ago

Commit

12496be

verified ·

1 Parent(s): a941938

Create app.py

Browse files

Files changed (1) hide show

app.py +283 -0

app.py ADDED Viewed

	@@ -0,0 +1,283 @@

+"""
+Text Sentiment Analyzer
+-----------------------
+A Gradio Space that analyzes the sentiment of any block of text
+(book review, student essay, social media post, etc.) and surfaces
+the five most emotionally charged sentences.
+Designed for a free CPU Hugging Face Space.
+"""
+import html
+import logging
+import re
+from collections import Counter
+
+import gradio as gr
+import matplotlib.pyplot as plt
+import pandas as pd
+from transformers import pipeline
+# === Setup Logging ===
+# Configure the root logger once, at import time: INFO level and above, each
+# record rendered as "<timestamp> - <level name> - <message>".
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+# === Load model once at startup ===
+# DistilBERT SST-2 is small (~250MB), fast on CPU, and gives a clean
+# POSITIVE / NEGATIVE label with a confidence score we can use as an
+# "emotional intensity" signal.
+# The pipeline is constructed here at module level, so it is built a single
+# time when the module is imported and then reused by analyze_sentences().
+MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
+logging.info(f"Loading sentiment model: {MODEL_NAME}")
+# NOTE(review): truncation=True is forwarded to the pipeline; presumably it
+# makes the tokenizer clip over-long sentences to the model's maximum input
+# length instead of failing -- confirm against the transformers docs.
+sentiment_pipe = pipeline(
+    "sentiment-analysis",
+    model=MODEL_NAME,
+    truncation=True,
+)
+logging.info("Model loaded.")
+# ---------------------------------------------------------------------------
+# Core helpers
+# ---------------------------------------------------------------------------
+def split_sentences(text: str):
+    """Lightweight sentence splitter that avoids extra dependencies."""
+    text = text.strip()
+    if not text:
+        return []
+    # Split on ., !, ? followed by whitespace, keeping reasonable boundaries.
+    raw = re.split(r"(?<=[.!?])\s+", text)
+    return [s.strip() for s in raw if s.strip()]
+def analyze_sentences(sentences):
+    """Run the sentiment model on each sentence and return a list of dicts."""
+    if not sentences:
+        return []
+    results = sentiment_pipe(sentences)
+    out = []
+    for sent, res in zip(sentences, results):
+        label = res["label"].upper()
+        score = float(res["score"])
+        # Signed intensity: + for positive, - for negative.
+        signed = score if label == "POSITIVE" else -score
+        out.append({
+            "sentence": sent,
+            "label": label,
+            "confidence": score,
+            "signed_score": signed,
+        })
+    return out
+def overall_summary(sentence_results):
+    """Build a plain-language summary of the document's overall sentiment."""
+    if not sentence_results:
+        return "No text to analyze."
+    counts = Counter(r["label"] for r in sentence_results)
+    total = len(sentence_results)
+    pos = counts.get("POSITIVE", 0)
+    neg = counts.get("NEGATIVE", 0)
+    avg_signed = sum(r["signed_score"] for r in sentence_results) / total
+    if avg_signed > 0.25:
+        verdict = "Overall tone: POSITIVE"
+    elif avg_signed < -0.25:
+        verdict = "Overall tone: NEGATIVE"
+    else:
+        verdict = "Overall tone: MIXED / NEUTRAL"
+    return (
+        f"{verdict}\n"
+        f"Sentences analyzed: {total}\n"
+        f"Positive: {pos}  |  Negative: {neg}\n"
+        f"Average signed sentiment: {avg_signed:+.2f}  (range -1.0 to +1.0)"
+    )
+def plot_pie_chart(sentence_results):
+    """Pie chart of positive vs negative sentence counts."""
+    counts = Counter(r["label"] for r in sentence_results)
+    pos = counts.get("POSITIVE", 0)
+    neg = counts.get("NEGATIVE", 0)
+    fig, ax = plt.subplots(figsize=(4, 4))
+    if pos == 0 and neg == 0:
+        ax.text(0.5, 0.5, "No data", ha="center", va="center")
+        ax.axis("off")
+        return fig
+    labels, sizes, colors = [], [], []
+    if pos:
+        labels.append("Positive")
+        sizes.append(pos)
+        colors.append("#4CAF50")
+    if neg:
+        labels.append("Negative")
+        sizes.append(neg)
+        colors.append("#E53935")
+    ax.pie(
+        sizes,
+        labels=labels,
+        colors=colors,
+        autopct="%1.1f%%",
+        startangle=90,
+        wedgeprops={"edgecolor": "white", "linewidth": 2},
+    )
+    ax.set_title("Sentence-Level Sentiment Distribution")
+    return fig
+def top_charged_sentences(sentence_results, k: int = 5):
+    """Return the k sentences with the highest absolute sentiment confidence."""
+    ranked = sorted(
+        sentence_results,
+        key=lambda r: r["confidence"],
+        reverse=True,
+    )[:k]
+    rows = []
+    for i, r in enumerate(ranked, start=1):
+        marker = "🟢 POSITIVE" if r["label"] == "POSITIVE" else "🔴 NEGATIVE"
+        rows.append({
+            "Rank": i,
+            "Polarity": marker,
+            "Confidence": f"{r['confidence']:.3f}",
+            "Sentence": r["sentence"],
+        })
+    return pd.DataFrame(rows)
+def render_highlighted(sentence_results, k: int = 5):
+    """Return HTML where the top-k charged sentences are color-highlighted."""
+    if not sentence_results:
+        return "<p><em>No text to display.</em></p>"
+    # Identify which sentences are in the top-k by confidence.
+    top_indices = set(
+        idx for idx, _ in sorted(
+            enumerate(sentence_results),
+            key=lambda pair: pair[1]["confidence"],
+            reverse=True,
+        )[:k]
+    )
+    parts = ["<div style='line-height:1.7; font-size:1rem;'>"]
+    for idx, r in enumerate(sentence_results):
+        text = gr.utils.sanitize_html(r["sentence"]) if hasattr(gr.utils, "sanitize_html") else r["sentence"]
+        # Basic escaping fallback
+        text = (text.replace("&", "&amp;")
+                    .replace("<", "&lt;")
+                    .replace(">", "&gt;"))
+        if idx in top_indices:
+            color = "#C8E6C9" if r["label"] == "POSITIVE" else "#FFCDD2"
+            border = "#2E7D32" if r["label"] == "POSITIVE" else "#B71C1C"
+            parts.append(
+                f"<span style='background:{color}; "
+                f"border-bottom:2px solid {border}; padding:2px 4px; "
+                f"border-radius:3px; margin-right:2px;'>{text}</span> "
+            )
+        else:
+            parts.append(f"<span>{text}</span> ")
+    parts.append("</div>")
+    return "".join(parts)
+# ---------------------------------------------------------------------------
+# Gradio entry point
+# ---------------------------------------------------------------------------
+def analyze_text(text: str):
+    try:
+        if not text or not text.strip():
+            return "Please paste some text to analyze.", None, None, ""
+        sentences = split_sentences(text)
+        if not sentences:
+            return "No sentences detected.", None, None, ""
+        results = analyze_sentences(sentences)
+        summary = overall_summary(results)
+        chart = plot_pie_chart(results)
+        table = top_charged_sentences(results, k=5)
+        highlighted = render_highlighted(results, k=5)
+        return summary, chart, table, highlighted
+    except Exception as e:
+        logging.exception(f"Unexpected error: {e}")
+        return f"Unexpected error: {e}", None, None, ""
+# Sample inputs offered in the UI via gr.Examples. Each inner list is one
+# example and holds a single string, matching the single input component
+# (text_in) that the examples are wired to. The strings are written as
+# adjacent literals, which Python concatenates into one paragraph.
+EXAMPLE_TEXTS = [
+    [
+        "I picked up this novel expecting another forgettable thriller, "
+        "but I was completely wrong. The prose is luminous and the "
+        "characters feel painfully real. By the final chapter I was in "
+        "tears. There are a few slow stretches in the middle, and one "
+        "subplot never quite pays off, but those are minor complaints. "
+        "This is easily the best book I have read all year."
+    ],
+    [
+        "The student demonstrates a solid grasp of the source material "
+        "and writes with genuine enthusiasm. However, the argument loses "
+        "focus in the third section, and several claims go unsupported. "
+        "The conclusion is rushed and underwhelming. With more careful "
+        "revision, this could become a strong essay."
+    ],
+    [
+        "Honestly, the new update is a disaster. Everything that used to "
+        "work is now broken, the interface is hideous, and customer "
+        "support has been useless. I cannot believe they shipped this. "
+        "On the bright side, the dark mode looks nice."
+    ],
+]
+# Declarative UI layout. Components are created in the order they should
+# appear: a header, a two-column row (input controls first, summary and
+# chart second), then the table and highlighted-text sections below it.
+with gr.Blocks(title="Text Sentiment Analyzer") as demo:
+    gr.HTML(
+        "<h1 style='text-align:center;'>📝 Text Sentiment Analyzer</h1>"
+        "<p style='text-align:center;'>Paste any block of text — a book "
+        "review, a student essay, a social media post — and get an overall "
+        "sentiment read plus the five most emotionally charged sentences.</p>"
+    )
+    with gr.Row():
+        # First column: text input, submit button and clickable examples.
+        with gr.Column():
+            text_in = gr.Textbox(
+                label="Paste your text here",
+                lines=12,
+                placeholder="Paste a review, essay, post, or any prose…",
+            )
+            submit_btn = gr.Button("Analyze", variant="primary")
+            gr.Examples(
+                examples=EXAMPLE_TEXTS,
+                inputs=text_in,
+                label="Try an example",
+            )
+        # Second column: textual summary and the pie chart.
+        with gr.Column():
+            summary_out = gr.Textbox(label="Overall Sentiment Summary", lines=5)
+            chart_out = gr.Plot(label="Sentiment Distribution")
+    gr.HTML("<h3>🔥 Five Most Emotionally Charged Sentences</h3>")
+    table_out = gr.Dataframe(
+        label="Top Charged Sentences",
+        wrap=True,
+    )
+    gr.HTML("<h3>🖍 Highlighted Text</h3>")
+    highlighted_out = gr.HTML()
+    # analyze_text returns a 4-tuple; its elements are assigned to the
+    # output components below in the same order.
+    submit_btn.click(
+        analyze_text,
+        inputs=[text_in],
+        outputs=[summary_out, chart_out, table_out, highlighted_out],
+    )
+# Start the Gradio server only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()