profplate committed on
Commit
12496be
·
verified ·
1 Parent(s): a941938

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +283 -0
app.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Sentiment Analyzer
3
+ -----------------------
4
+ A Gradio Space that analyzes the sentiment of any block of text
5
+ (book review, student essay, social media post, etc.) and surfaces
6
+ the five most emotionally charged sentences.
7
+
8
+ Designed for a free CPU Hugging Face Space.
9
+ """
10
+
11
import html
import logging
import re
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from transformers import pipeline
19
+
20
# === Setup Logging ===
# Module-level config so the load-time messages below are visible in the
# Space's container logs.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# === Load model once at startup ===
# DistilBERT SST-2 is small (~250MB), fast on CPU, and gives a clean
# POSITIVE / NEGATIVE label with a confidence score we can use as an
# "emotional intensity" signal.
# Loading happens at import time, so the first request does not pay the
# model-download/warm-up cost.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
logging.info(f"Loading sentiment model: {MODEL_NAME}")
sentiment_pipe = pipeline(
    "sentiment-analysis",
    model=MODEL_NAME,
    # truncation=True clips over-long sentences to the model's max input
    # length (presumably 512 tokens for DistilBERT — confirm) instead of
    # raising an error.
    truncation=True,
)
logging.info("Model loaded.")
38
+
39
+
40
+ # ---------------------------------------------------------------------------
41
+ # Core helpers
42
+ # ---------------------------------------------------------------------------
43
+
44
def split_sentences(text: str):
    """Split *text* into sentences with a regex heuristic (no NLP dependencies).

    A boundary is any '.', '!', or '?' followed by whitespace. Returns a
    list of stripped, non-empty sentence strings; empty/whitespace-only
    input yields an empty list.
    """
    stripped = text.strip()
    if not stripped:
        return []
    # Keep the terminal punctuation with each sentence via the lookbehind.
    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+", stripped))
    return [piece for piece in pieces if piece]
52
+
53
+
54
def analyze_sentences(sentences):
    """Classify each sentence with the sentiment model.

    Returns one dict per sentence with keys: 'sentence', 'label'
    (POSITIVE/NEGATIVE), 'confidence' (0..1), and 'signed_score'
    (+confidence for positive, -confidence for negative).
    """
    if not sentences:
        return []

    annotated = []
    # The pipeline accepts the whole list at once; results align 1:1.
    for sentence, prediction in zip(sentences, sentiment_pipe(sentences)):
        label = prediction["label"].upper()
        confidence = float(prediction["score"])
        annotated.append(
            {
                "sentence": sentence,
                "label": label,
                "confidence": confidence,
                # Sign encodes polarity so scores can be averaged directly.
                "signed_score": confidence if label == "POSITIVE" else -confidence,
            }
        )
    return annotated
72
+
73
+
74
def overall_summary(sentence_results):
    """Summarize document-level sentiment in plain language.

    Averages the signed per-sentence scores and buckets the result into
    POSITIVE / NEGATIVE / MIXED using a +/-0.25 threshold, then reports
    sentence counts alongside the verdict.
    """
    if not sentence_results:
        return "No text to analyze."

    total = len(sentence_results)
    label_counts = Counter(item["label"] for item in sentence_results)
    positives = label_counts.get("POSITIVE", 0)
    negatives = label_counts.get("NEGATIVE", 0)

    mean_signed = sum(item["signed_score"] for item in sentence_results) / total

    # +/-0.25 keeps mildly slanted documents in the MIXED bucket.
    if mean_signed > 0.25:
        tone = "Overall tone: POSITIVE"
    elif mean_signed < -0.25:
        tone = "Overall tone: NEGATIVE"
    else:
        tone = "Overall tone: MIXED / NEUTRAL"

    report_lines = [
        tone,
        f"Sentences analyzed: {total}",
        f"Positive: {positives} | Negative: {negatives}",
        f"Average signed sentiment: {mean_signed:+.2f} (range -1.0 to +1.0)",
    ]
    return "\n".join(report_lines)
98
+
99
+
100
def plot_pie_chart(sentence_results):
    """Draw a pie chart of positive vs. negative sentence counts.

    Returns a matplotlib Figure; when there is nothing to chart, the
    figure carries a "No data" placeholder instead of an empty pie.
    """
    tallies = Counter(entry["label"] for entry in sentence_results)
    positives = tallies.get("POSITIVE", 0)
    negatives = tallies.get("NEGATIVE", 0)

    fig, ax = plt.subplots(figsize=(4, 4))

    if not (positives or negatives):
        ax.text(0.5, 0.5, "No data", ha="center", va="center")
        ax.axis("off")
        return fig

    # Build only the slices that are non-zero so matplotlib never gets a
    # zero-sized wedge.
    slices = []
    if positives:
        slices.append(("Positive", positives, "#4CAF50"))
    if negatives:
        slices.append(("Negative", negatives, "#E53935"))

    ax.pie(
        [count for _, count, _ in slices],
        labels=[name for name, _, _ in slices],
        colors=[color for _, _, color in slices],
        autopct="%1.1f%%",
        startangle=90,
        wedgeprops={"edgecolor": "white", "linewidth": 2},
    )
    ax.set_title("Sentence-Level Sentiment Distribution")
    return fig
132
+
133
+
134
def top_charged_sentences(sentence_results, k: int = 5):
    """Return a DataFrame of the k sentences with the highest model confidence.

    Columns: Rank (1-based), Polarity (emoji-tagged label), Confidence
    (3-decimal string), Sentence (original text).
    """
    by_confidence = sorted(
        sentence_results,
        key=lambda entry: entry["confidence"],
        reverse=True,
    )

    rows = [
        {
            "Rank": rank,
            "Polarity": "🟢 POSITIVE" if entry["label"] == "POSITIVE" else "🔴 NEGATIVE",
            "Confidence": f"{entry['confidence']:.3f}",
            "Sentence": entry["sentence"],
        }
        for rank, entry in enumerate(by_confidence[:k], start=1)
    ]
    return pd.DataFrame(rows)
152
+
153
+
154
def render_highlighted(sentence_results, k: int = 5):
    """Return HTML where the top-k most confident sentences are highlighted.

    Positive highlights are green, negative ones red; all other sentences
    render as plain spans.

    Fix: the previous version probed the private ``gr.utils.sanitize_html``
    and, when it existed, escaped the text a second time with manual
    ``&``/``<``/``>`` replacements — double-escaping user text into visible
    ``&amp;lt;`` artifacts. Escaping is now done exactly once with the
    stdlib ``html.escape``.
    """
    if not sentence_results:
        return "<p><em>No text to display.</em></p>"

    # Indices of the k sentences with the highest model confidence.
    top_indices = {
        idx
        for idx, _ in sorted(
            enumerate(sentence_results),
            key=lambda pair: pair[1]["confidence"],
            reverse=True,
        )[:k]
    }

    parts = ["<div style='line-height:1.7; font-size:1rem;'>"]
    for idx, r in enumerate(sentence_results):
        # Escape exactly once so user text cannot inject markup.
        text = html.escape(r["sentence"])
        if idx in top_indices:
            color = "#C8E6C9" if r["label"] == "POSITIVE" else "#FFCDD2"
            border = "#2E7D32" if r["label"] == "POSITIVE" else "#B71C1C"
            parts.append(
                f"<span style='background:{color}; "
                f"border-bottom:2px solid {border}; padding:2px 4px; "
                f"border-radius:3px; margin-right:2px;'>{text}</span> "
            )
        else:
            parts.append(f"<span>{text}</span> ")
    parts.append("</div>")
    return "".join(parts)
187
+
188
+
189
+ # ---------------------------------------------------------------------------
190
+ # Gradio entry point
191
+ # ---------------------------------------------------------------------------
192
+
193
def analyze_text(text: str):
    """Gradio callback: run the full analysis pipeline on *text*.

    Returns a 4-tuple (summary string, matplotlib figure, DataFrame,
    HTML string) matching the four output components wired up in the UI;
    on empty input or failure the non-summary slots are None/"".
    """
    try:
        if not (text and text.strip()):
            return "Please paste some text to analyze.", None, None, ""

        sentences = split_sentences(text)
        if not sentences:
            return "No sentences detected.", None, None, ""

        results = analyze_sentences(sentences)
        return (
            overall_summary(results),
            plot_pie_chart(results),
            top_charged_sentences(results, k=5),
            render_highlighted(results, k=5),
        )

    except Exception as e:
        # Top-level UI boundary: log the traceback, surface the message.
        logging.exception(f"Unexpected error: {e}")
        return f"Unexpected error: {e}", None, None, ""
213
+
214
+
215
# Canned inputs for the gr.Examples widget in the UI below: a book review,
# an essay critique, and a product rant. Each example is one list holding a
# single string assembled via implicit literal concatenation.
EXAMPLE_TEXTS = [
    [
        "I picked up this novel expecting another forgettable thriller, "
        "but I was completely wrong. The prose is luminous and the "
        "characters feel painfully real. By the final chapter I was in "
        "tears. There are a few slow stretches in the middle, and one "
        "subplot never quite pays off, but those are minor complaints. "
        "This is easily the best book I have read all year."
    ],
    [
        "The student demonstrates a solid grasp of the source material "
        "and writes with genuine enthusiasm. However, the argument loses "
        "focus in the third section, and several claims go unsupported. "
        "The conclusion is rushed and underwhelming. With more careful "
        "revision, this could become a strong essay."
    ],
    [
        "Honestly, the new update is a disaster. Everything that used to "
        "work is now broken, the interface is hideous, and customer "
        "support has been useless. I cannot believe they shipped this. "
        "On the bright side, the dark mode looks nice."
    ],
]
238
+
239
+
240
# --- UI layout: input column on the left, summary + chart on the right,
# --- then the ranked table and highlighted text below.
with gr.Blocks(title="Text Sentiment Analyzer") as demo:
    gr.HTML(
        "<h1 style='text-align:center;'>📝 Text Sentiment Analyzer</h1>"
        "<p style='text-align:center;'>Paste any block of text — a book "
        "review, a student essay, a social media post — and get an overall "
        "sentiment read plus the five most emotionally charged sentences.</p>"
    )

    with gr.Row():
        # Left column: free-text input, submit button, canned examples.
        with gr.Column():
            text_in = gr.Textbox(
                label="Paste your text here",
                lines=12,
                placeholder="Paste a review, essay, post, or any prose…",
            )
            submit_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=EXAMPLE_TEXTS,
                inputs=text_in,
                label="Try an example",
            )

        # Right column: plain-language verdict plus the pie chart.
        with gr.Column():
            summary_out = gr.Textbox(label="Overall Sentiment Summary", lines=5)
            chart_out = gr.Plot(label="Sentiment Distribution")

    gr.HTML("<h3>🔥 Five Most Emotionally Charged Sentences</h3>")
    table_out = gr.Dataframe(
        label="Top Charged Sentences",
        wrap=True,
    )

    gr.HTML("<h3>🖍 Highlighted Text</h3>")
    highlighted_out = gr.HTML()

    # Wire the button to the callback; the four outputs map 1:1 onto the
    # 4-tuple returned by analyze_text.
    submit_btn.click(
        analyze_text,
        inputs=[text_in],
        outputs=[summary_out, chart_out, table_out, highlighted_out],
    )
280
+
281
+
282
if __name__ == "__main__":
    # Launch the Gradio server when the file is run as a script.
    demo.launch()