| |
| """ |
| |
| Developed by Abdul S. |
| FA20-BCS-OO1 final app.ipynb |
| |
| |
| Automatically generated by Colab |
| """ |
|
|
| import pandas as pd |
| import numpy as np |
| import gradio as gr |
| from TweetNormalizer import normalizeTweet |
| import seaborn as sns |
| import matplotlib.pyplot as plt |
| from transformers import pipeline |
|
|
| |
# Show floats with two decimal places whenever pandas formats them for display.
pd.set_option("display.float_format", "{:.2f}".format)

# Task-A pipeline used by predict(); model and tokenizer are loaded from the
# same Hugging Face repository id.
pipe = pipeline(
    model="seek007/taskA-DeBERTa-large-1.0.0",
    tokenizer="seek007/taskA-DeBERTa-large-1.0.0",
)
|
|
| |
|
|
|
|
|
|
| |
def predict(text=None, fil=None):
    """Detect sarcasm in a typed tweet and/or in every tweet of an uploaded file.

    Args:
        text: Tweet entered in the text box. ``None``, an empty string and a
            whitespace-only string are all treated as "no text supplied".
        fil: Uploaded file or ``None``. Either an object whose ``name``
            attribute holds the path on disk, or the path itself. Only the
            first column is read, and every row (including the first one) is
            treated as a tweet.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- ``"Sarcastic"`` / ``"Non Sarcastic"`` for the typed
          tweet; when only a file is given, the raw model label of the first
          tweet in the file; a help message when neither input is given.
        * ``df`` -- DataFrame with ``tweet`` and ``label`` columns (the file's
          rows when a file is given, otherwise the single typed tweet), or
          ``None`` when there is no input.
        * ``fig`` -- pie chart of sarcastic vs. non-sarcastic tweets in the
          file, or ``None`` when no file is given.

    Raises:
        ValueError: If the file is not ``.csv``/``.xlsx``/``.xls`` or holds no
            tweets.
    """
    # A text box hands over "" (not None) when left empty, so test for real
    # content instead of comparing against None.
    has_text = bool(text and text.strip())
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both file-like objects exposing ``.name`` and plain paths.
        path = getattr(fil, "name", fil)
        lowered = str(path).lower()
        if lowered.endswith(".csv"):
            df = pd.read_csv(path, header=None, names=["tweet"], usecols=[0])
        elif lowered.endswith((".xlsx", ".xls")):
            df = pd.read_excel(path, header=None, names=["tweet"], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Empty cells cannot be normalized; drop them and renumber the rows so
        # the predictions line up positionally with the remaining tweets.
        df = df.dropna(subset=["tweet"]).reset_index(drop=True)
        if df.empty:
            raise ValueError("The uploaded file does not contain any tweets.")

        tweets = [normalizeTweet(str(tweet)) for tweet in df["tweet"]]
        df["label"] = [result["label"] for result in pipe.predict(tweets)]

        sarcastic_count = int((df["label"] == "sarcastic").sum())
        non_sarcastic_count = int((df["label"] == "non_sarcastic").sum())

        sns.set_style("whitegrid")
        fig, ax = plt.subplots()
        ax.pie(
            [sarcastic_count, non_sarcastic_count],
            explode=(0.1, 0),
            labels=["Sarcastic", "Non-Sarcastic"],
            # The colour list was previously defined but never handed to pie().
            colors=["gold", "lightblue"],
            autopct="%1.1f%%",
            shadow=True,
            startangle=140,
        )
        ax.axis("equal")
        ax.set_title("Sarcastic vs Non-Sarcastic Tweets")

        if not has_text:
            # File-only request: surface the first tweet's label as headline.
            sentiment = df["label"].iloc[0]

    if has_text:
        # Normalize only when there is actual text to normalize.
        prediction = pipe.predict([normalizeTweet(text)])[0]
        sentiment = "Sarcastic" if prediction["label"] == "sarcastic" else "Non Sarcastic"
        if df is None:
            df = pd.DataFrame([{"tweet": text, "label": sentiment}])

    return sentiment, df, fig
|
|
|
|
|
|
|
|
|
|
# Interface around predict(): a text box plus a file upload as inputs; a label,
# a results table and a plot as outputs.
file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
detector = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=["Tweets", "Labels"], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)
|
|
| |
|
|
|
|
| |
# Task-B pipeline used by classifyB(); model and tokenizer are loaded from the
# same Hugging Face repository id.
pipe2 = pipeline(
    model="seek007/taskB-bertweet-base-trainer-1.0.0",
    tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0",
)
|
|
|
|
| |
def classifyB(text=None, fil=None):
    """Run the task-B classifier on a typed tweet and/or an uploaded file.

    The label names come straight from the ``pipe2`` model output. (The code
    used to carry an unused list of expected categories -- sarcasm, irony,
    satire, understatement, overstatement, rhetorical question -- which was
    never consulted and has been removed.)

    Args:
        text: Tweet entered in the text box. ``None``, an empty string and a
            whitespace-only string are all treated as "no text supplied".
        fil: Uploaded file or ``None``. Either an object whose ``name``
            attribute holds the path on disk, or the path itself. Only the
            first column is read, and every row (including the first one) is
            treated as a tweet.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- model label of the typed tweet; when only a file is
          given, the label of the first tweet in the file; a help message
          when neither input is given.
        * ``df`` -- DataFrame with ``tweet``, ``label`` and ``score`` columns
          (the file's rows when a file is given, otherwise the single typed
          tweet), or ``None`` when there is no input.
        * ``fig`` -- count plot of the labels predicted for the file, or
          ``None`` when no file is given.

    Raises:
        ValueError: If the file is not ``.csv``/``.xlsx``/``.xls`` or holds no
            tweets.
    """
    # A text box hands over "" (not None) when left empty, so test for real
    # content instead of comparing against None.
    has_text = bool(text and text.strip())
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both file-like objects exposing ``.name`` and plain paths.
        path = getattr(fil, "name", fil)
        lowered = str(path).lower()
        if lowered.endswith(".csv"):
            df = pd.read_csv(path, header=None, names=["tweet"], usecols=[0])
        elif lowered.endswith((".xlsx", ".xls")):
            df = pd.read_excel(path, header=None, names=["tweet"], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Empty cells cannot be normalized; drop them and renumber the rows so
        # the predictions line up positionally with the remaining tweets.
        df = df.dropna(subset=["tweet"]).reset_index(drop=True)
        if df.empty:
            raise ValueError("The uploaded file does not contain any tweets.")

        tweets = [normalizeTweet(str(tweet)) for tweet in df["tweet"]]
        results = pipe2(tweets)
        df["label"] = [result["label"] for result in results]
        df["score"] = [result["score"] for result in results]

        # Draw on an explicit Axes rather than relying on pyplot's implicit
        # "current figure" state.
        fig, ax = plt.subplots()
        sns.countplot(x="label", data=df, palette="viridis", ax=ax)
        ax.set_title("Result: Count Plot")
        ax.set_xlabel("label")
        ax.set_ylabel("Count")

        if not has_text:
            # File-only request: surface the first tweet's label as headline.
            sentiment = df["label"].iloc[0]

    if has_text:
        # Normalize only when there is actual text to normalize.
        prediction = pipe2([normalizeTweet(text)])[0]
        sentiment = prediction["label"]
        if df is None:
            df = pd.DataFrame(
                [{"tweet": text, "label": sentiment, "score": prediction["score"]}]
            )

    return sentiment, df, fig
|
|
|
|
|
|
# Interface around classifyB(): same inputs as the detector tab, with an extra
# score column in the results table.
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classifier = gr.Interface(
    classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=["Tweets", "Label", "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
)

# Combine both interfaces into one app with one tab each.
main = gr.TabbedInterface(
    [detector, classifier],
    ["Analysizer", "Classifier"],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

# Start the app; share=True is passed through to Gradio's launch().
main.launch(share=True)