Spaces:
Sleeping
Sleeping
| # from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| from transformers import pipeline | |
| from post_search import Post | |
| import streamlit as st | |
def load_model():
    """Build the text-classification pipeline used for emotion detection.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"text-classification"`` task with the checkpoint named below.
    """
    checkpoint = "cointegrated/rubert-tiny2-cedr-emotion-detection"
    classifier = pipeline("text-classification", model=checkpoint)
    return classifier
| # tokenizer, model = load_model() | |
# Shared classifier instance, created once when this module is imported.
pipe = load_model()

# Emotion label names; in the visible code only the commented-out
# implementation below reads this list.
emotions = [
    'no_emotion',
    'joy',
    'sadness',
    'surprise',
    'fear',
    'anger',
]

# Batch size constant; in the visible code it is referenced only by the
# commented-out implementation below.
BATCH_SIZE = 64
| # def get_sentiment(posts: list[Post]): | |
| # all_texts = [post.text for post in posts] | |
| # result = [] | |
| # for i in range(0, len(all_texts), BATCH_SIZE): | |
| # texts = all_texts[i*BATCH_SIZE:(i+1)*BATCH_SIZE] | |
| # inputs = tokenizer(texts, padding=True, truncation=True, max_len=512, return_tensors='pt') | |
| # print("Got tokens", inputs, flush=True) | |
| # output = model(**inputs) | |
| # print("Got output", flush=True) | |
| # probs = torch.softmax(output['logits'], dim=-1) | |
| # print("Got probs", flush=True) | |
| # result.extend([{emotion: probs[i, j].item() for j, emotion in enumerate(emotions)} for i in range(len(probs))]) | |
| # return result | |
def get_sentiment(posts: list[Post]):
    """Run the emotion classifier over the text of every post.

    Args:
        posts: Posts to classify; only the ``text`` attribute of each
            post is read.

    Returns:
        The result of calling the module-level ``pipe`` on the list of
        post texts, or an empty list when there are no texts to classify.
    """
    all_texts = [post.text for post in posts]
    if not all_texts:
        # Nothing to classify, so skip the model call entirely.
        return []
    # The tokenizer keyword for the truncation limit is ``max_length``;
    # ``max_len`` is not a recognised tokenizer argument, so it could not
    # cap the inputs at 2048 tokens as intended.
    return pipe(all_texts, truncation=True, max_length=2048)