| import gradio as gr |
| import joblib |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| import json |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
|
# Startup banner (Croatian for "Starting the application...").
print("Pokrećem aplikaciju...")

# Classical-ML pipeline restored from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only deploy a svm_pipeline.pkl that comes from a trusted source.
svm_pipeline = joblib.load("svm_pipeline.pkl")

# Token -> integer id lookup consumed by text_to_indices.
with open("word2idx.json", encoding="utf-8") as vocab_file:
    word2idx = json.loads(vocab_file.read())
|
|
class CNNModel(nn.Module):
    """Convolutional text classifier with parallel kernels and max-over-time pooling.

    Token ids are embedded, each convolution slides over the token axis with
    a kernel that spans the full embedding width, the strongest activation of
    every filter is kept, and the concatenated features are classified by a
    single linear layer.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Width of each token embedding.
        num_classes: Number of output logits.
        kernel_sizes: Iterable of kernel heights (in tokens), one convolution
            per entry. The default is an immutable tuple so it can never be
            shared and mutated across instances.
        num_filters: Output channels of every convolution.
    """

    def __init__(self, vocab_size, embed_dim=300, num_classes=3, kernel_sizes=(3, 4, 5), num_filters=128):
        super().__init__()
        # Normalize once so lists, tuples and one-shot iterables all work and
        # len() below is always valid.
        kernel_sizes = tuple(kernel_sizes)
        # Index 0 is the padding token: its embedding stays zero and gets no gradient.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.convs = nn.ModuleList([
            nn.Conv2d(1, num_filters, (k, embed_dim)) for k in kernel_sizes
        ])
        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(num_filters * len(kernel_sizes), num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Integer token ids, expected as a (batch, seq_len) tensor.
                seq_len must be at least the largest kernel size, otherwise
                the convolution cannot be applied.
        """
        # (batch, seq_len) -> (batch, 1, seq_len, embed_dim): add the channel axis Conv2d expects.
        x = self.embedding(x).unsqueeze(1)
        # Each kernel covers the whole embedding width, so the last axis collapses to 1.
        convs = [F.relu(conv(x)).squeeze(3) for conv in self.convs]
        # Max over the entire remaining token axis: one value per filter.
        pools = [F.max_pool1d(c, c.size(2)).squeeze(2) for c in convs]
        x = torch.cat(pools, 1)
        x = self.dropout(x)
        return self.fc(x)
|
|
class GRUModel(nn.Module):
    """Recurrent text classifier: embedding -> GRU -> linear head.

    The logits are computed from the final hidden state of the last GRU layer.
    """

    def __init__(self, vocab_size, embed_dim=300, hidden_dim=256, num_layers=1, num_classes=3):
        super().__init__()
        # Index 0 is the padding token: its embedding stays zero and gets no gradient.
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
            padding_idx=0,
        )
        self.gru = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=num_classes)

    def forward(self, x):
        """Map a batch of token-id sequences to class logits."""
        embedded = self.embedding(x)
        # nn.GRU returns (per-step outputs, final hidden states); only the latter is needed.
        final_hidden = self.gru(embedded)[1]
        # final_hidden[-1] is the last layer's state.
        return self.fc(final_hidden[-1])
|
|
# Size of the embedding tables.
# NOTE(review): the extra slot presumably accounts for index 0, which the
# models use as padding_idx - confirm against the training code.
vocab_size = 1 + len(word2idx)
# Passed to the CNNModel / GRUModel constructors when they are instantiated.
embed_dim, num_classes = 300, 3
|
|
# Build the CNN classifier and restore its trained weights on the CPU.
cnn_model = CNNModel(vocab_size, embed_dim, num_classes)
cnn_model.load_state_dict(
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered checkpoint cannot run arbitrary code; a state_dict needs
    # nothing more than that.
    torch.load("cnn_model.pt", map_location=torch.device("cpu"), weights_only=True)
)
# Inference mode: disables dropout.
cnn_model.eval()
|
|
# Build the GRU classifier and restore its trained weights on the CPU.
gru_model = GRUModel(vocab_size, embed_dim, hidden_dim=256, num_layers=1, num_classes=num_classes)
gru_model.load_state_dict(
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered checkpoint cannot run arbitrary code; a state_dict needs
    # nothing more than that.
    torch.load("gru_model.pt", map_location=torch.device("cpu"), weights_only=True)
)
# Switch the module to inference mode.
gru_model.eval()
|
|
# Fine-tuned transformer (shown as "BERTić" in the UI): the tokenizer is
# loaded first, then the classification model, both from the same directory.
bert_tokenizer, bert_model = (
    auto_cls.from_pretrained("my_finetuned_model")
    for auto_cls in (AutoTokenizer, AutoModelForSequenceClassification)
)
# Inference mode: disables dropout.
bert_model.eval()
|
|
| |
# Second transformer (shown as "CroSlo BERT" in the UI): the tokenizer is
# loaded first, then the classification model, both from the "CroSlo" directory.
croslo_tokenizer, croslo_model = (
    auto_cls.from_pretrained("CroSlo")
    for auto_cls in (AutoTokenizer, AutoModelForSequenceClassification)
)
# Inference mode: disables dropout.
croslo_model.eval()
|
|
# Class index -> Croatian display label (positive / neutral / negative),
# shared by every predict_* function.
label_names = dict(enumerate(('pozitivno', 'neutralno', 'negativno')))
|
|
def text_to_indices(text, max_len=100):
    """Encode *text* as a fixed-length batch of one token-id sequence.

    The text is lower-cased and split on whitespace; each token is looked up
    in ``word2idx`` and unknown tokens map to 0. The sequence is truncated or
    right-padded with 0 to exactly ``max_len`` entries.

    Returns:
        A ``torch.long`` tensor of shape ``(1, max_len)``.
    """
    kept_tokens = text.lower().split()[:max_len]
    token_ids = [word2idx.get(tok, 0) for tok in kept_tokens]
    # A non-positive repeat count yields an empty list, so full-length
    # sequences are left untouched.
    token_ids.extend([0] * (max_len - len(token_ids)))
    return torch.tensor([token_ids], dtype=torch.long)
|
|
def predict_svm(text):
    """Classify *text* with the SVM pipeline.

    Returns:
        A string of the form ``"<label> (p=<probability>)"`` for the most
        probable class, with the probability shown to two decimals.
    """
    class_probs = svm_pipeline.predict_proba([text])[0]
    # predict_proba columns follow the order of classes_, so the argmax
    # position selects the matching class id.
    best_class = svm_pipeline.classes_[class_probs.argmax()]
    label = label_names[best_class]
    return f"{label} (p={class_probs.max():.2f})"
|
|
def predict_cnn(text):
    """Classify *text* with the CNN model.

    Returns:
        A string of the form ``"<label> (p=<probability>)"`` for the most
        probable class, with the probability shown to two decimals.
    """
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        token_ids = text_to_indices(text)
        class_probs = F.softmax(cnn_model(token_ids), dim=1)
        best = int(torch.argmax(class_probs, dim=1))
        best_prob = class_probs[0][best].item()
    return f"{label_names[best]} (p={best_prob:.2f})"
|
|
def predict_gru(text):
    """Classify *text* with the GRU model.

    Returns:
        A string of the form ``"<label> (p=<probability>)"`` for the most
        probable class, with the probability shown to two decimals.
    """
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        token_ids = text_to_indices(text)
        class_probs = F.softmax(gru_model(token_ids), dim=1)
        best = int(torch.argmax(class_probs, dim=1))
        best_prob = class_probs[0][best].item()
    return f"{label_names[best]} (p={best_prob:.2f})"
|
|
def predict_bert(text):
    """Classify *text* with the fine-tuned transformer loaded as ``bert_model``.

    Returns:
        A string of the form ``"<label> (p=<probability>)"`` for the most
        probable class, with the probability shown to two decimals.
    """
    # An explicit max_length keeps truncation from depending on whether the
    # saved tokenizer config defines a maximum length, and matches
    # predict_croslo.
    # NOTE(review): 512 assumes the model has the usual BERT-style position
    # limit - confirm for "my_finetuned_model".
    inputs = bert_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = bert_model(**inputs)
        probs = F.softmax(outputs.logits, dim=1)
        pred = torch.argmax(probs, dim=1).item()
        confidence = probs[0][pred].item()
    return f"{label_names[pred]} (p={confidence:.2f})"
|
|
def predict_croslo(text):
    """Classify *text* with the transformer loaded as ``croslo_model``.

    Returns:
        A string of the form ``"<label> (p=<probability>)"`` for the most
        probable class, with the probability shown to two decimals.
    """
    encoded = croslo_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = croslo_model(**encoded).logits
        class_probs = F.softmax(logits, dim=1)
        best = int(torch.argmax(class_probs, dim=1))
        best_prob = class_probs[0][best].item()
    return f"{label_names[best]} (p={best_prob:.2f})"
|
|
def predict_all(text):
    """Run every model on *text*.

    Returns:
        A 5-tuple of result strings in the order SVM, CNN, GRU, BERT, CroSlo,
        which is the order of the output boxes wired to the submit button.
    """
    predictors = (predict_svm, predict_cnn, predict_gru, predict_bert, predict_croslo)
    return tuple(predict(text) for predict in predictors)
|
|
def clear_all():
    """Return six empty strings: one for the input box and one per result box."""
    return ("",) * 6
|
|
# Gradio UI: header, input box, action buttons, and one result box per model
# grouped into three columns by model family.
with gr.Blocks() as demo:
    # Page header; the HTML lines stay unindented so the string is exactly the markup.
    gr.Markdown(
        """
<h1 style="text-align: center; font-size: 48px; margin-bottom: 5px;">Analiza sentimenta</h1>
<p style="text-align: center; font-size: 16px; margin-top: 0;">Predikcije koriste SVM, CNN, GRU, BERTić i CroSlo modele.</p>
""",
        elem_id="naslov"
    )

    input_text = gr.Textbox(lines=3, label="Unesite rečenicu za analizu:")

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Machine Learning")
            svm_output = gr.Textbox(label="SVM (RBF)")
        with gr.Column():
            gr.Markdown("### Deep Learning")
            cnn_output = gr.Textbox(label="CNN")
            gru_output = gr.Textbox(label="GRU")
        with gr.Column():
            gr.Markdown("### Transformers")
            bert_output = gr.Textbox(label="BERTić")
            croslo_output = gr.Textbox(label="CroSlo BERT")

    # Same order as the tuple returned by predict_all.
    prediction_boxes = [svm_output, cnn_output, gru_output, bert_output, croslo_output]

    submit_btn.click(fn=predict_all, inputs=input_text, outputs=prediction_boxes)
    # clear_all returns six empty strings: the input box plus the five result boxes.
    clear_btn.click(fn=clear_all, inputs=None, outputs=[input_text, *prediction_boxes])
|
|
# Start the web server only when this file is executed as a script, not when
# it is imported. share=True asks Gradio to also create a public share link.
if __name__ == "__main__":
    demo.launch(share=True)
|
|