| import streamlit as st |
| from scipy.special import softmax |
| import numpy as np |
| from tensorflow.keras.preprocessing.sequence import pad_sequences |
| import os |
| from pytorch_transformers import BertForTokenClassification |
| import torch |
| from transformers import BertConfig, BertForSequenceClassification, BertTokenizer |
# --- Model setup (runs once at import) --------------------------------------

# Maximum word-piece sequence length fed to BERT, including [CLS] and [SEP].
max_len = 60

# Directory containing the fine-tuned model artifacts
# (config.json, pytorch_model.bin, vocab files).
bert_out_address = 'model/'

# Kept as a module-level name for backward compatibility with any code that
# inspects the raw config directly.
config = BertConfig.from_json_file(os.path.join(bert_out_address, "config.json"))

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# BUG FIX: the original first built a BertForSequenceClassification(config)
# and loaded pytorch_model.bin into it via load_state_dict, then immediately
# discarded that object by reassigning `model` from from_pretrained().  The
# sequence-classification load was dead work (and could raise on a
# classifier-head shape mismatch), so only the token-classification load is
# kept.  NOTE(review): BertForTokenClassification comes from the legacy
# `pytorch_transformers` import above — consider migrating to `transformers`.
model = BertForTokenClassification.from_pretrained(bert_out_address, num_labels=5)

tokenizer = BertTokenizer.from_pretrained(bert_out_address)

# Inference only: disable dropout and put layers in eval mode.
model.eval()
|
|
|
|
|
|
|
|
def predict(test_query):
    """Extract aspect terms from *test_query*.

    The query is word-piece tokenized, wrapped in [CLS]/[SEP], padded or
    truncated to ``max_len`` positions, and run through the module-level
    token-classification ``model``.  Word pieces whose predicted tag is
    "ASP" are collected and returned.

    Parameters
    ----------
    test_query : str
        Raw input sentence.

    Returns
    -------
    list[str]
        Word-piece tokens tagged "ASP"; may be empty.
    """
    # Build the [CLS] ... [SEP] sequence, truncating so the trailing [SEP]
    # still fits within max_len positions.
    temp_token = ['[CLS]'] + tokenizer.tokenize(test_query)
    if len(temp_token) > max_len - 1:
        temp_token = temp_token[:max_len - 1]
    temp_token.append('[SEP]')
    tokenized_texts = [temp_token]

    input_ids = pad_sequences(
        [tokenizer.convert_tokens_to_ids(txt) for txt in tokenized_texts],
        maxlen=max_len, dtype="long", truncating="post", padding="post")

    # 1 for real tokens, 0 for padding (BERT's [PAD] id is 0).
    attention_masks = [[int(i > 0) for i in ii] for ii in input_ids]

    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    input_ids = input_ids.to(device)
    attention_masks = attention_masks.to(device)

    with torch.no_grad():
        # BUG FIX: the original computed attention_masks but passed
        # attention_mask=None, letting the model attend to padding tokens.
        outputs = model(input_ids, token_type_ids=None,
                        attention_mask=attention_masks)
    logits = outputs[0]

    predict_results = logits.detach().cpu().numpy()
    # softmax is monotonic, so argmax over probabilities equals argmax over
    # raw logits; kept to mirror the original pipeline.
    result_arrays_soft = softmax(predict_results[0])
    tag2name = {0: 'O', 1: 'ASP', 2: 'X', 3: '[CLS]', 4: '[SEP]'}
    result_list = np.argmax(result_arrays_soft, axis=-1)

    asp = []
    for i, mark in enumerate(attention_masks[0].cpu().tolist()):
        # mask is 1 exactly for the first len(temp_token) positions, so
        # indexing temp_token[i] inside this branch is safe.
        if mark > 0 and tag2name[result_list[i]] == "ASP":
            asp.append(temp_token[i])
    return asp
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
# --- Streamlit UI ------------------------------------------------------------
st.title("Aspect Prediction App")

# Typo fix in the user-facing prompt: "Prection" -> "Prediction".
user_input = st.text_area("Enter the text for Aspect Prediction:", "")

if user_input:
    with st.spinner("Analyzing..."):
        result = predict(user_input)
    # Display explicitly instead of relying on Streamlit "magic" (a bare
    # expression on its own line), which only renders inside Streamlit's
    # script runner and silently does nothing elsewhere.
    st.write(result)
| |
|
|
| |
| |