| import gradio as gr |
| import pandas as pd |
|
|
| import io |
|
|
| from Bio import SeqIO |
| from tape import ProteinBertModel, ProteinBertConfig, TAPETokenizer |
| from tape.models import modeling_bert |
| import numpy as np |
| import torch |
|
|
|
|
# Tokenizer that maps amino-acid letters to ids using TAPE's IUPAC vocabulary.
tokenizer = TAPETokenizer(vocab='iupac')
# Encoder hyper-parameters. Not referenced again in the visible code: the
# checkpoints below are loaded as whole pickled model objects.
config = modeling_bert.ProteinBertConfig(
    num_hidden_layers=5, num_attention_heads=8, hidden_size=400
)

# SECURITY: torch.load unpickles arbitrary Python objects. Only load
# checkpoint files that come from a trusted source.
# map_location="cpu" lets checkpoints saved on a GPU machine load on a
# CPU-only host instead of failing while restoring CUDA tensors.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole-model pickles -- pass weights_only=False there if needed.
bert_model = torch.load('models/transformer1500_95p_500.pt', map_location="cpu")
class_model = torch.load('models/down_model_500_kfold1.pt', map_location="cpu")

# The encoder checkpoint is stored behind a wrapper exposing `.module`
# (presumably torch.nn.DataParallel -- confirm); unwrap it for CPU inference.
bert_model = bert_model.module
bert_model = bert_model.to("cpu")
bert_model = bert_model.eval()

# Give the classifier head the same treatment as the encoder: unwrap it if
# it was saved wrapped, keep it on the CPU, and switch to eval mode so layers
# such as dropout behave deterministically at inference time.
if isinstance(class_model, torch.nn.DataParallel):
    class_model = class_model.module
if isinstance(class_model, torch.nn.Module):
    class_model = class_model.to("cpu").eval()
|
|
|
|
def func(name):
    """Predict the cluster label for a single protein sequence.

    Args:
        name: Raw amino-acid sequence text. Spaces, tabs, line breaks and
            other ASCII whitespace are removed before tokenization, so the
            sequence may be wrapped over several lines.

    Returns:
        The string "cluster N", where N is the 1-based index of the
        highest-scoring output of ``class_model``.

    Raises:
        gr.Error: If the input contains no sequence characters.
    """
    translation_table = str.maketrans("", "", " \t\n\r\f\v")
    sequence = (name or "").translate(translation_table)
    if not sequence:
        # Without this guard the model would be run on an empty sequence and
        # still report a (meaningless) cluster.
        raise gr.Error("Please provide a non-empty protein sequence.")

    # Inference only: disable autograd so no graph is built per request.
    with torch.no_grad():
        token_ids = torch.tensor([tokenizer.encode(sequence)])
        bert_output = bert_model(token_ids)
        # The second encoder output is what the classifier consumes
        # (presumably the pooled sequence embedding -- confirm).
        class_output = class_model(bert_output[1])
        # Softmax is strictly increasing within a row, so argmax over the
        # raw scores selects the same class; +1 makes the label 1-based.
        cluster = torch.argmax(class_output, dim=1) + 1

    return "cluster " + str(cluster.item())
|
|
|
|
def func_mult(name):
    """Predict clusters for several sequences entered as text.

    Two input layouts are accepted:

    * FASTA text (at least one line starts with ">"), parsed with
      ``process_fasta``;
    * plain sequences, one per line, as the input box placeholder asks for.
      Blank lines are ignored.

    Args:
        name: The text typed into the multi-sequence input box.

    Returns:
        One "cluster N" line per sequence, in input order, joined with
        newlines. An empty string when no sequence is found.
    """
    text = name or ""
    lines = text.splitlines()

    if any(line.startswith(">") for line in lines):
        sequence_list = process_fasta(text)
    else:
        # Header-less input: the FASTA parser finds no records in it, so
        # treat every non-blank line as one sequence.
        sequence_list = [line.strip() for line in lines if line.strip()]

    return "\n".join(func(sequence) for sequence in sequence_list)
|
|
|
|
def process_fasta(fasta_content):
    """Return the sequences found in FASTA-formatted text.

    Args:
        fasta_content: FASTA data as a single string.

    Returns:
        A list with the sequence of every parsed record, as plain strings,
        in the order the records appear.
    """
    # SeqIO.parse is given a handle here, so wrap the text in an
    # in-memory text stream.
    handle = io.StringIO(fasta_content)
    return [str(entry.seq) for entry in SeqIO.parse(handle, "fasta")]
|
|
|
|
def read_fasta_file(file_path):
    """Return the sequences stored in a FASTA file.

    Args:
        file_path: Location of the FASTA file, passed straight to
            ``SeqIO.parse``.

    Returns:
        A list with the sequence of every parsed record, as plain strings,
        in file order.
    """
    return [str(entry.seq) for entry in SeqIO.parse(file_path, "fasta")]
|
|
|
|
def func_file(file_path):
    """Predict clusters for every sequence in an uploaded FASTA file.

    Args:
        file_path: Value of the "Fasta File" component: a path, or a
            file-like object exposing the path as ``.name``.

    Returns:
        One "cluster N" line per record, in file order, joined with
        newlines.

    Raises:
        gr.Error: If no file has been uploaded yet.
    """
    if file_path is None:
        # "Predict" was clicked while the file component was still empty.
        raise gr.Error("Please upload a FASTA file before predicting.")

    # Reduce file-like values to their path so the parser opens the file
    # itself in text mode. Plain strings and os.PathLike objects are already
    # paths and are passed through unchanged.
    # NOTE(review): whether Gradio hands over a path or a temp-file object
    # depends on the installed version -- confirm.
    if not isinstance(file_path, str) and not hasattr(file_path, "__fspath__"):
        file_path = getattr(file_path, "name", file_path)

    sequence_list = read_fasta_file(file_path)
    return "\n".join(func(sequence) for sequence in sequence_list)
|
|
|
|
def upload_file(files):
    """Return the path of the first uploaded file.

    Args:
        files: Sequence of uploaded files. Each entry is either a path
            string or an object exposing its path as ``.name``.

    Returns:
        The path of the first entry, or ``None`` when nothing was uploaded.
        Entries after the first are ignored.
    """
    if not files:
        # Nothing uploaded: return None instead of raising IndexError.
        return None

    first = files[0]
    return first if isinstance(first, str) else first.name
|
|
|
|
def save_to_txt(data):
    """Write prediction text to a fresh ``output.txt`` and return its path.

    Every call writes into its own newly created temporary directory, so
    concurrent requests no longer overwrite one shared ``output.txt`` in
    the working directory. The file keeps the name ``output.txt``.

    Args:
        data: Text to store. ``None`` is written as an empty file.

    Returns:
        The path of the written file.
    """
    import os
    import tempfile

    # NOTE: the per-call directories are not removed here; they live in the
    # system temp location until the OS or an operator cleans them up.
    output_dir = tempfile.mkdtemp(prefix="gh29_prediction_")
    file_name = os.path.join(output_dir, "output.txt")
    with open(file_name, mode='w', encoding="utf-8") as file:
        file.write(data or "")
    return file_name
|
|
|
|
# Stylesheet handed to gr.Blocks: page background, the colours of elements
# tagged with the "button" class, and the font size of "feedback" elements.
css = (
    "\n"
    ".gradio-container {background-color: #EDEFF7}\n"
    ".button {background-color: #515D90; color:#FFFFFF}\n"
    ".feedback {font-size: 36px}\n"
)
|
|
# Web UI: three tabs (single sequence, multiple sequences, FASTA file upload),
# each wired to the matching prediction function.
# NOTE(review): the nesting below is reconstructed -- confirm that each
# "Predict" button sits where the original layout intended.
with gr.Blocks(css=css, title="GH29 Prediction", theme=gr.themes.Soft()) as demo:
    gr.Markdown("GH29 Prediction", elem_classes="feedback")

    with gr.Tab("Single sequence input"):
        with gr.Row():
            single_input = gr.Textbox(lines=10, placeholder="Please input sequence data (note: do not input fasta data)", label="Input")
            single_output = gr.Textbox(lines=10, label="Output", show_copy_button=True)
        single_button = gr.Button("Predict", elem_classes="button")

    with gr.Tab("Multiple sequence input"):
        multiple_input = gr.Textbox(lines=10, placeholder="Please enter multiple sequence data separated by line breaks (do not enter fasta data)", label="Input")
        multiple_button = gr.Button("Predict", elem_classes="button")
        multiple_output = gr.Textbox(lines=10, label="Output", show_copy_button=True)

    with gr.Tab("FASTA input"):
        with gr.Row():
            # Display-only slot; it is filled by the upload button below.
            file_upload = gr.File(label="Fasta File", interactive=False, scale=2)
            file_output_textbox = gr.Textbox(lines=15, label="Output", scale=3, container=True, autoscroll=True, show_copy_button=True)
            file_output_file = gr.File(label="Output File", scale=2)
        with gr.Row():
            # Extension filters need a leading dot; a bare "fasta" is not a
            # valid file-type entry for the upload dialog.
            upload_button = gr.UploadButton("Click to Upload a File", file_types=[".fasta", ".fa", ".faa"], scale=2, size="sm", file_count="multiple")
            # Several files may be selected, but upload_file forwards only
            # the first one to the "Fasta File" slot.
            upload_button.upload(upload_file, upload_button, file_upload)
            file_button = gr.Button("Predict", scale=3, size="lg", elem_classes="button")
            file_button_GenerateFile = gr.Button("Save to File", scale=2, size="sm")

    # Event wiring: each button feeds its input component to a handler and
    # shows the returned value in the paired output component.
    single_button.click(func, inputs=single_input, outputs=single_output)
    multiple_button.click(func_mult, inputs=multiple_input, outputs=multiple_output)
    file_button.click(func_file, inputs=file_upload, outputs=file_output_textbox)
    file_button_GenerateFile.click(save_to_txt, inputs=file_output_textbox, outputs=file_output_file)

# share=True asks Gradio to also create a public share link for the app.
demo.launch(share=True)