Spaces:

D3V1L1810
/

Multi_Text_Classification

Build error

App Files Files Community

Multi_Text_Classification / app.py

D3V1L1810

Update app.py

7a437ab verified over 1 year ago

raw

history blame

3.09 kB

	import tensorflow as tf
	from transformers import BertTokenizer, TFBertForSequenceClassification
	import numpy as np
	import json
	import requests
	import gradio as gr
	import logging

	# Load the tokenizer and the sequence-classification model once, at import
	# time, so that every call to the prediction function reuses the same objects.
	_TOKENIZER_SOURCE = 'MultiTokenizer_ep10'
	_MODEL_SOURCE = 'MultiModel_ep10'
	bert_tokenizer = BertTokenizer.from_pretrained(_TOKENIZER_SOURCE)
	bert_model = TFBertForSequenceClassification.from_pretrained(_MODEL_SOURCE)

	# Function to send results to API
	# def send_results_to_api(data, result_url):
	# headers = {'Content-Type':'application/json'}
	# response = requests.post(result_url, json = data, headers=headers)
	# if response.status_code == 200:
	# return response.json()
	# else:
	# return {'error':f"failed to send result to API: {response.status_code}"}

	def predict_text(params):
	    """Classify every text in a JSON request and return the labelled results.

	    Args:
	        params: A JSON document (string) containing an object with:
	            * ``"urls"`` -- list of texts to classify. ``"texts"`` is
	              accepted as an alias because that is the name used by the
	              error message below; ``"urls"`` wins when both are given.
	              A single bare string is treated as a one-element list.
	            * ``"normalfileID"`` (optional) -- list of ids paired with the
	              texts by position. Missing ids are filled with ``None``;
	              surplus ids are ignored.

	    Returns:
	        On success, a JSON *string* of the form ``{"solutions": [...]}``
	        where each entry holds ``text``, ``answer`` (a one-element list
	        with the label name), ``qcUser`` (always ``None``) and
	        ``normalfileID``.
	        On failure, a ``dict`` of the form ``{"error": "<message>"}``.
	        (The two return types differ; this is kept as-is for existing
	        callers.)
	    """
	    try:
	        params = json.loads(params)
	    except json.JSONDecodeError as e:
	        message = f"Invalid JSON input: {e.msg} at line {e.lineno} column {e.colno}"
	        logging.error(message)
	        return {"error": message}
	    except TypeError:
	        # json.loads rejects anything that is not str/bytes (e.g. None).
	        message = "Invalid JSON input: expected a JSON string"
	        logging.error(message)
	        return {"error": message}

	    # Valid JSON is not necessarily an object; lists/strings/numbers have no
	    # .get() and would otherwise crash below.
	    if not isinstance(params, dict):
	        message = "Invalid JSON input: expected a JSON object"
	        logging.error(message)
	        return {"error": message}

	    texts = params.get("urls") or params.get("texts") or []
	    if isinstance(texts, str):
	        # Iterating a bare string would classify it character by character.
	        texts = [texts]
	    if not isinstance(texts, list) or not texts:
	        return {"error": "Missing required parameters: 'texts'"}

	    file_ids = params.get("normalfileID") or []
	    if not isinstance(file_ids, list):
	        file_ids = [file_ids]
	    # zip() stops at the shorter sequence, so pad the ids with None to make
	    # sure no text is silently dropped when fewer ids than texts are sent.
	    file_ids = file_ids + [None] * (len(texts) - len(file_ids))

	    confidence_threshold = 0.85  # Predictions below this are reported as 'None'
	    fallback_label = 7  # Index of the 'None' class in the mapping below
	    label = {0: 'BUSINESS', 1: 'COMEDY', 2: 'CRIME', 3: 'FOOD & DRINK', 4: 'POLITICS', 5: 'SPORTS', 6: 'TRAVEL', 7: 'None'}

	    solutions = []
	    for text, file_id in zip(texts, file_ids):
	        encoding = bert_tokenizer.encode_plus(
	            text,
	            add_special_tokens=True,
	            max_length=128,
	            return_token_type_ids=True,
	            padding='max_length',
	            truncation=True,
	            return_attention_mask=True,
	            return_tensors='tf',
	        )
	        input_ids = encoding['input_ids']
	        token_type_ids = encoding['token_type_ids']
	        attention_mask = encoding['attention_mask']

	        # NOTE(review): transformers documents positional list inputs as
	        # (input_ids, attention_mask, token_type_ids); here token_type_ids
	        # comes second. Left unchanged on purpose -- if the checkpoint was
	        # trained with this same order, reordering would change predictions.
	        # Confirm against the training script before touching it.
	        pred = bert_model.predict([input_ids, token_type_ids, attention_mask])
	        logits = pred.logits
	        softmax_scores = tf.nn.softmax(logits, axis=1).numpy()[0]
	        pred_label = int(tf.argmax(logits, axis=1).numpy()[0])

	        # Confidence is the softmax probability of the winning class.
	        confidence = softmax_scores[pred_label]
	        print(confidence)
	        if confidence < confidence_threshold:
	            pred_label = fallback_label

	        solutions.append({'text': text, 'answer': [label[pred_label]], "qcUser": None, "normalfileID": file_id})

	    # Posting the results to an external API is currently disabled:
	    # result_url = f"{api}/{job_id}"
	    # send_results_to_api(solutions, result_url)
	    return json.dumps({"solutions": solutions})

	# Gradio front end: one textbox receives the JSON request and a JSON
	# component displays whatever predict_text returns.
	# The example in the label uses the "urls" key because that is the key
	# predict_text reads, and double quotes because the input is parsed with
	# json.loads (single-quoted strings are not valid JSON).
	inputt = gr.Textbox(label='Parameters in JSON format, e.g. {"urls": ["text1", "text2"], "normalfileID": ["id1", "id2"]}')
	outputt = gr.JSON()

	application = gr.Interface(
	    fn=predict_text,
	    inputs=inputt,
	    outputs=outputt,
	    title='Multi Text Classification with API Integration..',
	)
	application.launch()