| import onnxruntime as ort |
| import numpy |
| import gradio as gr |
| from PIL import Image |
|
|
# Load the pre-trained doodle-embedding model once at module import; every
# call to compare() below reuses this single inference session.
# NOTE(review): the path is relative — assumes the script is launched from
# the directory containing the .onnx file; confirm in deployment.
ort_sess = ort.InferenceSession('tiny_doodle_embedding.onnx')
|
|
| |
|
|
def get_bounds(img):
    """Return the inclusive bounding box of the bright pixels in *img*.

    A pixel counts as foreground when it is strictly brighter than the
    midpoint between the image's darkest and brightest values.

    Args:
        img: 2-D numpy array of gray levels.

    Returns:
        Tuple ``(top, bottom, left, right)`` with inclusive coordinates.
        When no pixel exceeds the threshold (e.g. a constant image) the
        degenerate sentinel ``(img.shape[0], 0, img.shape[1], 0)`` is
        returned, matching the original loop-based behavior.
    """
    # Vectorized replacement for the original per-pixel Python loop:
    # numpy.nonzero finds all foreground coordinates in one C-level pass.
    threshold = 0.5 * (numpy.min(img) + numpy.max(img))
    ys, xs = numpy.nonzero(img > threshold)
    if ys.size == 0:
        # No foreground at all: preserve the historical sentinel values.
        return (img.shape[0], 0, img.shape[1], 0)
    return (int(ys.min()), int(ys.max()), int(xs.min()), int(xs.max()))
|
|
def resize_maxpool(img, out_width: int, out_height: int):
    """Downscale *img* to ``(out_height, out_width)`` by max-pooling.

    Each output cell is the maximum over a block of
    ``(img.shape[0] // out_height) x (img.shape[1] // out_width)`` input
    pixels; any remainder rows/columns on the bottom/right edge are
    dropped, exactly as in the original per-pixel loop.

    Args:
        img: 2-D numpy array.
        out_width: Output width in pixels.
        out_height: Output height in pixels.

    Returns:
        2-D numpy array of shape ``(out_height, out_width)`` with the
        same dtype as *img*.

    Raises:
        ValueError: If *img* is smaller than the requested output in
            either dimension (the pooling block would be empty; the
            original code crashed with an opaque numpy reduction error).
    """
    scale_y = img.shape[0] // out_height
    scale_x = img.shape[1] // out_width
    if scale_y < 1 or scale_x < 1:
        raise ValueError(
            f"input {img.shape} is smaller than output ({out_height}, {out_width})"
        )
    # Trim the ragged edge, then fold each pooling block into its own pair
    # of axes and reduce — one vectorized pass instead of a Python loop.
    trimmed = img[:out_height * scale_y, :out_width * scale_x]
    blocks = trimmed.reshape(out_height, scale_y, out_width, scale_x)
    return blocks.max(axis=(1, 3))
|
|
def process_input(input_msg):
    """Convert a gradio Sketchpad payload into a model-ready 32x32 tensor.

    Steps: binarize (strokes darker than background become 1.0), crop to
    the stroke's bounding box, max-pool down to 32x32, and add a leading
    batch axis.

    Args:
        input_msg: Sketchpad dict; the ``"composite"`` entry is the drawn
            grayscale image as a 2-D numpy array.

    Returns:
        Float array of shape ``(1, 32, 32)`` with values in {0.0, 1.0}.
    """
    img = input_msg["composite"]

    # Invert while binarizing: sketch strokes are darker than the canvas,
    # so "< mean" marks the drawn pixels as foreground (1.0).
    img_mean = 0.5 * (numpy.max(img) + numpy.min(img))
    img = 1.0 * (img < img_mean)

    top, bottom, left, right = get_bounds(img)
    # BUG FIX: get_bounds returns *inclusive* coordinates, but they were
    # used as exclusive slice ends, silently dropping the last row and
    # column of the doodle. Add 1 so the full bounding box is kept.
    img = img[top:bottom + 1, left:right + 1]
    img = resize_maxpool(img, 32, 32)

    # Leading batch dimension expected by the ONNX model input.
    img = numpy.expand_dims(img, axis=0)
    return img
| |
|
|
def compare(input_img_a, input_img_b):
    """Embed two sketches and report their cosine similarity.

    Args:
        input_img_a: Sketchpad payload for the first doodle.
        input_img_b: Sketchpad payload for the second doodle.

    Returns:
        Tuple of (side-by-side PIL preview image, similarity score,
        debug text containing both normalized embeddings).
    """
    text_out = ""

    img_a = process_input(input_img_a)
    img_b = process_input(input_img_b)

    a_embedding = ort_sess.run(None, {'input': img_a.astype(numpy.float32)})[0]
    b_embedding = ort_sess.run(None, {'input': img_b.astype(numpy.float32)})[0]

    # BUG FIX: the magnitudes were hard-coded placeholders (1.0), so the
    # dot product below was an unbounded score rather than the cosine
    # similarity the variable names intend. Use the real L2 norms, with a
    # zero guard so an all-zero embedding cannot divide by zero.
    a_mag = numpy.linalg.norm(a_embedding) or 1.0
    b_mag = numpy.linalg.norm(b_embedding) or 1.0
    a_embedding /= a_mag
    b_embedding /= b_mag

    text_out += f"img_a_embedding: {a_embedding}\n"
    text_out += f"img_b_embedding: {b_embedding}\n"

    sim = numpy.dot(a_embedding, b_embedding.T)
    print(sim)
    print(text_out)

    # Preview: the two 32x32 binary crops side by side, scaled to uint8.
    preview = numpy.clip(
        numpy.hstack([img_a[0], img_b[0]]) * 254, 0, 255
    ).astype(numpy.uint8)
    return Image.fromarray(preview), sim[0][0], text_out
| |
|
|
|
|
# Two-sketchpad UI: the user draws a doodle in each pad; compare() returns
# a side-by-side preview image, the similarity score, and debug text.
demo = gr.Interface(
    fn=compare,
    inputs=[
        gr.Sketchpad(image_mode='L', type='numpy'),
        gr.Sketchpad(image_mode='L', type='numpy'),
    ],
    outputs=["image", "number", "text"],
)

# share=True publishes a temporary public gradio.live URL in addition to
# the local server.
demo.launch(share=True)
|
|
|
|