Spaces:

goldrode
/

imgToText

Runtime error

imgToText / app.py

Update app.py

e467978 verified over 1 year ago

1.08 kB



	from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
	from PIL import Image
	import gradio as gr

	# Load the model, image processor, and tokenizer once at import time so every
	# request reuses the same objects.
	# NOTE(review): the previous id "microsoft/vision-encoder-decoder-base" does
	# not appear to be a published Hub repository, which would make
	# from_pretrained() fail at startup -- confirm. The checkpoint below is a
	# ViT encoder + GPT-2 decoder captioning model that ships all three artifacts.
	CHECKPOINT = "nlpconnect/vit-gpt2-image-captioning"

	# All three artifacts must come from the same checkpoint so that the
	# processor's pixel format and the tokenizer's vocabulary match the model.
	model = VisionEncoderDecoderModel.from_pretrained(CHECKPOINT)
	processor = ViTImageProcessor.from_pretrained(CHECKPOINT)
	tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)

	# Function to generate captions
	def generate_caption(image):
	    """Return a short text caption for *image*.

	    Args:
	        image: A PIL image, as delivered by ``gr.Image(type="pil")``.
	            ``None`` is rejected with a user-facing error.

	    Returns:
	        The decoded caption string with special tokens removed.

	    Raises:
	        gr.Error: If no image was supplied.
	    """
	    # Fail with a readable message instead of an opaque processor traceback
	    # when the function is invoked without an image.
	    if image is None:
	        raise gr.Error("Please upload an image first.")

	    # Normalise non-RGB input (e.g. grayscale or RGBA) before preprocessing;
	    # this matters when the function is called with an arbitrary PIL image.
	    if image.mode != "RGB":
	        image = image.convert("RGB")

	    # Preprocess the image into the tensor the encoder consumes.
	    pixel_values = processor(images=image, return_tensors="pt").pixel_values

	    # Beam search, capped at 16 tokens.
	    output_ids = model.generate(pixel_values, max_length=16, num_beams=4)

	    # A single image was passed, so only the first sequence is decoded.
	    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

	# Web UI: a single PIL image goes in, the caption comes back as plain text.
	caption_input = gr.Image(type="pil")
	interface = gr.Interface(
	    generate_caption,
	    caption_input,
	    "text",
	    title="Image to Text (Caption Generator)",
	    description="Upload an image, and the AI will describe it!",
	)

	# Launch the interface only when this file is executed as a script, so that
	# importing the module (e.g. to reuse generate_caption) does not start a server.
	if __name__ == "__main__":
	    interface.launch()