from unsloth import FastVisionModel  # import unsloth before transformers so its patches apply

from transformers import TextStreamer

from consts import REASONING_START, REASONING_END, SOLUTION_START, SOLUTION_END


def inference(idx: int, model, dataset, tokenizer):
    """Run one streamed generation over dataset[idx] and return the generated token ids."""
    FastVisionModel.for_inference(model)  # switch the Unsloth model into inference mode
    image = dataset[idx]["decoded_image"]
    instruction = (
        f"{dataset[idx]['question']}, provide your reasoning between {REASONING_START} and {REASONING_END} "
        f"and then your final answer (a float) between {SOLUTION_START} and {SOLUTION_END}"
    )
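    # For illustration only: with hypothetical tag values such as
    # REASONING_START = "<REASONING>" and SOLUTION_START = "<SOLUTION>"
    # (the real values live in consts.py), the rendered instruction would read:
    #   "<question>, provide your reasoning between <REASONING> and </REASONING>
    #    and then your final answer (a float) between <SOLUTION> and </SOLUTION>"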

    messages = [
        {
            "role": "user",
            "content": [{"type": "image"}, {"type": "text", "text": instruction}],
        }
    ]

    # Render the chat template to a prompt string, then tokenize it together with the image.
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    # Stream tokens to stdout as they are generated, skipping the echoed prompt.
    text_streamer = TextStreamer(tokenizer, skip_prompt=True)
    result = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
        temperature=1.0,
        top_p=0.95,
        top_k=64,
    )
    return result
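

# A minimal usage sketch, not part of the original module: the checkpoint and
# dataset names below are assumptions; any Unsloth vision model plus a dataset
# exposing "decoded_image" and "question" columns should work the same way.
if __name__ == "__main__":
    from datasets import load_dataset

    model, tokenizer = FastVisionModel.from_pretrained(
        "unsloth/Qwen2-VL-7B-Instruct",  # hypothetical checkpoint, swap in your own
        load_in_4bit=True,  # 4-bit weights to fit a single consumer GPU
    )
    dataset = load_dataset("your-org/your-vqa-dataset", split="test")  # hypothetical dataset
    inference(0, model, dataset, tokenizer)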