reading-steiner-qwen3.5-2b / inference_example.py
"""
Example inference script for the fine-tuned Reading Steiner model.
Replace with your actual LoRA adapter path after training.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen3.5-2B",
torch_dtype=torch.bfloat16,
device_map="auto",
trust_remote_code=True,
)
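# Note: bfloat16 needs fairly recent GPU support; if your hardware lacks it,
# torch.float16 is a reasonable substitute here (a suggestion, not part of the
# original script).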
# Load LoRA adapter (replace with your actual adapter path)
# model = PeftModel.from_pretrained(base_model, "OmAlve/reading-steiner-qwen3.5-2b")
# Optionally merge adapter into base model for faster inference:
# model = model.merge_and_unload()
model = base_model  # Replace with the adapter-loaded model above after training
# Tokenizer
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3.5-2B", trust_remote_code=True)
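# Qwen tokenizers normally define a pad token, but guard against a missing one
# so generate() can pad safely (a defensive fallback, not something the base
# checkpoint is known to require):
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token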
# Example input
messages = [
{"role": "system", "content": (
"You are Reading Steiner, a web content extraction model. "
"Given a webpage split into indexed blocks, identify which blocks contain the main content. "
"Output indices as a Python list of [start, end] intervals."
)},
{"role": "user", "content": (
"URL: https://example.com\n"
"Title: Example Page\n"
"Blocks:\n"
'[1] <div class="nav">Home | About | Contact</div>\n'
'[2] <div class="sidebar">Trending</div>\n'
'[3] <p>This is the main article content.</p>\n'
'[4] <p>More content here.</p>\n'
'[5] <div class="footer">Copyright 2025</div>'
)},
]
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,  # also return the attention mask, so generate() won't warn
    return_tensors="pt",
)
inputs = inputs.to(model.device)
# Generate (greedy decoding: temperature is ignored when do_sample=False,
# so it is dropped here)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=128,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )
# Decode only the newly generated tokens, not the echoed prompt
new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
result = tokenizer.decode(new_tokens, skip_special_tokens=True)
print("=== Generated Output ===")
print(result)
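# Optional: parse the reply into block intervals. A minimal sketch, assuming
# the reply contains a bare Python list of [start, end] pairs such as
# "[[3, 4]]" (per the system prompt above). The regex and the variable names
# below are illustrative choices, not part of the original script.
import ast
import re

match = re.search(r"\[\s*\[.*?\]\s*\]", result, re.DOTALL)
if match:
    intervals = ast.literal_eval(match.group(0))  # e.g. [[3, 4]]
    print("Main-content block intervals:", intervals)
else:
    print("No interval list found in the model output.")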