| """ |
| Example inference script for the fine-tuned Reading Steiner model. |
| Replace with your actual LoRA adapter path after training. |
| """ |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| from peft import PeftModel |
| import torch |
|
|
| |
# Load the frozen base checkpoint in bfloat16, letting accelerate shard it
# across whatever devices are available.
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3.5-2B",
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
|
|
| |
| |
| |
| |
# After training, point this at your LoRA adapter directory
# (e.g. "./reading-steiner-lora"). Leave as None to run the plain base
# model — the original behavior of this script.
ADAPTER_PATH = None

if ADAPTER_PATH is not None:
    # Wrap the frozen base weights with the trained LoRA adapters
    # (merge-free inference); this is why PeftModel is imported above.
    model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
else:
    model = base_model
|
|
| |
# The tokenizer must come from the same checkpoint as the base model.
tokenizer = AutoTokenizer.from_pretrained(
    "Qwen/Qwen3.5-2B",
    trust_remote_code=True,
)
|
|
| |
# System prompt: defines the extraction task and the required output format.
_SYSTEM_PROMPT = (
    "You are Reading Steiner, a web content extraction model. "
    "Given a webpage split into indexed blocks, identify which blocks contain the main content. "
    "Output indices as a Python list of [start, end] intervals."
)

# Example page: navigation/sidebar/footer chrome around two content blocks.
_USER_PROMPT = (
    "URL: https://example.com\n"
    "Title: Example Page\n"
    "Blocks:\n"
    '[1] <div class="nav">Home | About | Contact</div>\n'
    '[2] <div class="sidebar">Trending</div>\n'
    '[3] <p>This is the main article content.</p>\n'
    '[4] <p>More content here.</p>\n'
    '[5] <div class="footer">Copyright 2025</div>'
)

messages = [
    {"role": "system", "content": _SYSTEM_PROMPT},
    {"role": "user", "content": _USER_PROMPT},
]
|
|
# Render the chat into model-ready token ids (appending the assistant
# generation prompt) and move the tensor onto the model's device.
inputs = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=True,
    return_tensors="pt",
).to(model.device)
|
|
| |
# Greedy decoding. With do_sample=False the sampling temperature is ignored
# by transformers (it only warns about it), so the old temperature=0.1 has
# been dropped rather than silently carried along.
with torch.no_grad():
    outputs = model.generate(
        inputs,
        # Single unpadded sequence: every position is a real token, so an
        # all-ones mask is correct and suppresses the missing-attention-mask
        # warning.
        attention_mask=torch.ones_like(inputs),
        max_new_tokens=128,
        do_sample=False,
        # Some tokenizers define no pad token; fall back to EOS so
        # generate() never receives pad_token_id=None.
        pad_token_id=(
            tokenizer.pad_token_id
            if tokenizer.pad_token_id is not None
            else tokenizer.eos_token_id
        ),
    )
|
|
# outputs[0] contains the prompt followed by the completion; slice off the
# prompt tokens so "Generated Output" shows only what the model produced
# instead of echoing the whole conversation.
generated_ids = outputs[0][inputs.shape[-1]:]
result = tokenizer.decode(generated_ids, skip_special_tokens=True)
print("=== Generated Output ===")
print(result)
|
|