Upload inference_example.py
inference_example.py
ADDED  +65 -0
@@ -0,0 +1,65 @@
"""
Example inference script for the fine-tuned Reading Steiner model.
Replace with your actual LoRA adapter path after training.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3.5-2B",
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# Load LoRA adapter (replace with your actual adapter path)
# model = PeftModel.from_pretrained(base_model, "OmAlve/reading-steiner-qwen3.5-2b")
# Optionally merge adapter into base model for faster inference:
# model = model.merge_and_unload()
model = base_model  # Replace with above after training

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3.5-2B", trust_remote_code=True)

# Example input
messages = [
    {"role": "system", "content": (
        "You are Reading Steiner, a web content extraction model. "
        "Given a webpage split into indexed blocks, identify which blocks contain the main content. "
        "Output indices as a Python list of [start, end] intervals."
    )},
    {"role": "user", "content": (
        "URL: https://example.com\n"
        "Title: Example Page\n"
        "Blocks:\n"
        '[1] <div class="nav">Home | About | Contact</div>\n'
        '[2] <div class="sidebar">Trending</div>\n'
        '[3] <p>This is the main article content.</p>\n'
        '[4] <p>More content here.</p>\n'
        '[5] <div class="footer">Copyright 2025</div>'
    )},
]

inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    return_tensors="pt",
    add_generation_prompt=True,
)
inputs = inputs.to(model.device)

# Generate
with torch.no_grad():
    outputs = model.generate(
        inputs,
        max_new_tokens=128,
        # temperature only takes effect when do_sample=True; this call uses greedy decoding
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )

result = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True)  # decode only the newly generated tokens
print("=== Generated Output ===")
print(result)
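
The script ends by printing the raw generated text. A minimal follow-up sketch, not part of the uploaded file and assuming the model emits a Python-style list literal such as [[3, 4]] as instructed by the system prompt, of how that output could be parsed back into block indices:

import ast

def parse_intervals(text):
    # Assumption: the model outputs a list of [start, end] pairs, e.g. "[[3, 4]]".
    start, end = text.find("["), text.rfind("]")
    if start == -1 or end == -1:
        return []
    try:
        intervals = ast.literal_eval(text[start:end + 1])
    except (ValueError, SyntaxError):
        return []
    if not isinstance(intervals, (list, tuple)):
        return []
    return [list(p) for p in intervals if isinstance(p, (list, tuple)) and len(p) == 2]

# e.g. parse_intervals("[[3, 4]]") -> [[3, 4]], i.e. blocks 3 through 4 are main content
print(parse_intervals(result))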
|