# retailops-instruct-qwen3.5-9b / inference_retailops.py
# kyLELEng's picture
# Upload merged RetailOps-Instruct Qwen3.5-9B full model
# a750b19 verified
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hub repository id from which both the tokenizer and the model are loaded.
MODEL_ID = "kyLELEng/retailops-instruct-qwen3.5-9b"

# System message sent with every request. Written as one literal per sentence;
# adjacent literals are concatenated at compile time into a single string.
SYSTEM_PROMPT = (
    "You are RetailOps-Instruct, an e-commerce catalog optimization assistant. "
    "Return exactly one valid JSON object. "
    "Do not invent unsupported product features. "
    "Use product metadata and reviews as the source of truth."
)
# Load the tokenizer and the model weights from the same repository.
# NOTE(review): trust_remote_code=True allows Python code shipped in the model
# repository to run locally -- only keep it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# bfloat16 weights with automatic device placement; the model is switched to
# evaluation mode right away (eval() returns the module itself, so the call
# can be chained onto the load).
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
).eval()
# Example product record that is serialized to JSON and embedded in the user
# message. Key order is preserved by the dict and therefore by json.dumps.
sample_input = {
    "category": "Home & Kitchen > Kitchen & Dining > Small Appliances",
    "brand": "ExampleBrand",
    "raw_title": "Portable Electric Kettle",
    "raw_description": "Small kettle for boiling water.",
    "product_specs": {
        "capacity": "1.0L",
        "material": "stainless steel",
        "safety": "auto shut-off",
    },
    "positive_reviews": [
        "Heats water quickly.",
        "Good size for small apartments.",
    ],
    "negative_reviews": [
        "The instructions were unclear.",
        "The outside gets warm after use.",
    ],
    "brand_voice": "clear, practical, trustworthy",
}
# Two-turn conversation: the fixed system instructions, then a user turn made
# of the task description followed by the product record as pretty-printed
# JSON (ensure_ascii=False keeps any non-ASCII characters unescaped).
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": (
            "Given the product metadata, category, and customer reviews, "
            "generate an optimized product listing package.\n\nINPUT:\n"
            f"{json.dumps(sample_input, ensure_ascii=False, indent=2)}"
        ),
    },
]
# Render the conversation into a single prompt string using the tokenizer's
# chat template.
prompt = tokenizer.apply_chat_template(
    messages,
    # Return text rather than token ids; tokenization happens as a later step.
    tokenize=False,
    # End the prompt with the opening of the assistant turn so the model
    # starts answering instead of continuing the user message.
    add_generation_prompt=True,
    # Extra keyword forwarded to the chat template.
    # NOTE(review): presumably turns off the Qwen "thinking" preamble so the
    # reply is only the JSON object -- confirm against the model's template.
    enable_thinking=False,
)
# Tokenize the rendered prompt into PyTorch tensors and move them to the
# device reported by the model.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Autograd is not needed for inference, so generation runs with gradient
# tracking disabled. The body of the `with` statement must be indented;
# without the indentation the script fails with an IndentationError.
with torch.no_grad():
    # Deterministic (non-sampling) decoding, capped at 900 new tokens.
    output = model.generate(**inputs, max_new_tokens=900, do_sample=False)

# The returned sequence starts with the prompt tokens, so skip that many
# tokens and decode only the newly generated continuation.
prompt_length = inputs["input_ids"].shape[-1]
print(tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True))