import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub repository id of the checkpoint loaded by this script.
MODEL_ID = "kyLELEng/retailops-instruct-qwen3.5-9b"

# System message sent as the first turn of every conversation. Adjacent
# literals are concatenated at compile time, so this is one single-line string.
SYSTEM_PROMPT = (
    "You are RetailOps-Instruct, an e-commerce catalog optimization assistant. "
    "Return exactly one valid JSON object. "
    "Do not invent unsupported product features. "
    "Use product metadata and reviews as the source of truth."
)

# Flag shared by both loaders below.
# NOTE(review): trust_remote_code=True lets transformers execute Python code
# shipped inside the Hub repository -- only keep it for repositories you trust.
_HUB_KWARGS = {"trust_remote_code": True}

# Tokenizer (and its chat template) come from the same repo as the weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_HUB_KWARGS)

# Load the weights in bfloat16 and let device_map="auto" decide where they go.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    **_HUB_KWARGS,
)

# Inference only: put the module in evaluation mode.
model.eval()

# Example product record that is serialized to JSON and embedded in the user
# turn of the prompt. Key order is preserved in the serialized output.
sample_input = {
    # Catalog placement and identity.
    "category": "Home & Kitchen > Kitchen & Dining > Small Appliances",
    "brand": "ExampleBrand",
    # Unoptimized listing copy.
    "raw_title": "Portable Electric Kettle",
    "raw_description": "Small kettle for boiling water.",
    # Structured specifications.
    "product_specs": {
        "capacity": "1.0L",
        "material": "stainless steel",
        "safety": "auto shut-off",
    },
    # Customer feedback, split by sentiment.
    "positive_reviews": [
        "Heats water quickly.",
        "Good size for small apartments.",
    ],
    "negative_reviews": [
        "The instructions were unclear.",
        "The outside gets warm after use.",
    ],
    # Tone requested for the generated copy.
    "brand_voice": "clear, practical, trustworthy",
}

# Pretty-printed JSON of the product record; ensure_ascii=False keeps any
# non-ASCII characters readable instead of \u-escaping them.
_product_json = json.dumps(sample_input, ensure_ascii=False, indent=2)

# Two-turn conversation: the fixed system instruction, then the task statement
# followed by the serialized product record under an "INPUT:" header.
messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {
        "role": "user",
        "content": (
            "Given the product metadata, category, and customer reviews, "
            "generate an optimized product listing package.\n\n"
            f"INPUT:\n{_product_json}"
        ),
    },
]

# Render the conversation to plain text with the checkpoint's chat template.
# add_generation_prompt=True appends the assistant-turn header so the model
# starts a reply. enable_thinking is not a named parameter of
# apply_chat_template; it is forwarded to the template as an extra variable.
# NOTE(review): presumably this turns off the reasoning preamble in
# Qwen-style templates -- confirm against this checkpoint's template.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    enable_thinking=False,
)

# The rendered prompt already contains every special token the template
# wants, so the tokenizer must not add its own on top of them: a tokenizer
# that prepends BOS/EOS by default would otherwise duplicate those tokens.
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    add_special_tokens=False,
).to(model.device)
prompt_length = inputs["input_ids"].shape[-1]

# Deterministic decoding (do_sample=False), capped at 900 new tokens.
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=900, do_sample=False)

# The returned sequence starts with the prompt tokens; drop them so only the
# newly generated text is printed.
completion_ids = output[0][prompt_length:]
print(tokenizer.decode(completion_ids, skip_special_tokens=True))