Spaces:
Sleeping
Sleeping
| """ | |
| LLM inference engine for RetailMind. | |
| Uses the HuggingFace Inference API (serverless, GPU-backed) so responses | |
| arrive in ~1–2 s instead of 15–20 s on CPU. Falls back to a structured | |
| template if the API is unavailable. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| import os | |
| from typing import Any | |
| from huggingface_hub import InferenceClient | |
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Cached client instance; stays None until _get_client() builds it on first use.
_client: InferenceClient | None = None
# Model id that generate_response passes as `model=` to the chat-completions call.
MODEL = "Qwen/Qwen2.5-72B-Instruct"  # strong model, free on HF serverless
def _get_client() -> InferenceClient:
    """Return the shared ``InferenceClient``, creating it on the first call.

    The API token is read from the ``HF_TOKEN`` environment variable and,
    when that is unset or empty, from ``HUGGING_FACE_HUB_TOKEN``. Whatever
    value results (possibly ``None``) is handed to the client constructor.
    The instance is stored in the module-level ``_client`` and reused by
    later calls.
    """
    global _client
    if _client is not None:
        return _client

    api_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    _client = InferenceClient(token=api_token)
    logger.info("InferenceClient ready (model=%s)", MODEL)
    return _client
| def _build_context(retrieved_items: list[dict[str, Any]]) -> str: | |
| lines = [] | |
| for i, r in enumerate(retrieved_items, 1): | |
| p = r["product"] | |
| stars = "β " * int(p.get("rating", 4)) + "β" * (5 - int(p.get("rating", 4))) | |
| lines.append( | |
| f"{i}. {p['title']} β ${p['price']:.2f}\n" | |
| f" Category: {p['category']} | Rating: {stars} ({p.get('reviews', 0)} reviews)\n" | |
| f" Materials: {p.get('materials', 'N/A')}\n" | |
| f" Description: {p['desc']}" | |
| ) | |
| return "\n\n".join(lines) | |
| def _fallback_response(retrieved_items: list[dict[str, Any]]) -> str: | |
| """Structured template used when the API is unavailable.""" | |
| if not retrieved_items: | |
| return "I couldn't find matching products for your query. Try different keywords." | |
| lines = ["Here are my top picks for you:\n"] | |
| for r in retrieved_items: | |
| p = r["product"] | |
| lines.append(f"β’ **{p['title']}** β ${p['price']:.2f}\n {p['desc'][:120]}β¦") | |
| return "\n".join(lines) | |
def generate_response(
    system_prompt: str,
    user_query: str,
    retrieved_items: list[dict[str, Any]],
) -> str:
    """Answer a shopper's query with the hosted chat model, grounded in inventory.

    The retrieved products are rendered with ``_build_context`` and appended
    to ``system_prompt`` together with fixed instructions. The resulting
    system message and the user's query are sent as a chat completion
    (``max_tokens=150``, ``temperature=0.3``).

    Args:
        system_prompt: Caller-supplied text that opens the system message.
        user_query: The shopper's message, sent as the user turn.
        retrieved_items: Retrieval hits in the shape ``_build_context`` and
            ``_fallback_response`` accept.

    Returns:
        The model's reply with surrounding whitespace stripped. If the
        request raises, or the reply is missing or blank, the template from
        ``_fallback_response`` is returned instead.
    """
    context = _build_context(retrieved_items)

    # Unicode escapes keep this source ASCII-only, so the glyphs cannot be
    # corrupted by an encoding round-trip of the file itself.
    rule, en_dash = "\u2500", "\u2013"
    system_content = (
        f"{system_prompt}\n\n"
        f"{rule * 6} Available Inventory {rule * 6}\n\n"
        f"{context}\n\n"
        f"{rule * 32}\n"
        "You are a helpful AI shopping assistant. "
        "Only recommend products listed above. "
        f"Cite exact names and prices. Be concise (2{en_dash}4 sentences)."
    )
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_query},
    ]

    # Deliberately broad: any failure talking to the remote API (network,
    # auth, rate limit, malformed response) should degrade to the template
    # rather than surface to the shopper.
    try:
        result = _get_client().chat.completions.create(
            model=MODEL,
            messages=messages,
            max_tokens=150,
            temperature=0.3,
        )
        content = result.choices[0].message.content
    except Exception as e:
        logger.warning("Inference API failed (%s), using fallback template.", e)
        return _fallback_response(retrieved_items)

    reply = (content or "").strip()
    if reply:
        return reply

    # A blank completion is useless to the shopper; serve the template instead.
    logger.warning("Inference API returned an empty completion, using fallback template.")
    return _fallback_response(retrieved_items)