import re
import unicodedata
from typing import Optional

import pandas as pd

# Any run of characters outside [a-z0-9] (punctuation, symbols, whitespace,
# control characters) is collapsed into a single space.
_NAO_ALFANUMERICO = re.compile(r"[^a-z0-9]+")

# Score added by ``bonus_lexical`` for each query term found in a reference.
_BONUS_POR_TERMO = 0.03

# Ordered (category, keywords) rules. Order matters: the first rule with a
# keyword contained in the text wins, so "pastel" must come before "pizza"
# for "pastel de pizza" to be classified as "pastel".
_REGRAS_CATEGORIA = (
    ("acai", ("acai",)),
    ("pastel", ("pastel",)),
    ("pizza", ("pizza",)),
    ("hamburguer", ("hamburg", "burger")),
    ("japones", ("sushi", "japones", "oriental")),
    ("suco", ("suco",)),
    ("bebida", ("bebida", "refrigerante", "refri")),
)

# Same idea for free-text user queries; the keyword sets differ slightly
# from the catalogue rules above (e.g. "x bacon", "temaki", "coca").
_REGRAS_CONSULTA = (
    ("acai", ("acai",)),
    ("pastel", ("pastel",)),
    ("pizza", ("pizza",)),
    ("hamburguer", ("hamburguer", "burger", "x bacon")),
    ("japones", ("sushi", "temaki")),
    ("suco", ("suco",)),
    ("bebida", ("coca", "refrigerante", "refri")),
)


def _primeira_categoria(texto, regras):
    """Return the category of the first rule with a keyword in *texto*, else None."""
    for categoria, palavras in regras:
        if any(palavra in texto for palavra in palavras):
            return categoria
    return None


def _texto_informado(texto):
    """Return True when *texto* is a usable (truthy, non-missing) value."""
    # ``bool(pd.NA)`` raises TypeError, so it has to be ruled out explicitly
    # before the plain truthiness test.
    if texto is None or texto is pd.NA:
        return False
    return bool(texto)


def limpar_texto(texto) -> str:
    """Normalise *texto* to lower-case ASCII letters/digits separated by single spaces.

    Missing values (anything ``pd.isna`` reports as missing) yield ``""``.
    Other values are converted with ``str``, accents are stripped, and every
    run of characters outside ``[a-z0-9]`` becomes a single space.

    Args:
        texto: Scalar value to clean (usually a string).

    Returns:
        The cleaned string, without leading or trailing spaces.
    """
    if pd.isna(texto):
        return ""
    decomposto = unicodedata.normalize("NFKD", str(texto))
    # Lower-case AFTER the compatibility decomposition: some characters expand
    # to upper-case ASCII (e.g. U+2116 -> "No") and would otherwise be
    # discarded by the [a-z0-9] filter below.
    decomposto = decomposto.lower()
    sem_acentos = "".join(
        c for c in decomposto if not unicodedata.combining(c)
    )
    return _NAO_ALFANUMERICO.sub(" ", sem_acentos).strip()


def mapear_categoria(cat) -> str:
    """Map a raw category label to a canonical category name.

    Args:
        cat: Raw category label; cleaned with ``limpar_texto`` first.

    Returns:
        The canonical category when a keyword rule matches; otherwise the
        cleaned label itself.
    """
    limpa = limpar_texto(cat)
    categoria = _primeira_categoria(limpa, _REGRAS_CATEGORIA)
    return limpa if categoria is None else categoria


def inferir_categoria_consulta(query) -> Optional[str]:
    """Infer the canonical category a search query refers to.

    Args:
        query: Free-text query; cleaned with ``limpar_texto`` first.

    Returns:
        The canonical category name, or ``None`` when no keyword rule matches.
    """
    return _primeira_categoria(limpar_texto(query), _REGRAS_CONSULTA)


def bonus_lexical(query, *texts) -> float:
    """Compute a lexical-overlap bonus between *query* and reference texts.

    Each whitespace-separated term of the cleaned query that occurs as a
    substring of at least one cleaned reference text adds 0.03 to the bonus.
    Repeated query terms are counted each time they appear.

    Args:
        query: Search query.
        *texts: Reference texts; ``None``, ``pd.NA`` and other falsy values
            are ignored.

    Returns:
        The accumulated bonus (``0.0`` when nothing matches).
    """
    referencias = [
        limpar_texto(texto) for texto in texts if _texto_informado(texto)
    ]
    bonus = 0.0
    # Plain left-to-right accumulation keeps the floating-point result
    # independent of how ``sum`` is implemented.
    for termo in limpar_texto(query).split():
        if any(termo in referencia for referencia in referencias):
            bonus += _BONUS_POR_TERMO
    return bonus