"""
Optimized Game Fuzzy Matcher for G-Assist LLM Integration

Handles game name variations, prioritizes Steam API data, and provides intelligent matching.
"""
|
|
|
import asyncio
import logging
import re
from collections import defaultdict
from typing import List, Tuple, Dict, Optional

from src.rtx_llm_analyzer import GAssistLLMAnalyzer
|
|
|
|
|
class OptimizedGameFuzzyMatcher:
    """
    Fuzzy matcher for video game titles.

    Combines a small table of known aliases (``game_map``), acronym
    expansion, roman/arabic numeral handling and token-set similarity to
    pick the best candidate title for a user query.

    The lookup tables are defined once as class-level defaults and copied
    onto every instance, so ``matcher.game_map`` etc. can be customised per
    instance without affecting other matchers.
    """

    # Lower-case alphanumeric runs; mirrors the alphabet kept by
    # ``preprocess_title`` so both tokenizers agree on word boundaries.
    _TOKEN_RE = re.compile(r'[a-z0-9]+')

    # Known query spellings -> canonical title (all lower-case).
    DEFAULT_GAME_MAP = {
        'diablo': 'diablo',
        'diablo i': 'diablo',
        'diablo 2': 'diablo ii',
        'diablo 3': 'diablo iii',
        'diablo 4': 'diablo iv',

        'grand theft auto': 'grand theft auto',
        'gta': 'grand theft auto',
        'gta 3': 'grand theft auto 3',
        'gta iii': 'grand theft auto 3',
        'gta 4': 'grand theft auto 4',
        'gta iv': 'grand theft auto 4',
        'gta 5': 'grand theft auto 5',
        'gta v': 'grand theft auto 5',
    }

    # Acronym -> expanded token sequence.
    DEFAULT_ACRONYM_MAP = {
        'gta': ('grand', 'theft', 'auto'),
        'cod': ('call', 'of', 'duty'),
        'cs': ('counter', 'strike'),
        'csgo': ('counter', 'strike', 'global', 'offensive'),
        'pubg': ('playerunknowns', 'battlegrounds'),
        'ac': ('assassins', 'creed'),
        'ds': ('dark', 'souls'),
        'gow': ('god', 'of', 'war'),
        'hzd': ('horizon', 'zero', 'dawn'),
        'botw': ('breath', 'of', 'the', 'wild'),
        'mw': ('modern', 'warfare'),
        'nfs': ('need', 'for', 'speed'),
        'ff': ('final', 'fantasy'),
        'lol': ('league', 'of', 'legends'),
        'wow': ('world', 'of', 'warcraft'),
        'diablo': ('diablo',),
    }

    # Roman numeral -> arabic digits (as strings), 1 through 15.
    DEFAULT_ROMAN_MAP = {
        'i': '1', 'ii': '2', 'iii': '3', 'iv': '4', 'v': '5',
        'vi': '6', 'vii': '7', 'viii': '8', 'ix': '9', 'x': '10',
        'xi': '11', 'xii': '12', 'xiii': '13', 'xiv': '14', 'xv': '15',
    }

    # Words that describe an edition rather than the game itself.
    DEFAULT_EDITION_WORDS = frozenset({
        'edition', 'remastered', 'remake', 'definitive', 'ultimate',
        'goty', 'complete', 'deluxe', 'special', 'anniversary', 'enhanced',
    })

    def __init__(self, threshold: float = 0.75):
        """
        Args:
            threshold: Minimum confidence accepted by
                ``match_with_steam_fallback``.
        """
        self.threshold = threshold
        # Memo of preprocess_title results, keyed by "prep_<title>".
        self.cache = {}
        self.logger = logging.getLogger(__name__)
        self.llm_analyzer = GAssistLLMAnalyzer()
        self._load_default_tables()

    def _load_default_tables(self) -> None:
        """Copy the class-level default tables onto this instance."""
        self.game_map = dict(self.DEFAULT_GAME_MAP)
        self.acronym_map = {
            acronym: list(words)
            for acronym, words in self.DEFAULT_ACRONYM_MAP.items()
        }
        self.roman_map = dict(self.DEFAULT_ROMAN_MAP)
        # Reverse lookup: arabic digits -> roman numeral.
        self.number_to_roman = {
            number: roman for roman, number in self.roman_map.items()
        }
        self.edition_words = set(self.DEFAULT_EDITION_WORDS)

    def _canonical_tokens(self, text: str) -> List[str]:
        """Tokenize *text* and rewrite roman-numeral tokens as digits.

        Used where "IV" and "4" must compare equal while "V" and "VI"
        must not.
        """
        return [
            self.roman_map.get(token, token)
            for token in self._TOKEN_RE.findall(text.lower())
        ]

    @staticmethod
    def _contains_run(haystack: List[str], needle: List[str]) -> bool:
        """Return True if *needle* occurs as a contiguous run in *haystack*."""
        width = len(needle)
        if width == 0:
            return False
        return any(
            haystack[start:start + width] == needle
            for start in range(len(haystack) - width + 1)
        )

    def preprocess_title(self, title: str) -> List[str]:
        """Tokenize a game title, expanding acronyms where applicable.

        The title is lower-cased and every character outside ``[a-z0-9]``
        and whitespace is replaced by a space. Titles with at least two
        tokens of which one is a number or roman numeral are returned as
        plain tokens (no acronym expansion, no numeral rewriting) so the
        sequel number is preserved verbatim. Otherwise acronyms (including
        ones split over two adjacent tokens, e.g. tokens that join to a
        known acronym) are expanded and the remaining tokens go through
        ``normalize_token``.

        Results are memoized in ``self.cache``; a fresh list is returned on
        every call so callers may mutate it safely.
        """
        cache_key = f"prep_{title}"
        cached = self.cache.get(cache_key)
        if cached is not None:
            return list(cached)

        tokens = re.sub(r'[^a-z0-9\s]', ' ', title.lower()).split()

        is_numbered = len(tokens) >= 2 and any(
            tok.isdigit() or tok in self.roman_map for tok in tokens
        )
        if is_numbered:
            self.logger.info("Preserving numbered game title: %s", title)
            self.cache[cache_key] = tokens
            return list(tokens)

        processed = []
        index = 0
        total = len(tokens)
        while index < total:
            token = tokens[index]
            if token in self.acronym_map:
                processed.extend(self.acronym_map[token])
            else:
                # Try an acronym that was split across two tokens.
                joined = token + tokens[index + 1] if index + 1 < total else None
                if joined is not None and joined in self.acronym_map:
                    processed.extend(self.acronym_map[joined])
                    index += 1  # the second half has been consumed
                else:
                    processed.append(self.normalize_token(token))
            index += 1

        self.cache[cache_key] = processed
        return list(processed)

    def normalize_token(self, token: str) -> str:
        """Normalize a single token.

        * roman numeral (i..xv)  -> arabic digits
        * arabic number (1..15)  -> roman numeral
        * any other two-digit number -> four-digit year ("98" -> "1998",
          "07" -> "2007"; values below 50 are treated as 20xx)
        * anything else is returned unchanged

        NOTE(review): the roman/arabic conversion is a two-way swap rather
        than a canonicalisation, so "4" and "iv" do not normalize to the
        same token. Kept as-is because ``preprocess_title`` only routes
        tokens here for titles without a sequel number.
        """
        if token in self.roman_map:
            return self.roman_map[token]
        if token in self.number_to_roman:
            return self.number_to_roman[token]
        if token.isdigit() and len(token) == 2:
            century = "20" if int(token) < 50 else "19"
            return f"{century}{token}"
        return token

    def fuzzy_match_with_variants(self, query: str, target: str) -> float:
        """
        Score *query* against *target* using every pair of title variants.

        Specifically handles cases like "Diablo 4" -> "Diablo IV".

        Returns:
            1.0 for a case-insensitive exact match; otherwise the best
            ``basic_fuzzy_match`` score over all variant pairs. The search
            stops early as soon as a pair scores at least 0.95.
        """
        if query.lower() == target.lower():
            return 1.0

        query_variants = self.generate_variants(query)
        target_variants = self.generate_variants(target)

        best = 0.0
        for q_variant in query_variants:
            for t_variant in target_variants:
                score = self.basic_fuzzy_match(q_variant, t_variant)
                if score >= 0.95:
                    # Good enough; no need to examine remaining pairs.
                    return score
                best = max(best, score)
        return best

    def generate_variants(self, title: str) -> List[str]:
        """Generate alternative spellings of a title for robust matching.

        The result contains, in order and without duplicates: the original
        title, its preprocessed form, and one lower-cased variant per
        numeral found in the title with that numeral converted
        (arabic -> roman first, then roman -> arabic).

        Numerals are only replaced when they stand alone as a token, so the
        "i" inside "diablo" or the "1" inside "10" is never rewritten.
        """
        variants = [title]

        tokens = self.preprocess_title(title)
        if tokens:
            variants.append(' '.join(tokens))

        lower_title = title.lower()
        for table in (self.number_to_roman, self.roman_map):
            for numeral, replacement in table.items():
                # Lookarounds enforce whole-token matches on the same
                # alphabet the tokenizer uses.
                pattern = rf'(?<![a-z0-9]){re.escape(numeral)}(?![a-z0-9])'
                swapped, hits = re.subn(pattern, replacement, lower_title)
                if hits:
                    variants.append(swapped)

        # dict preserves insertion order, giving an order-stable dedupe.
        return list(dict.fromkeys(variants))

    def basic_fuzzy_match(self, title1: str, title2: str) -> float:
        """Token-set similarity between two titles.

        Computes the Jaccard index of the preprocessed token sets, boosts it
        by 20% when the titles share at least one non-edition word, and
        reduces the weight by 5% for every edition word present in only one
        of the titles. The result is capped at 1.0; 0.0 is returned when
        either title has no tokens.
        """
        first = set(self.preprocess_title(title1))
        second = set(self.preprocess_title(title2))
        if not first or not second:
            return 0.0

        shared = first & second
        jaccard = len(shared) / len(first | second)

        weight = 1.0
        if shared - self.edition_words:
            weight += 0.2
        one_sided_editions = (first ^ second) & self.edition_words
        weight -= 0.05 * len(one_sided_editions)

        return min(1.0, jaccard * weight)

    def normalize_game_name(self, game_name: str) -> str:
        """
        Normalize game name for consistent caching and matching.

        Args:
            game_name: Original game name

        Returns:
            The tokens produced by ``preprocess_title`` joined by single
            spaces.
        """
        return ' '.join(self.preprocess_title(game_name))

    async def find_best_match(self, query: str, candidates: List[str],
                              steam_priority: bool = True) -> Optional[Tuple[str, float]]:
        """
        Find the best candidate for *query*.

        Matching proceeds in stages and returns at the first hit:

        1. If the query is a key of ``game_map``, look for a candidate equal
           to the mapped title (score 1.0; roman and arabic numerals are
           treated as equal), then for a candidate that contains the mapped
           title as a run of whole words (score 0.95).
        2. Case-insensitive exact match on the query itself (score 1.0).
        3. Numbered queries (two or more words, one of them a number or
           roman numeral): every query word must appear as a whole word in
           the candidate, numerals compared by value (score 1.0). If no
           candidate qualifies, ``None`` is returned so a different sequel
           is never substituted.
        4. Word-overlap scoring: candidates sharing more than half of the
           query's words are scored by ``shared / max(len)``; the best one
           is returned if it scores above 0.6.
        5. Otherwise the first candidate is returned with score 0.5.

        Args:
            query: Game name to search for
            candidates: List of candidate game names
            steam_priority: Accepted for interface compatibility; not
                currently used by the matching logic.

        Returns:
            Tuple of (best_match, confidence_score), or None when
            *candidates* is empty or a numbered query has no match.
        """
        if not candidates:
            return None

        query_lower = query.lower()

        # Stage 1: known alias table.
        mapped_name = self.game_map.get(query_lower)
        if mapped_name is not None:
            self.logger.info("Direct game map match: '%s' -> '%s'", query, mapped_name)
            mapped_lower = mapped_name.lower()

            for candidate in candidates:
                if candidate.lower() == mapped_lower:
                    return candidate, 1.0

            mapped_tokens = self._canonical_tokens(mapped_name)
            tokenized = [(cand, self._canonical_tokens(cand)) for cand in candidates]

            # Same words once numerals are compared by value
            # ("grand theft auto 5" vs "Grand Theft Auto V").
            for candidate, cand_tokens in tokenized:
                if cand_tokens == mapped_tokens:
                    return candidate, 1.0

            for candidate, cand_tokens in tokenized:
                if self._contains_run(cand_tokens, mapped_tokens):
                    self.logger.info(
                        "Partial match for mapped name: '%s' found in '%s'",
                        mapped_name, candidate,
                    )
                    return candidate, 0.95

        # Stage 2: exact match on the raw query.
        for candidate in candidates:
            if candidate.lower() == query_lower:
                return candidate, 1.0

        # Stage 3: numbered titles must match on whole words.
        query_words = query_lower.split()
        if len(query_words) >= 2 and any(
            word.isdigit() or word in self.roman_map for word in query_words
        ):
            self.logger.info("Preserving numbered query: %s", query)
            wanted = set(self._canonical_tokens(query))
            for candidate in candidates:
                if wanted <= set(self._canonical_tokens(candidate)):
                    return candidate, 1.0
            self.logger.info("No exact match for numbered game: '%s'", query)
            return None

        # Stage 4: plain word-overlap scoring.
        query_set = set(query_words)
        scored = []
        for candidate in candidates:
            candidate_set = set(candidate.lower().split())
            shared = query_set & candidate_set
            if shared and len(shared) / len(query_set) > 0.5:
                score = len(shared) / max(len(query_set), len(candidate_set))
                scored.append((candidate, score))

        if scored:
            # max() keeps the earliest candidate among equal scores.
            best_match, best_score = max(scored, key=lambda pair: pair[1])
            if best_score > 0.6:
                return best_match, best_score

        # Stage 5: low-confidence fallback (candidates is non-empty here).
        return candidates[0], 0.5

    def looks_like_steam_title(self, title: str) -> bool:
        """Heuristic to identify Steam-style game titles.

        A title qualifies when it is longer than five characters, contains
        no colon, and does not contain any of the substrings "gta", "cod"
        or "cs" (case-insensitive).

        NOTE(review): the abbreviation check is a substring test, so titles
        such as "Code Vein" are rejected as well.
        """
        lowered = title.lower()
        has_abbreviation = any(abbrev in lowered for abbrev in ('gta', 'cod', 'cs'))
        return len(title) > 5 and not has_abbreviation and ':' not in title

    async def match_with_steam_fallback(self, query: str, steam_candidates: List[str],
                                        cache_candidates: List[str]) -> Optional[Tuple[str, float, str]]:
        """
        Match against Steam candidates first, then the local cache.

        A match is accepted only when its score reaches ``self.threshold``.
        Either candidate list may be empty or ``None``.

        Returns:
            Tuple of (matched_name, confidence_score, source) where source
            is "Steam API" or "Local Cache", or None when neither list
            yields an acceptable match.
        """
        sources = (
            (steam_candidates, True, "Steam API"),
            (cache_candidates, False, "Local Cache"),
        )
        for pool, prioritize_steam, label in sources:
            if not pool:
                continue
            found = await self.find_best_match(query, pool, steam_priority=prioritize_steam)
            if found and found[1] >= self.threshold:
                return found[0], found[1], label

        # `or ()` keeps the log line from raising when a list is None.
        self.logger.warning(
            "No fuzzy match found for '%s' in %d Steam + %d cache candidates",
            query, len(steam_candidates or ()), len(cache_candidates or ()),
        )
        return None
|
|
|
|
|
|
|
# Shared module-level matcher instance. It is created at import time with an
# acceptance threshold of 0.7 instead of the class default of 0.75.
game_fuzzy_matcher = OptimizedGameFuzzyMatcher(threshold=0.7)
|
|
|