import json
import re
from typing import Dict, Any

# Field name -> compiled pattern, listed in the order fields are added to the
# result. Patterns are lowercase because they are matched against lowercased
# content. Word stems use explicit optional suffixes ("manufacturing",
# "manufactured", "package", ...) so the suffix is consumed by the keyword and
# never leaks into the captured value.
_FIELD_PATTERNS = {
    'material_composition': re.compile(r'materials?[:\-\s]*(.*?)(?:\n|$)'),
    'manufacturing_location': re.compile(
        r'manufactur(?:ing|ed|er)?[:\-\s]*(?:in\s+)?(.*?)(?:\n|$)'
    ),
    'product_weight': re.compile(
        r'weight[:\-\s]*(\d+(?:\.\d+)?)\s*(kg|g|pounds?|lbs?)'
    ),
    'transport_distance': re.compile(
        r'distance[:\-\s]*(\d+(?:\.\d+)?)\s*(km|miles?)'
    ),
    'packaging_type': re.compile(r'packag(?:ing|ed|es?)?[:\-\s]*(.*?)(?:\n|$)'),
    'energy_usage': re.compile(
        r'energy[:\-\s]*(\d+(?:\.\d+)?)\s*(kwh|watts?|w)'
    ),
}

# Fields whose pattern captures (number, unit) instead of free text.
_NUMERIC_FIELDS = frozenset(
    {'product_weight', 'transport_distance', 'energy_usage'}
)

# Every spelling the unit alternations above can capture for a non-standard
# unit. Keeping these in sync with the regexes is what guarantees conversion.
_POUND_UNITS = frozenset({'pound', 'pounds', 'lb', 'lbs'})
_MILE_UNITS = frozenset({'mile', 'miles'})
_WATT_UNITS = frozenset({'watt', 'watts', 'w'})

# A number that directly follows the word "score" (separators only in between).
_LABELLED_SCORE = re.compile(r'\bscore\b[\s:*=\-]*(\d+)', re.IGNORECASE)
_FIRST_INTEGER = re.compile(r'(\d+)')

_BULLET_MARKERS = ('-', '•')


def _to_standard_unit(key: str, value: float, unit: str) -> float:
    """Convert *value* expressed in *unit* to the standard unit for *key*.

    Standard units are kg for ``product_weight``, km for
    ``transport_distance`` and kWh for ``energy_usage``. Units that are
    already standard are returned unchanged.
    """
    if key == 'product_weight':
        if unit == 'g':
            return value / 1000
        if unit in _POUND_UNITS:
            return value * 0.453592
    elif key == 'transport_distance':
        if unit in _MILE_UNITS:
            return value * 1.60934
    elif key == 'energy_usage':
        if unit in _WATT_UNITS:
            # Watts are a power, not an energy: assumes 1 hour of usage.
            return value / 1000
    return value


def extract_manual_data(content: str) -> Dict[str, Any]:
    """Extract product data from free text when JSON parsing fails.

    The text is lowercased and searched for keyword-prefixed fields
    (material, manufacturing, weight, distance, packaging, energy). Only the
    first occurrence of each keyword is used.

    Args:
        content: Raw text to scan.

    Returns:
        A dict holding whichever fields were found. Text fields
        (``material_composition``, ``manufacturing_location``,
        ``packaging_type``) are lowercased, stripped strings. Numeric fields
        are floats in kg / km / kWh. ``product_weight`` (0.5) and
        ``transport_distance`` (1000) are always present, using those
        defaults when not found.
    """
    extracted_data: Dict[str, Any] = {}
    content_lower = content.lower()

    for key, pattern in _FIELD_PATTERNS.items():
        match = pattern.search(content_lower)
        if match is None:
            continue
        if key in _NUMERIC_FIELDS:
            extracted_data[key] = _to_standard_unit(
                key, float(match.group(1)), match.group(2)
            )
        else:
            extracted_data[key] = match.group(1).strip()

    # Downstream code relies on these two fields always being present.
    extracted_data.setdefault('product_weight', 0.5)  # Default 0.5 kg
    extracted_data.setdefault('transport_distance', 1000)  # Default 1000 km

    return extracted_data


def parse_extraction_result(content: str) -> Dict[str, Any]:
    """Extract structured data from an LLM response.

    Scans for the first ``{`` that starts a valid JSON object and returns
    that object. Text after the object (even text containing braces) is
    ignored. If no JSON object can be decoded, falls back to
    ``extract_manual_data``.

    Args:
        content: Raw LLM response text.

    Returns:
        The decoded JSON object, or the result of manual extraction.
    """
    decoder = json.JSONDecoder()
    start = content.find('{')
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value,
            # so trailing prose does not invalidate the parse.
            parsed, _ = decoder.raw_decode(content, start)
        except (ValueError, RecursionError):
            # Not JSON at this brace (or pathologically nested); try the next.
            start = content.find('{', start + 1)
        else:
            return parsed

    # Fallback to manual parsing
    return extract_manual_data(content)


def parse_scoring_result(content: str) -> Dict[str, Any]:
    """Extract a score and recommendations from an LLM response.

    The score is the integer that directly follows the word "score" when
    such a label exists, otherwise the first integer in the text, otherwise
    50. Recommendations are the lines that start with ``-`` or ``•``, with
    the marker and surrounding whitespace removed; bullets with no text are
    skipped.

    Args:
        content: Raw LLM response text.

    Returns:
        ``{"score": int, "recommendations": list of str}``.
    """
    score_match = _LABELLED_SCORE.search(content) or _FIRST_INTEGER.search(content)
    score = int(score_match.group(1)) if score_match else 50

    recommendations = []
    for line in content.split('\n'):
        stripped = line.strip()
        if not stripped.startswith(_BULLET_MARKERS):
            continue
        text = stripped[1:].strip()
        if text:
            recommendations.append(text)

    return {
        "score": score,
        "recommendations": recommendations
    }