| import json |
| import re |
| from typing import Dict, Any |
|
|
def extract_manual_data(content: str) -> Dict[str, Any]:
    """Extract product data from free text with regexes when JSON parsing fails.

    The text is lower-cased before matching, so every extracted text value is
    returned in lower case.

    Args:
        content: Free-form text to scan. ``None`` or an empty string is
            treated as text with no matches.

    Returns:
        A dict that may contain:

        * ``material_composition``, ``manufacturing_location``,
          ``packaging_type``: the rest of the line following the label,
          stripped of surrounding whitespace.
        * ``product_weight``: float; values given in g are divided by 1000
          and values in pounds/lbs are multiplied by 0.453592, kg values are
          kept as-is. Defaults to ``0.5`` when no weight is found.
        * ``transport_distance``: float; values in miles are multiplied by
          1.60934, km values are kept as-is. Defaults to ``1000`` when no
          distance is found.
        * ``energy_usage``: float; values in watts/w are divided by 1000,
          kwh values are kept as-is. Absent when no energy figure is found.
    """
    # Label patterns. The optional suffixes are spelled out as alternations:
    # a character class such as ``[ing]*`` would leave the tail of words like
    # "manufactured" or "package" inside the captured value.
    patterns = {
        'material_composition':
            r'materials?(?:[\s_]+composition)?\b[:\-\s]*(.*?)(?:\n|$)',
        'manufacturing_location':
            r'manufactur(?:ing|ed|e)\b(?:[\s_]+location\b)?'
            r'[:\-\s]*(?:in\s+)?(.*?)(?:\n|$)',
        'product_weight':
            r'weight[:\-\s]*(\d+(?:\.\d+)?)\s*(kg|g|pounds?|lbs?)',
        'transport_distance':
            r'distance[:\-\s]*(\d+(?:\.\d+)?)\s*(km|miles?)',
        'packaging_type':
            r'packag(?:ing|ed|e)\b(?:[\s_]+type\b)?[:\-\s]*(.*?)(?:\n|$)',
        'energy_usage':
            r'energy[:\-\s]*(\d+(?:\.\d+)?)\s*(kwh|watts?|w)',
    }

    # Per-field unit conversions. Every unit spelling the regexes above can
    # capture must be listed here (including the singular forms); units that
    # are missing from a table are stored unconverted.
    unit_converters = {
        'product_weight': {
            'g': lambda v: v / 1000,
            'pound': lambda v: v * 0.453592,
            'pounds': lambda v: v * 0.453592,
            'lb': lambda v: v * 0.453592,
            'lbs': lambda v: v * 0.453592,
        },
        'transport_distance': {
            'mile': lambda v: v * 1.60934,
            'miles': lambda v: v * 1.60934,
        },
        'energy_usage': {
            'watt': lambda v: v / 1000,
            'watts': lambda v: v / 1000,
            'w': lambda v: v / 1000,
        },
    }

    # Matching runs on lower-cased text, so no IGNORECASE flag is needed.
    content_lower = (content or "").lower()
    extracted_data: Dict[str, Any] = {}

    for key, pattern in patterns.items():
        match = re.search(pattern, content_lower)
        if not match:
            continue

        converters = unit_converters.get(key)
        if converters is None:
            # Text field: keep the remainder of the line.
            extracted_data[key] = match.group(1).strip()
            continue

        # Numeric field: group 1 is the number, group 2 the unit.
        value = float(match.group(1))
        convert = converters.get(match.group(2))
        extracted_data[key] = convert(value) if convert else value

    # Fallback values used when the text gives no weight or distance.
    extracted_data.setdefault('product_weight', 0.5)
    extracted_data.setdefault('transport_distance', 1000)

    return extracted_data
|
|
|
|
def parse_extraction_result(content):
    """Extract structured data from an LLM response.

    The first JSON object embedded in the text is decoded and returned. When
    the text contains no ``{`` or the JSON starting there is malformed, the
    text is handed to ``extract_manual_data`` instead.

    Args:
        content: Raw response text. ``None`` is treated as empty text and any
            other non-string value is converted with ``str()``.

    Returns:
        The decoded JSON object, or the result of ``extract_manual_data``.
    """
    if isinstance(content, str):
        text = content
    else:
        text = "" if content is None else str(content)

    start = text.find('{')
    if start != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value,
            # so prose (even prose containing braces) after the object does
            # not break parsing the way a greedy "{...}" regex match would.
            parsed, _end = json.JSONDecoder().raw_decode(text, start)
        except (ValueError, RecursionError):
            # Deliberate best-effort: malformed JSON falls through to the
            # regex-based extraction below.
            pass
        else:
            return parsed

    return extract_manual_data(text)
|
|
def parse_scoring_result(content):
    """Extract a score and bullet-point recommendations from an LLM response.

    Score selection:
        1. The integer that directly follows the word "score" on the same
           line (optionally after "is"/"of" and ``:``, ``=``, ``-``, ``*``
           or spaces), e.g. ``Score: 72/100`` or ``the score is 72``.
        2. Otherwise the first integer anywhere in the text.
        3. Otherwise the default of ``50``.

    Recommendations are lines that start with ``-`` or ``•``, or with ``*``
    followed by whitespace. The bullet is removed and the rest is stripped;
    lines with no letters or digits left (for example ``-`` or ``---``) are
    skipped.

    Args:
        content: Raw response text. ``None`` is treated as empty text and any
            other non-string value is converted with ``str()``.

    Returns:
        ``{"score": int, "recommendations": list[str]}``.
    """
    if isinstance(content, str):
        text = content
    else:
        text = "" if content is None else str(content)

    # Prefer a number explicitly labelled as the score, so unrelated numbers
    # earlier in the text (counts, list indices) are not mistaken for it.
    score_match = re.search(
        r'\bscore\b(?:[ \t]+(?:is|of))?[ \t:=\-*]*(\d+)', text, re.IGNORECASE
    ) or re.search(r'(\d+)', text)
    score = int(score_match.group(1)) if score_match else 50

    recommendations = []
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        is_bullet = line.startswith(('-', '•')) or (
            # "* item" is a bullet; "**bold**" is not.
            line.startswith('*') and line[1:2].isspace()
        )
        if not is_bullet:
            continue
        item = line[1:].strip()
        # Skip bare bullets and separator lines such as "---".
        if any(ch.isalnum() for ch in item):
            recommendations.append(item)

    return {
        "score": score,
        "recommendations": recommendations
    }
|
|