Environmental-Impact-Analyzer / unity_functions.py
Kunal
added unity function for data parsing and scraping websites
0929a94
import json
import re
from typing import Dict, Any
def extract_manual_data(content: str) -> Dict[str, Any]:
    """Manually extract product data when JSON parsing fails.

    Scans free-form text with keyword-based regular expressions and collects
    whatever product attributes it can find. Matching runs on a lower-cased
    copy of the text, so captured text values are returned in lower case.

    Args:
        content: Raw text to scan. ``None`` or an empty string is treated as
            text that contains no matches.

    Returns:
        A dict holding whichever of these keys were found:

        * ``material_composition``, ``manufacturing_location``,
          ``packaging_type``: the remainder of the matching line (str).
        * ``product_weight``: weight in kilograms (float).
        * ``transport_distance``: distance in kilometres (float).
        * ``energy_usage``: energy in kWh (float); a value given in watts is
          converted assuming one hour of usage.

        ``product_weight`` (0.5) and ``transport_distance`` (1000) are always
        present because defaults are filled in when they are not found.
    """
    # Keyword patterns, tried in this order; the first occurrence in the
    # text wins. Word endings are optional *suffix groups* rather than
    # character classes, so "manufactured in china" yields "china" and
    # "package: box" yields "box".
    patterns = {
        'material_composition': r'material[s]?[:\-\s]*(.*?)(?:\n|$)',
        'manufacturing_location': r'manufactur(?:ing|ed)?[:\-\s]*(?:in\s+)?(.*?)(?:\n|$)',
        'product_weight': r'weight[:\-\s]*(\d+(?:\.\d+)?)\s*(kg|grams?|g|pounds?|lbs?)',
        'transport_distance': r'distance[:\-\s]*(\d+(?:\.\d+)?)\s*(km|miles?)',
        'packaging_type': r'packag(?:ing|ed|e)?[:\-\s]*(.*?)(?:\n|$)',
        'energy_usage': r'energy[:\-\s]*(\d+(?:\.\d+)?)\s*(kwh|watts?|w)',
    }
    numeric_keys = ('product_weight', 'transport_distance', 'energy_usage')

    extracted_data: Dict[str, Any] = {}
    content_lower = (content or "").lower()

    for key, pattern in patterns.items():
        match = re.search(pattern, content_lower, re.IGNORECASE)
        if not match:
            continue

        if key not in numeric_keys:
            extracted_data[key] = match.group(1).strip()
            continue

        # Numeric fields: parse the number and normalise it to the standard
        # unit. Every unit spelling the regex can capture is handled here,
        # including the singular forms "pound", "gram" and "watt".
        value = float(match.group(1))
        unit = match.group(2).lower()

        if key == 'product_weight':
            if unit in ('g', 'gram', 'grams'):
                value = value / 1000  # grams -> kg
            elif unit in ('lb', 'lbs', 'pound', 'pounds'):
                value = value * 0.453592  # pounds -> kg
        elif key == 'transport_distance':
            if unit in ('mile', 'miles'):
                value = value * 1.60934  # miles -> km
        elif key == 'energy_usage':
            if unit in ('w', 'watt', 'watts'):
                value = value / 1000  # watts -> kWh (assuming 1 hour usage)

        extracted_data[key] = value

    # Fill in defaults for the fields that must always be present.
    extracted_data.setdefault('product_weight', 0.5)  # Default 0.5 kg
    extracted_data.setdefault('transport_distance', 1000)  # Default 1000 km
    return extracted_data
def parse_extraction_result(content):
    """Extract structured data from an LLM response.

    Looks for a JSON object embedded in the response text and returns it.
    Each ``{`` is tried in turn as the start of an object, so braces that
    appear elsewhere in the surrounding prose do not prevent a valid object
    from being found. If no JSON object can be decoded, the text is handed
    to ``extract_manual_data`` for regex-based extraction.

    Args:
        content: The raw response text. A non-string value (e.g. ``None``)
            is treated as an empty response.

    Returns:
        The first decodable JSON object as a dict, otherwise the result of
        ``extract_manual_data``.
    """
    text = content if isinstance(content, str) else ""

    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            # raw_decode parses exactly one JSON value beginning at `start`
            # and ignores anything that follows it.
            parsed, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass  # Not valid JSON from this brace; try the next one.
        else:
            if isinstance(parsed, dict):
                return parsed
        start = text.find('{', start + 1)

    # Fallback to manual parsing
    return extract_manual_data(text)
def parse_scoring_result(content):
    """Extract score and recommendations from an LLM response.

    Score: a number that directly follows the word "score" (optionally
    separated by ``:``, ``=``, ``-``, ``*``, whitespace, or "is"/"of") is
    preferred, so an unrelated number earlier in the text (e.g. "Here are
    3 tips") is not mistaken for the score. If there is no such labelled
    number, the first integer in the text is used; if the text has no
    integer at all, the score defaults to 50.

    Recommendations: every line that starts with ``-`` or ``•``, or with
    ``*`` followed by whitespace, contributes its text after the bullet.
    Bullets with no text are skipped.

    Args:
        content: The raw response text. A non-string value (e.g. ``None``)
            is treated as an empty response.

    Returns:
        A dict with keys ``"score"`` (int) and ``"recommendations"``
        (list of str).
    """
    text = content if isinstance(content, str) else ""

    labelled_match = re.search(
        r'score[\s:*=\-]*(?:is|of)?[\s:*=\-]*(\d+)', text, re.IGNORECASE
    )
    score_match = labelled_match or re.search(r'(\d+)', text)
    score = int(score_match.group(1)) if score_match else 50

    # Extract recommendations
    recommendations = []
    for line in text.split('\n'):
        stripped = line.strip()
        is_bullet = stripped.startswith(('-', '•')) or (
            # Require whitespace after '*' so markdown bold ("**Score**")
            # is not mistaken for a bullet.
            stripped.startswith('*') and stripped[1:2].isspace()
        )
        if not is_bullet:
            continue
        item = stripped[1:].strip()
        if item:
            recommendations.append(item)

    return {
        "score": score,
        "recommendations": recommendations
    }