# IndiScan / utils / health_score.py
# Wendgan's picture
# Upload 9 files
# 2ae3f7c verified
from typing import List, Dict, Optional
import json
import re
class HealthScoreCalculator:
    """Score a product's ingredient list against weighted risk categories.

    Three category tables are kept on the instance: ``risk_categories``
    (always applied), ``food_categories`` (applied when the product type is
    ``'food'``) and ``cosmetic_categories`` (applied for any other product
    type). Each category maps to a ``weight`` and a list of ingredient name
    fragments; an ingredient belongs to a category when any fragment occurs
    in it as a case-insensitive substring.
    """

    def __init__(self):
        # Risk categories applied to every product type, with their weights.
        self.risk_categories = {
            'preservatives': {
                'weight': 2.0,
                'ingredients': [
                    'sodium benzoate', 'potassium sorbate', 'sulfites', 'nitrites',
                    'BHA', 'BHT', 'propylene glycol', 'sodium nitrate'
                ]
            },
            'artificial_colors': {
                'weight': 1.5,
                'ingredients': [
                    'red 40', 'yellow 5', 'yellow 6', 'blue 1', 'blue 2',
                    'green 3', 'tartrazine', 'brilliant blue'
                ]
            },
            'artificial_sweeteners': {
                'weight': 1.8,
                'ingredients': [
                    'aspartame', 'sucralose', 'saccharin', 'acesulfame k',
                    'neotame', 'advantame'
                ]
            },
            'harmful_chemicals': {
                'weight': 2.5,
                'ingredients': [
                    'parabens', 'phthalates', 'formaldehyde', 'toluene',
                    'triclosan', 'lead acetate', 'petroleum'
                ]
            }
        }
        # Cosmetic-specific categories
        self.cosmetic_categories = {
            'irritants': {
                'weight': 1.7,
                'ingredients': [
                    'sodium lauryl sulfate', 'alcohol denat', 'isopropyl alcohol',
                    'fragrance', 'essential oils'
                ]
            },
            'comedogenic': {
                'weight': 1.3,
                'ingredients': [
                    'coconut oil', 'cocoa butter', 'isopropyl myristate',
                    'sodium chloride', 'laureth-4'
                ]
            }
        }
        # Food-specific categories
        self.food_categories = {
            'trans_fats': {
                'weight': 2.2,
                'ingredients': [
                    'partially hydrogenated', 'hydrogenated oil',
                    'shortening', 'margarine'
                ]
            },
            'added_sugars': {
                'weight': 1.6,
                'ingredients': [
                    'high fructose corn syrup', 'corn syrup', 'dextrose',
                    'maltose', 'sucrose', 'cane sugar', 'brown sugar'
                ]
            }
        }

    def calculate_ingredient_position_weight(self, position: int, total_ingredients: int) -> float:
        """Return a multiplier that favours ingredients listed earlier.

        Args:
            position: Zero-based index of the ingredient in the list.
            total_ingredients: Length of the ingredient list.

        Returns:
            ``1.0 + (1.0 - position / total_ingredients)``: 2.0 for the first
            ingredient, decreasing towards 1.0 for later ones. Returns 1.0
            when ``total_ingredients`` is zero or negative, which also avoids
            a division by zero.
        """
        if total_ingredients <= 0:
            return 1.0
        return 1.0 + (1.0 - (position / total_ingredients))

    def identify_risks(self, ingredient: str) -> List[str]:
        """Return every risk category the ingredient belongs to.

        All three category tables are consulted regardless of product type;
        callers filter the result to the categories they care about.

        Args:
            ingredient: Ingredient name as it appears on the label.

        Returns:
            Category names, in table order (general, cosmetic, food), whose
            ingredient list contains a fragment found in ``ingredient``.
        """
        needle_space = ingredient.lower()
        all_categories = {
            **self.risk_categories,
            **self.cosmetic_categories,
            **self.food_categories
        }
        # Lower-case both sides: the tables contain upper-case entries such
        # as 'BHA' and 'BHT' that could never match a lower-cased ingredient.
        return [
            category
            for category, data in all_categories.items()
            if any(fragment.lower() in needle_space for fragment in data['ingredients'])
        ]

    def calculate_score(self, ingredients: List[str], product_type: str = 'food') -> Dict:
        """Calculate a health score and a per-category risk breakdown.

        Args:
            ingredients: Ingredient names in label order.
            product_type: ``'food'`` (case-insensitive) selects the food
                categories; any other value selects the cosmetic categories.

        Returns:
            A dict with ``'score'`` (int in 0..1000, where 1000 means no risk
            was found), ``'risks'`` (category -> ``{'ingredients': [...],
            'total_risk': number}``) and ``'explanation'`` (human-readable
            text). An empty ingredient list yields a neutral score of 500.
        """
        if not ingredients:
            return {
                'score': 500,  # Neutral score if no ingredients
                'risks': {},
                'explanation': "No ingredients provided for analysis"
            }

        total_ingredients = len(ingredients)
        risk_points = 0
        risk_breakdown = {}

        # Select relevant categories based on product type
        categories = {**self.risk_categories}
        if product_type.lower() == 'food':
            categories.update(self.food_categories)
        else:
            categories.update(self.cosmetic_categories)

        # Analyze each ingredient
        for position, ingredient in enumerate(ingredients):
            position_weight = self.calculate_ingredient_position_weight(position, total_ingredients)
            for risk in self.identify_risks(ingredient):
                # identify_risks looks at every table; ignore categories that
                # do not apply to this product type.
                if risk not in categories:
                    continue
                risk_value = categories[risk]['weight'] * position_weight
                risk_points += risk_value
                entry = risk_breakdown.setdefault(risk, {'ingredients': [], 'total_risk': 0})
                entry['ingredients'].append(ingredient)
                entry['total_risk'] += risk_value

        # Calculate final score (1000 = perfectly healthy, 0 = maximum risk)
        base_score = 1000
        risk_multiplier = 100  # Adjust this to control how quickly score decreases
        final_score = max(0, min(1000, base_score - (risk_points * risk_multiplier)))

        return {
            'score': int(final_score),
            'risks': risk_breakdown,
            'explanation': self._generate_explanation(risk_breakdown, final_score)
        }

    def _generate_explanation(self, risk_breakdown: Dict, score: float) -> str:
        """Generate a human-readable explanation of the health score.

        Args:
            risk_breakdown: Mapping of category name to a dict holding an
                ``'ingredients'`` list, as built by ``calculate_score``.
            score: Final score; it is truncated to an int in the text.

        Returns:
            A status sentence followed by either a bulleted list of concerns
            or a note that no risk factors were identified.
        """
        if score > 800:
            status = "very healthy"
        elif score > 600:
            status = "moderately healthy"
        elif score > 400:
            status = "moderate risk"
        elif score > 200:
            status = "high risk"
        else:
            status = "very high risk"

        parts = [f"This product is considered {status} with a score of {int(score)}."]
        if risk_breakdown:
            parts.append("\n\nKey concerns:")
            for risk, data in risk_breakdown.items():
                risk_name = risk.replace('_', ' ').title()
                found = ', '.join(data['ingredients'])
                parts.append(
                    f"\n- {risk_name}: Found {len(data['ingredients'])} concerning ingredient(s): {found}"
                )
        else:
            parts.append("\n\nNo specific risk factors identified in the ingredients list.")
        return ''.join(parts)

    @staticmethod
    def _as_number(value) -> Optional[float]:
        """Return ``value`` as a float, or None when it is not numeric."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    def analyze_nutrition(self, nutrition_info: Dict) -> Dict:
        """Analyze nutrition information and provide recommendations.

        Args:
            nutrition_info: Mapping that may contain ``'calories'``,
                ``'protein'``, ``'fat'``, ``'sugar'`` and ``'fiber'``.
                Missing keys, and values that cannot be read as numbers
                (for example ``None``), are skipped rather than raising.

        Returns:
            A dict with the lists ``'concerns'``, ``'positives'`` and
            ``'recommendations'``.
        """
        analysis = {
            'concerns': [],
            'positives': [],
            'recommendations': []
        }
        if not nutrition_info:
            return analysis

        # Check calories
        calories = self._as_number(nutrition_info.get('calories'))
        if calories is not None:
            if calories > 400:
                analysis['concerns'].append("High calorie content")
            elif calories < 50:
                analysis['positives'].append("Low calorie content")

        # Check protein
        protein = self._as_number(nutrition_info.get('protein'))
        if protein is not None:
            if protein > 15:
                analysis['positives'].append("Good source of protein")
            elif protein < 5:
                analysis['recommendations'].append("Consider options with more protein")

        # Check fat
        fat = self._as_number(nutrition_info.get('fat'))
        if fat is not None and fat > 15:
            analysis['concerns'].append("High fat content")

        # Check sugar
        sugar = self._as_number(nutrition_info.get('sugar'))
        if sugar is not None and sugar > 10:
            analysis['concerns'].append("High sugar content")
            analysis['recommendations'].append("Look for options with less sugar")

        # Check fiber
        fiber = self._as_number(nutrition_info.get('fiber'))
        if fiber is not None:
            if fiber > 5:
                analysis['positives'].append("Good source of fiber")
            elif fiber < 2:
                analysis['recommendations'].append("Consider options with more fiber")

        return analysis