Files
bakery-ia/services/inventory/app/services/product_classifier.py
2025-08-13 17:39:35 +02:00

467 lines
18 KiB
Python

# services/inventory/app/services/product_classifier.py
"""
AI Product Classification Service
Automatically classifies products from sales data during onboarding
"""
import re
import unicodedata
from typing import Dict, Any, List, Optional, Tuple
from enum import Enum
from dataclasses import dataclass

import structlog

from app.models.inventory import ProductType, IngredientCategory, ProductCategory, UnitOfMeasure
logger = structlog.get_logger()
@dataclass
class ProductSuggestion:
    """Inventory item proposal produced by analysing a sales-data product name."""

    # Name as received, and the tidied-up name proposed for the inventory.
    original_name: str
    suggested_name: str

    # Classification outcome.
    product_type: ProductType
    # Holds an ingredient_category or a product_category value.
    category: str
    unit_of_measure: UnitOfMeasure
    # Heuristic confidence in the classification, from 0.0 to 1.0.
    confidence_score: float

    # Optional storage / handling hints; defaults mean "nothing known".
    estimated_shelf_life_days: Optional[int] = None
    requires_refrigeration: bool = False
    requires_freezing: bool = False
    is_seasonal: bool = False

    # Optional free-text extras.
    suggested_supplier: Optional[str] = None
    notes: Optional[str] = None
class ProductClassifierService:
    """Rule-based product classification for onboarding automation.

    Product names are normalized and matched against keyword regex tables
    for ingredients and for finished products.  Each matching keyword gets
    a heuristic confidence score; the best-scoring category is turned into
    a ``ProductSuggestion``.

    Ingredient rules are consulted first: an ingredient match scoring 0.7
    or more is returned without looking at the finished-product rules.
    """

    def __init__(self):
        self._load_classification_rules()

    def _load_classification_rules(self):
        """Populate the keyword tables used by the classifier.

        Sets ``self.ingredient_patterns``, ``self.product_patterns`` and
        ``self.seasonal_patterns``.  All keywords are lowercase and
        accent-free; ``_normalize_name`` folds names to the same form
        before matching.
        """
        # Ingredient patterns with high confidence
        self.ingredient_patterns = {
            IngredientCategory.FLOUR: {
                'patterns': [
                    r'harina', r'flour', r'trigo', r'wheat', r'integral', r'whole.*wheat',
                    r'centeno', r'rye', r'avena', r'oat', r'maiz', r'corn'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 365,
                'supplier_hints': ['molinos', 'harinera', 'mill']
            },
            IngredientCategory.YEAST: {
                'patterns': [
                    r'levadura', r'yeast', r'fermento', r'baker.*yeast', r'instant.*yeast'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730,
                'refrigeration': True
            },
            IngredientCategory.DAIRY: {
                'patterns': [
                    r'leche', r'milk', r'nata', r'cream', r'mantequilla', r'butter',
                    r'queso', r'cheese', r'yogur', r'yogurt'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 7,
                'refrigeration': True
            },
            IngredientCategory.EGGS: {
                'patterns': [
                    r'huevo', r'egg', r'clara', r'white', r'yema', r'yolk'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 28,
                'refrigeration': True
            },
            IngredientCategory.SUGAR: {
                'patterns': [
                    r'azucar', r'sugar', r'edulcorante', r'sweetener', r'miel', r'honey',
                    r'jarabe', r'syrup', r'mascabado', r'brown.*sugar'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 730
            },
            IngredientCategory.FATS: {
                'patterns': [
                    r'aceite', r'oil', r'grasa', r'fat', r'margarina', r'margarine',
                    r'manteca', r'lard', r'oliva', r'olive'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 365
            },
            IngredientCategory.SALT: {
                'patterns': [
                    r'sal', r'salt', r'sodium', r'sodio'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 1825  # 5 years
            },
            IngredientCategory.SPICES: {
                'patterns': [
                    r'canela', r'cinnamon', r'vainilla', r'vanilla', r'cacao', r'cocoa',
                    r'chocolate', r'anis', r'anise', r'cardamomo', r'cardamom',
                    r'jengibre', r'ginger', r'nuez.*moscada', r'nutmeg'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.ADDITIVES: {
                'patterns': [
                    r'polvo.*hornear', r'baking.*powder', r'bicarbonato', r'soda',
                    r'cremor.*tartaro', r'cream.*tartar', r'lecitina', r'lecithin',
                    r'conservante', r'preservative', r'emulsificante', r'emulsifier'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.PACKAGING: {
                'patterns': [
                    r'bolsa', r'bag', r'envase', r'container', r'papel', r'paper',
                    r'plastico', r'plastic', r'carton', r'cardboard'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1825
            }
        }

        # Finished product patterns ('display_life' is not read by this class)
        self.product_patterns = {
            ProductCategory.BREAD: {
                'patterns': [
                    r'pan\b', r'bread', r'baguette', r'hogaza', r'loaf', r'molde',
                    r'integral', r'whole.*grain', r'centeno', r'rye.*bread'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'display_life': 24  # hours
            },
            ProductCategory.CROISSANTS: {
                'patterns': [
                    r'croissant', r'cruasan', r'napolitana', r'palmera', r'palmier'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 12
            },
            ProductCategory.PASTRIES: {
                'patterns': [
                    r'pastel', r'pastry', r'hojaldre', r'puff.*pastry', r'empanada',
                    r'milhojas', r'napoleon', r'eclair', r'profiterol'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 24,
                'refrigeration': True
            },
            ProductCategory.CAKES: {
                'patterns': [
                    r'tarta', r'cake', r'bizcocho', r'sponge', r'cheesecake',
                    r'tiramisu', r'mousse', r'torta'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'refrigeration': True
            },
            ProductCategory.COOKIES: {
                'patterns': [
                    r'galleta', r'cookie', r'biscuit', r'mantecada', r'madeleine'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 14
            },
            ProductCategory.MUFFINS: {
                'patterns': [
                    r'muffin', r'magdalena', r'cupcake', r'fairy.*cake'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3
            },
            ProductCategory.SANDWICHES: {
                'patterns': [
                    r'sandwich', r'bocadillo', r'tostada', r'toast', r'bagel'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1,
                'display_life': 6,
                'refrigeration': True
            },
            ProductCategory.BEVERAGES: {
                'patterns': [
                    r'cafe', r'coffee', r'te\b', r'tea', r'chocolate.*caliente',
                    r'hot.*chocolate', r'zumo', r'juice', r'batido', r'smoothie'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1
            }
        }

        # Seasonal indicators
        self.seasonal_patterns = {
            'christmas': [r'navidad', r'christmas', r'turron', r'polvoron', r'roscon'],
            'easter': [r'pascua', r'easter', r'mona', r'torrija'],
            'summer': [r'helado', r'ice.*cream', r'granizado', r'sorbete']
        }

    def classify_product(self, product_name: str, sales_volume: Optional[float] = None) -> "ProductSuggestion":
        """Classify a single product name into an inventory suggestion.

        Args:
            product_name: Name as it appears in the sales data.
            sales_volume: Accepted for API compatibility; not used by the
                current rules.

        Returns:
            The ingredient suggestion when its confidence is at least 0.7,
            otherwise the best finished-product suggestion, otherwise a
            low-confidence fallback suggestion.
        """
        normalized_name = self._normalize_name(product_name)

        # Ingredient rules win outright when they are confident enough.
        ingredient_result = self._classify_as_ingredient(normalized_name, product_name)
        if ingredient_result is not None and ingredient_result.confidence_score >= 0.7:
            return ingredient_result

        product_result = self._classify_as_finished_product(normalized_name, product_name)
        if product_result is not None:
            return product_result

        # Nothing matched: generic finished product with low confidence.
        return self._create_fallback_suggestion(product_name, normalized_name)

    def classify_products_batch(self, product_names: List[str],
                                sales_volumes: Optional[Dict[str, float]] = None) -> List["ProductSuggestion"]:
        """Classify several product names and log the detected business model.

        Args:
            product_names: Names to classify, in order.
            sales_volumes: Optional mapping of product name to volume; the
                value (or ``None``) is forwarded to ``classify_product``.

        Returns:
            One suggestion per input name, in the same order.  The
            business-model analysis is only logged, not returned.
        """
        suggestions = [
            self.classify_product(name, sales_volumes.get(name) if sales_volumes else None)
            for name in product_names
        ]
        # Analyze business model based on classification results
        self._analyze_business_model(suggestions)
        return suggestions

    def _normalize_name(self, name: str) -> str:
        """Normalize a product name for pattern matching.

        Lowercases, folds accents (``"Azúcar"`` -> ``"azucar"``), drops a
        leading Spanish article, replaces punctuation with spaces and
        collapses whitespace.  Falsy input yields ``""``.
        """
        if not name:
            return ""

        normalized = name.lower().strip()

        # The keyword tables are accent-free, so fold diacritics here;
        # otherwise "café" or "turrón" could never match 'cafe' / 'turron'.
        normalized = ''.join(
            ch for ch in unicodedata.normalize('NFD', normalized)
            if not unicodedata.combining(ch)
        )

        # Remove common leading articles (each is tried once, in order).
        for prefix in ('el ', 'la ', 'los ', 'las ', 'un ', 'una '):
            if normalized.startswith(prefix):
                normalized = normalized[len(prefix):]

        # Replace special characters with spaces, then squeeze whitespace.
        normalized = re.sub(r'[^\w\s]', ' ', normalized)
        return re.sub(r'\s+', ' ', normalized).strip()

    def _find_best_match(self, rule_table: Dict[Any, Dict[str, Any]], normalized_name: str):
        """Return ``((category, config), score)`` for the best-scoring rule.

        Returns ``(None, 0.0)`` when no pattern matches.  On equal scores
        the first rule encountered is kept.
        """
        best_match = None
        best_score = 0.0
        for category, config in rule_table.items():
            for pattern in config['patterns']:
                if re.search(pattern, normalized_name, re.IGNORECASE):
                    score = self._calculate_confidence_score(pattern, normalized_name)
                    if score > best_score:
                        best_score = score
                        best_match = (category, config)
        return best_match, best_score

    def _classify_as_ingredient(self, normalized_name: str, original_name: str) -> Optional["ProductSuggestion"]:
        """Try to classify the name as an ingredient.

        Returns:
            An ingredient suggestion when the best match scores at least
            0.6, otherwise ``None``.
        """
        best_match, best_score = self._find_best_match(self.ingredient_patterns, normalized_name)
        if not best_match or best_score < 0.6:
            return None

        category, config = best_match
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.INGREDIENT,
            category=category.value,
            unit_of_measure=config['unit'],
            confidence_score=best_score,
            estimated_shelf_life_days=config.get('shelf_life'),
            requires_refrigeration=config.get('refrigeration', False),
            requires_freezing=config.get('freezing', False),
            suggested_supplier=self._suggest_supplier(normalized_name, config.get('supplier_hints', [])),
            notes=f"Auto-classified as {category.value} ingredient"
        )

    def _classify_as_finished_product(self, normalized_name: str, original_name: str) -> Optional["ProductSuggestion"]:
        """Try to classify the name as a finished product.

        Returns:
            A finished-product suggestion for the best-scoring category
            (any score), or ``None`` when no pattern matches.
        """
        best_match, best_score = self._find_best_match(self.product_patterns, normalized_name)
        if not best_match:
            return None

        category, config = best_match
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.FINISHED_PRODUCT,
            category=category.value,
            unit_of_measure=config['unit'],
            confidence_score=best_score,
            estimated_shelf_life_days=config.get('shelf_life'),
            requires_refrigeration=config.get('refrigeration', False),
            requires_freezing=config.get('freezing', False),
            is_seasonal=self._is_seasonal_product(normalized_name),
            notes=f"Auto-classified as {category.value}"
        )

    def _create_fallback_suggestion(self, original_name: str, normalized_name: str) -> "ProductSuggestion":
        """Create a low-confidence generic suggestion for an unclassified name."""
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.FINISHED_PRODUCT,
            category=ProductCategory.OTHER_PRODUCTS.value,
            unit_of_measure=UnitOfMeasure.UNITS,
            confidence_score=0.3,
            estimated_shelf_life_days=3,
            notes="Needs manual classification - defaulted to finished product"
        )

    def _calculate_confidence_score(self, pattern: str, normalized_name: str) -> float:
        """Calculate the confidence score for a pattern that matched a name.

        Returns 0.95 when the pattern matches the whole name.  Otherwise
        starts from 0.8, adds 0.1 when the match sits on word boundaries
        and subtracts 0.2 when the pattern text is shorter than half the
        name.  The result is clamped to the range 0.3 to 0.95.
        """
        # Whole-name match.  fullmatch (rather than comparing the raw regex
        # source) lets patterns with metacharacters such as 'pan\b' qualify.
        if re.fullmatch(pattern, normalized_name, re.IGNORECASE):
            return 0.95

        base_score = 0.8

        # Boost score for word boundary matches
        if re.search(r'\b' + pattern + r'\b', normalized_name, re.IGNORECASE):
            base_score += 0.1

        # Reduce score when the keyword covers only a small part of the name
        if len(pattern) < len(normalized_name) / 2:
            base_score -= 0.2

        return min(0.95, max(0.3, base_score))

    def _suggest_clean_name(self, original_name: str, normalized_name: str) -> str:
        """Suggest a tidied version of the product name.

        Each whitespace-separated word is capitalized, except fully
        upper-case words of up to three characters, which are kept as-is.
        ``normalized_name`` is accepted but not used.  Falsy
        ``original_name`` yields ``""``.
        """
        if not original_name:
            return ""

        return ' '.join(
            # Keep original casing for short abbreviations
            word if (word.isupper() and len(word) <= 3) else word.capitalize()
            for word in original_name.split()
        )

    def _suggest_supplier(self, normalized_name: str, supplier_hints: List[str]) -> Optional[str]:
        """Return ``"Suggested: <Hint>"`` for the first hint found in the name, else ``None``."""
        for hint in supplier_hints:
            if hint in normalized_name:
                return f"Suggested: {hint.title()}"
        return None

    def _is_seasonal_product(self, normalized_name: str) -> bool:
        """Return True when any seasonal keyword matches the name."""
        return any(
            re.search(pattern, normalized_name, re.IGNORECASE)
            for patterns in self.seasonal_patterns.values()
            for pattern in patterns
        )

    def _analyze_business_model(self, suggestions: List["ProductSuggestion"]) -> Dict[str, Any]:
        """Infer the business model from the share of ingredient suggestions.

        An ingredient ratio of 0.7 or more is "production", 0.3 or less is
        "retail", anything in between is "hybrid".  The result is logged.

        Returns:
            ``{"model": "unknown", "confidence": 0.0}`` for an empty list;
            otherwise a dict with ``model``, ``confidence``,
            ``ingredient_ratio`` and ``recommendations``.
        """
        total = len(suggestions)
        if total == 0:
            return {"model": "unknown", "confidence": 0.0}

        ingredient_count = sum(1 for s in suggestions if s.product_type == ProductType.INGREDIENT)
        finished_count = sum(1 for s in suggestions if s.product_type == ProductType.FINISHED_PRODUCT)
        ingredient_ratio = ingredient_count / total

        if ingredient_ratio >= 0.7:
            model = "production"  # Production bakery
        elif ingredient_ratio <= 0.3:
            model = "retail"  # Retail/Distribution bakery
        else:
            model = "hybrid"  # Mixed model

        # Distance from an even split, scaled to 0..1, with a floor of 0.1.
        confidence = max(abs(ingredient_ratio - 0.5) * 2, 0.1)

        logger.info("Business model analysis",
                    model=model, confidence=confidence,
                    ingredient_count=ingredient_count,
                    finished_count=finished_count)

        return {
            "model": model,
            "confidence": confidence,
            "ingredient_ratio": ingredient_ratio,
            "recommendations": self._get_model_recommendations(model)
        }

    def _get_model_recommendations(self, model: str) -> List[str]:
        """Return setup recommendations for a business model (empty list if unknown)."""
        recommendations = {
            "production": [
                "Focus on ingredient inventory management",
                "Set up recipe cost calculation",
                "Configure supplier relationships",
                "Enable production planning features"
            ],
            "retail": [
                "Configure central baker relationships",
                "Set up delivery schedule tracking",
                "Enable finished product freshness monitoring",
                "Focus on sales forecasting"
            ],
            "hybrid": [
                "Configure both ingredient and finished product management",
                "Set up flexible inventory categories",
                "Enable both production and retail features"
            ]
        }
        return recommendations.get(model, [])
# Dependency injection
def get_product_classifier() -> ProductClassifierService:
    """Build and return a new ``ProductClassifierService`` on every call."""
    classifier = ProductClassifierService()
    return classifier