# services/inventory/app/services/product_classifier.py
"""
AI Product Classification Service
Automatically classifies products from sales data during onboarding
"""

import re
import unicodedata
import structlog
from typing import Dict, Any, List, Optional, Tuple
from enum import Enum
from dataclasses import dataclass

from app.models.inventory import ProductType, IngredientCategory, ProductCategory, UnitOfMeasure

logger = structlog.get_logger()


@dataclass
class ProductSuggestion:
    """Suggested inventory item from sales data analysis"""
    original_name: str
    suggested_name: str
    product_type: ProductType
    category: str  # ingredient_category or product_category
    unit_of_measure: UnitOfMeasure
    confidence_score: float  # 0.0 to 1.0
    estimated_shelf_life_days: Optional[int] = None
    requires_refrigeration: bool = False
    requires_freezing: bool = False
    is_seasonal: bool = False
    suggested_supplier: Optional[str] = None
    notes: Optional[str] = None


class ProductClassifierService:
    """Rule-based product classification for onboarding automation.

    Product names are normalized (lower-cased, accent-folded, stripped of
    leading Spanish articles and punctuation) and matched against two regex
    tables: ingredient categories and finished-product categories. Each match
    is scored by ``_calculate_confidence_score`` and the best-scoring
    category is turned into a ``ProductSuggestion``.
    """

    def __init__(self):
        self._load_classification_rules()

    def _load_classification_rules(self):
        """Load classification patterns and rules.

        Populates ``self.ingredient_patterns``, ``self.product_patterns`` and
        ``self.seasonal_patterns``. All patterns are written without accents;
        ``_normalize_name`` folds accents so that accented input still matches.
        """

        # Ingredient patterns with high confidence
        self.ingredient_patterns = {
            IngredientCategory.FLOUR: {
                'patterns': [
                    r'harina', r'flour', r'trigo', r'wheat', r'integral', r'whole.*wheat',
                    r'centeno', r'rye', r'avena', r'oat', r'maiz', r'corn'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 365,
                'supplier_hints': ['molinos', 'harinera', 'mill']
            },
            IngredientCategory.YEAST: {
                'patterns': [
                    r'levadura', r'yeast', r'fermento', r'baker.*yeast', r'instant.*yeast'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730,
                'refrigeration': True
            },
            IngredientCategory.DAIRY: {
                'patterns': [
                    r'leche', r'milk', r'nata', r'cream', r'mantequilla', r'butter',
                    r'queso', r'cheese', r'yogur', r'yogurt'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 7,
                'refrigeration': True
            },
            IngredientCategory.EGGS: {
                'patterns': [
                    r'huevo', r'egg', r'clara', r'white', r'yema', r'yolk'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 28,
                'refrigeration': True
            },
            IngredientCategory.SUGAR: {
                'patterns': [
                    r'azucar', r'sugar', r'edulcorante', r'sweetener', r'miel', r'honey',
                    r'jarabe', r'syrup', r'mascabado', r'brown.*sugar'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 730
            },
            IngredientCategory.FATS: {
                'patterns': [
                    r'aceite', r'oil', r'grasa', r'fat', r'margarina', r'margarine',
                    r'manteca', r'lard', r'oliva', r'olive'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 365
            },
            IngredientCategory.SALT: {
                'patterns': [
                    r'sal', r'salt', r'sodium', r'sodio'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 1825  # 5 years
            },
            IngredientCategory.SPICES: {
                'patterns': [
                    r'canela', r'cinnamon', r'vainilla', r'vanilla', r'cacao', r'cocoa',
                    r'chocolate', r'anis', r'anise', r'cardamomo', r'cardamom',
                    r'jengibre', r'ginger', r'nuez.*moscada', r'nutmeg'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.ADDITIVES: {
                'patterns': [
                    r'polvo.*hornear', r'baking.*powder', r'bicarbonato', r'soda',
                    r'cremor.*tartaro', r'cream.*tartar', r'lecitina', r'lecithin',
                    r'conservante', r'preservative', r'emulsificante', r'emulsifier'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.PACKAGING: {
                'patterns': [
                    r'bolsa', r'bag', r'envase', r'container', r'papel', r'paper',
                    r'plastico', r'plastic', r'carton', r'cardboard'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1825
            }
        }

        # Finished product patterns
        self.product_patterns = {
            ProductCategory.BREAD: {
                'patterns': [
                    r'pan\b', r'bread', r'baguette', r'hogaza', r'loaf', r'molde',
                    r'integral', r'whole.*grain', r'centeno', r'rye.*bread'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'display_life': 24  # hours
            },
            ProductCategory.CROISSANTS: {
                'patterns': [
                    r'croissant', r'cruasan', r'napolitana', r'palmera', r'palmier'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 12
            },
            ProductCategory.PASTRIES: {
                'patterns': [
                    r'pastel', r'pastry', r'hojaldre', r'puff.*pastry', r'empanada',
                    r'milhojas', r'napoleon', r'eclair', r'profiterol'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 24,
                'refrigeration': True
            },
            ProductCategory.CAKES: {
                'patterns': [
                    r'tarta', r'cake', r'bizcocho', r'sponge', r'cheesecake',
                    r'tiramisu', r'mousse', r'torta'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'refrigeration': True
            },
            ProductCategory.COOKIES: {
                'patterns': [
                    r'galleta', r'cookie', r'biscuit', r'mantecada', r'madeleine'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 14
            },
            ProductCategory.MUFFINS: {
                'patterns': [
                    r'muffin', r'magdalena', r'cupcake', r'fairy.*cake'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3
            },
            ProductCategory.SANDWICHES: {
                'patterns': [
                    r'sandwich', r'bocadillo', r'tostada', r'toast', r'bagel'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1,
                'display_life': 6,
                'refrigeration': True
            },
            ProductCategory.BEVERAGES: {
                'patterns': [
                    r'cafe', r'coffee', r'te\b', r'tea', r'chocolate.*caliente',
                    r'hot.*chocolate', r'zumo', r'juice', r'batido', r'smoothie'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1
            }
        }

        # Seasonal indicators
        self.seasonal_patterns = {
            'christmas': [r'navidad', r'christmas', r'turron', r'polvoron', r'roscon'],
            'easter': [r'pascua', r'easter', r'mona', r'torrija'],
            'summer': [r'helado', r'ice.*cream', r'granizado', r'sorbete']
        }

    def classify_product(self, product_name: str, sales_volume: Optional[float] = None) -> ProductSuggestion:
        """Classify a single product name into an inventory suggestion.

        Args:
            product_name: Raw product name as it appears in the sales data.
            sales_volume: Accepted for interface compatibility; not used by
                the current rules.

        Returns:
            The ingredient suggestion when its confidence is at least 0.7 and
            no finished-product match scores strictly higher; otherwise the
            finished-product suggestion if any pattern matched; otherwise a
            low-confidence fallback suggestion.
        """
        # Normalize product name for analysis
        normalized_name = self._normalize_name(product_name)

        # Evaluate both interpretations before deciding. Ingredient patterns
        # are matched as substrings, so a short ingredient keyword can fire
        # inside a finished-product name ('bag' in "bagel", 'oat' in "toast",
        # 'nata' in "empanada"); returning the ingredient without looking at
        # the product side would mislabel those items.
        ingredient_result = self._classify_as_ingredient(normalized_name, product_name)
        product_result = self._classify_as_finished_product(normalized_name, product_name)

        if ingredient_result and ingredient_result.confidence_score >= 0.7:
            # Ties keep the ingredient interpretation; only a strictly better
            # finished-product match overrides it.
            if (
                product_result is None
                or product_result.confidence_score <= ingredient_result.confidence_score
            ):
                return ingredient_result

        if product_result:
            return product_result

        # Fallback: create generic finished product with low confidence
        return self._create_fallback_suggestion(product_name, normalized_name)

    def classify_products_batch(self, product_names: List[str],
                                sales_volumes: Optional[Dict[str, float]] = None) -> List[ProductSuggestion]:
        """Classify multiple products and log the detected business model.

        Args:
            product_names: Raw product names to classify.
            sales_volumes: Optional mapping of product name to sales volume;
                the looked-up value is forwarded to ``classify_product``.

        Returns:
            One suggestion per input name, in input order.
        """
        suggestions = []

        for name in product_names:
            volume = sales_volumes.get(name) if sales_volumes else None
            suggestion = self.classify_product(name, volume)
            suggestions.append(suggestion)

        # Analyze business model based on classification results.
        # The analysis is only logged here; its return value is not part of
        # this method's result.
        self._analyze_business_model(suggestions)

        return suggestions

    def _normalize_name(self, name: str) -> str:
        """Normalize a product name for pattern matching.

        Lower-cases and trims the name, folds accented characters to their
        base letters, drops a leading Spanish article, replaces punctuation
        with spaces and collapses runs of whitespace.

        Returns:
            The normalized name, or ``""`` when ``name`` is empty or ``None``.
        """
        if not name:
            return ""

        # Convert to lowercase
        normalized = name.lower().strip()

        # Fold accents (á -> a, ñ -> n, ü -> u). Every classification pattern
        # is written unaccented ('azucar', 'maiz', 'cafe', 'turron'), so
        # accented input such as "Azúcar" would otherwise never match.
        decomposed = unicodedata.normalize('NFKD', normalized)
        normalized = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))

        # Remove common prefixes/suffixes
        prefixes_to_remove = ['el ', 'la ', 'los ', 'las ', 'un ', 'una ']
        for prefix in prefixes_to_remove:
            if normalized.startswith(prefix):
                normalized = normalized[len(prefix):]

        # Remove special characters but keep word characters and spaces
        normalized = re.sub(r'[^\w\s]', ' ', normalized)

        # Normalize multiple spaces
        normalized = re.sub(r'\s+', ' ', normalized).strip()

        return normalized

    def _classify_as_ingredient(self, normalized_name: str, original_name: str) -> Optional[ProductSuggestion]:
        """Try to classify the name as an ingredient.

        Returns:
            A suggestion for the best-scoring ingredient category when that
            score is at least 0.6, otherwise ``None``.
        """
        best_match = None
        best_score = 0.0

        for category, config in self.ingredient_patterns.items():
            for pattern in config['patterns']:
                if re.search(pattern, normalized_name, re.IGNORECASE):
                    # Calculate confidence based on pattern specificity
                    score = self._calculate_confidence_score(pattern, normalized_name)
                    if score > best_score:
                        best_score = score
                        best_match = (category, config)

        if best_match and best_score >= 0.6:
            category, config = best_match
            return ProductSuggestion(
                original_name=original_name,
                suggested_name=self._suggest_clean_name(original_name, normalized_name),
                product_type=ProductType.INGREDIENT,
                category=category.value,
                unit_of_measure=config['unit'],
                confidence_score=best_score,
                estimated_shelf_life_days=config.get('shelf_life'),
                requires_refrigeration=config.get('refrigeration', False),
                requires_freezing=config.get('freezing', False),
                suggested_supplier=self._suggest_supplier(normalized_name, config.get('supplier_hints', [])),
                notes=f"Auto-classified as {category.value} ingredient"
            )

        return None

    def _classify_as_finished_product(self, normalized_name: str, original_name: str) -> Optional[ProductSuggestion]:
        """Try to classify the name as a finished product.

        Returns:
            A suggestion for the best-scoring product category, or ``None``
            when no product pattern matches.
        """
        best_match = None
        best_score = 0.0

        for category, config in self.product_patterns.items():
            for pattern in config['patterns']:
                if re.search(pattern, normalized_name, re.IGNORECASE):
                    score = self._calculate_confidence_score(pattern, normalized_name)
                    if score > best_score:
                        best_score = score
                        best_match = (category, config)

        if best_match:
            category, config = best_match

            # Check if seasonal
            is_seasonal = self._is_seasonal_product(normalized_name)

            return ProductSuggestion(
                original_name=original_name,
                suggested_name=self._suggest_clean_name(original_name, normalized_name),
                product_type=ProductType.FINISHED_PRODUCT,
                category=category.value,
                unit_of_measure=config['unit'],
                confidence_score=best_score,
                estimated_shelf_life_days=config.get('shelf_life'),
                requires_refrigeration=config.get('refrigeration', False),
                requires_freezing=config.get('freezing', False),
                is_seasonal=is_seasonal,
                notes=f"Auto-classified as {category.value}"
            )

        return None

    def _create_fallback_suggestion(self, original_name: str, normalized_name: str) -> ProductSuggestion:
        """Create a low-confidence suggestion for an unclassified product.

        The item defaults to a generic finished product. The seasonal check
        still runs, because several seasonal keywords (e.g. 'turron',
        'roscon', 'helado') match no product category and only reach this
        path.
        """
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.FINISHED_PRODUCT,
            category=ProductCategory.OTHER_PRODUCTS.value,
            unit_of_measure=UnitOfMeasure.UNITS,
            confidence_score=0.3,
            estimated_shelf_life_days=3,
            is_seasonal=self._is_seasonal_product(normalized_name),
            notes="Needs manual classification - defaulted to finished product"
        )

    def _calculate_confidence_score(self, pattern: str, normalized_name: str) -> float:
        """Calculate the confidence score for a pattern that matched a name.

        Scoring:
            * 0.95 when the pattern matches the entire name;
            * otherwise 0.8, plus 0.1 when the match sits on word boundaries,
              minus 0.2 when the pattern text is shorter than half the name;
            * the result is clamped to the range [0.3, 0.95].
        """
        # Base score for match
        base_score = 0.8

        # Boost score for exact matches. fullmatch (rather than comparing the
        # pattern text) lets patterns that contain regex syntax, such as
        # r'pan\b' or r'hot.*chocolate', count as exact matches too.
        if re.fullmatch(pattern, normalized_name, re.IGNORECASE):
            return 0.95

        # Boost score for word boundary matches
        if re.search(r'\b(?:' + pattern + r')\b', normalized_name, re.IGNORECASE):
            base_score += 0.1

        # Reduce score for partial matches.
        # NOTE: the length is that of the pattern text, so regex
        # metacharacters are counted as well.
        if len(pattern) < len(normalized_name) / 2:
            base_score -= 0.2

        return min(0.95, max(0.3, base_score))

    def _suggest_clean_name(self, original_name: str, normalized_name: str) -> str:
        """Suggest a cleaned, capitalized version of the product name.

        Upper-case words of at most three characters are kept as-is (treated
        as abbreviations); every other word is capitalized. Returns ``""``
        for an empty or ``None`` name. ``normalized_name`` is accepted for
        interface compatibility and is not used.
        """
        if not original_name:
            return ""

        cleaned = []
        for word in original_name.split():
            # Keep original casing for abbreviations
            if word.isupper() and len(word) <= 3:
                cleaned.append(word)
            else:
                cleaned.append(word.capitalize())

        return ' '.join(cleaned)

    def _suggest_supplier(self, normalized_name: str, supplier_hints: List[str]) -> Optional[str]:
        """Suggest a potential supplier based on hints found in the name.

        Returns:
            ``"Suggested: <Hint>"`` for the first hint contained in the
            normalized name, or ``None`` when no hint is present.
        """
        for hint in supplier_hints:
            if hint in normalized_name:
                return f"Suggested: {hint.title()}"
        return None

    def _is_seasonal_product(self, normalized_name: str) -> bool:
        """Check whether the name contains a seasonal keyword.

        Keywords must start at a word boundary so that short ones do not fire
        inside unrelated words (e.g. 'mona' inside "limonada").
        """
        for season, patterns in self.seasonal_patterns.items():
            for pattern in patterns:
                if re.search(r'\b(?:' + pattern + r')', normalized_name, re.IGNORECASE):
                    return True
        return False

    def _analyze_business_model(self, suggestions: List[ProductSuggestion]) -> Dict[str, Any]:
        """Analyze the business model based on product classifications.

        The share of suggestions typed as ingredients decides the model:
        ``"production"`` at 70% or more, ``"retail"`` at 30% or less,
        ``"hybrid"`` otherwise.

        Returns:
            A dict with ``model``, ``confidence``, ``ingredient_ratio`` and
            ``recommendations``; for an empty input only ``model`` (set to
            ``"unknown"``) and ``confidence`` (0.0) are present.
        """
        ingredient_count = sum(1 for s in suggestions if s.product_type == ProductType.INGREDIENT)
        finished_count = sum(1 for s in suggestions if s.product_type == ProductType.FINISHED_PRODUCT)
        total = len(suggestions)

        if total == 0:
            return {"model": "unknown", "confidence": 0.0}

        ingredient_ratio = ingredient_count / total

        if ingredient_ratio >= 0.7:
            model = "production"  # Production bakery
        elif ingredient_ratio <= 0.3:
            model = "retail"  # Retail/Distribution bakery
        else:
            model = "hybrid"  # Mixed model

        # Distance of the ratio from an even split, scaled to 0..1 with a
        # floor of 0.1.
        confidence = max(abs(ingredient_ratio - 0.5) * 2, 0.1)

        logger.info("Business model analysis",
                    model=model,
                    confidence=confidence,
                    ingredient_count=ingredient_count,
                    finished_count=finished_count)

        return {
            "model": model,
            "confidence": confidence,
            "ingredient_ratio": ingredient_ratio,
            "recommendations": self._get_model_recommendations(model)
        }

    def _get_model_recommendations(self, model: str) -> List[str]:
        """Get recommendations for the detected business model.

        Returns an empty list for a model name that has no entry.
        """
        recommendations = {
            "production": [
                "Focus on ingredient inventory management",
                "Set up recipe cost calculation",
                "Configure supplier relationships",
                "Enable production planning features"
            ],
            "retail": [
                "Configure central baker relationships",
                "Set up delivery schedule tracking",
                "Enable finished product freshness monitoring",
                "Focus on sales forecasting"
            ],
            "hybrid": [
                "Configure both ingredient and finished product management",
                "Set up flexible inventory categories",
                "Enable both production and retail features"
            ]
        }
        return recommendations.get(model, [])


# Dependency injection
def get_product_classifier() -> ProductClassifierService:
    """Get product classifier service instance"""
    return ProductClassifierService()