467 lines
18 KiB
Python
467 lines
18 KiB
Python
# services/inventory/app/services/product_classifier.py
|
|
"""
|
|
AI Product Classification Service
|
|
Automatically classifies products from sales data during onboarding
|
|
"""
|
|
|
|
import re
|
|
import structlog
|
|
from typing import Dict, Any, List, Optional, Tuple
|
|
from enum import Enum
|
|
from dataclasses import dataclass
|
|
|
|
from app.models.inventory import ProductType, IngredientCategory, ProductCategory, UnitOfMeasure
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
|
|
@dataclass
class ProductSuggestion:
    """Suggested inventory item derived from analysing a sales-data product name.

    Attributes:
        original_name: Product name as passed in by the caller.
        suggested_name: Cleaned-up display name proposed for the item.
        product_type: Whether the item is an ingredient or a finished product.
        category: Category value; holds either an ingredient category or a
            product category, depending on ``product_type``.
        unit_of_measure: Unit proposed for stock keeping.
        confidence_score: Confidence of the classification, 0.0 to 1.0.
        estimated_shelf_life_days: Estimated shelf life in days, if known.
        requires_refrigeration: True when the item should be refrigerated.
        requires_freezing: True when the item should be frozen.
        is_seasonal: True when the name matches a seasonal indicator.
        suggested_supplier: Optional supplier hint.
        notes: Optional free-text note about the classification.
    """

    # Required fields (order matters: it defines the positional __init__).
    original_name: str
    suggested_name: str
    product_type: ProductType
    category: str
    unit_of_measure: UnitOfMeasure
    confidence_score: float

    # Optional fields with defaults.
    estimated_shelf_life_days: Optional[int] = None
    requires_refrigeration: bool = False
    requires_freezing: bool = False
    is_seasonal: bool = False
    suggested_supplier: Optional[str] = None
    notes: Optional[str] = None
|
|
|
|
|
|
class ProductClassifierService:
    """Rule-based product classification for onboarding automation.

    Product names are normalized and matched against regular-expression
    rule tables (ingredients, finished products, seasonal indicators).
    Each match is scored and the best-scoring category is turned into a
    ``ProductSuggestion``.
    """

    # The rule patterns are written without accents ("azucar", "cafe",
    # "turron"), so accented vowels are folded before matching. "ñ" is
    # deliberately left untouched.
    _ACCENT_FOLD = str.maketrans(
        "áàäâéèëêíìïîóòöôúùüû",
        "aaaaeeeeiiiioooouuuu",
    )

    # Leading Spanish articles stripped from the start of a name.
    _LEADING_ARTICLES = ('el ', 'la ', 'los ', 'las ', 'un ', 'una ')

    def __init__(self):
        self._load_classification_rules()

    def _load_classification_rules(self):
        """Populate the ingredient, finished-product and seasonal rule tables.

        Each rule entry maps a category to a config dict with a
        ``'patterns'`` list (regexes searched case-insensitively), a
        ``'unit'``, a ``'shelf_life'`` in days and optional
        ``'refrigeration'`` / ``'supplier_hints'`` / ``'display_life'`` keys.
        """
        # Ingredient patterns with high confidence
        self.ingredient_patterns = {
            IngredientCategory.FLOUR: {
                'patterns': [
                    r'harina', r'flour', r'trigo', r'wheat', r'integral', r'whole.*wheat',
                    r'centeno', r'rye', r'avena', r'oat', r'maiz', r'corn'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 365,
                'supplier_hints': ['molinos', 'harinera', 'mill']
            },
            IngredientCategory.YEAST: {
                'patterns': [
                    r'levadura', r'yeast', r'fermento', r'baker.*yeast', r'instant.*yeast'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730,
                'refrigeration': True
            },
            IngredientCategory.DAIRY: {
                'patterns': [
                    r'leche', r'milk', r'nata', r'cream', r'mantequilla', r'butter',
                    r'queso', r'cheese', r'yogur', r'yogurt'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 7,
                'refrigeration': True
            },
            IngredientCategory.EGGS: {
                'patterns': [
                    r'huevo', r'egg', r'clara', r'white', r'yema', r'yolk'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 28,
                'refrigeration': True
            },
            IngredientCategory.SUGAR: {
                'patterns': [
                    r'azucar', r'sugar', r'edulcorante', r'sweetener', r'miel', r'honey',
                    r'jarabe', r'syrup', r'mascabado', r'brown.*sugar'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 730
            },
            IngredientCategory.FATS: {
                'patterns': [
                    r'aceite', r'oil', r'grasa', r'fat', r'margarina', r'margarine',
                    r'manteca', r'lard', r'oliva', r'olive'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 365
            },
            IngredientCategory.SALT: {
                'patterns': [
                    r'sal', r'salt', r'sodium', r'sodio'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 1825  # 5 years
            },
            IngredientCategory.SPICES: {
                'patterns': [
                    r'canela', r'cinnamon', r'vainilla', r'vanilla', r'cacao', r'cocoa',
                    r'chocolate', r'anis', r'anise', r'cardamomo', r'cardamom',
                    r'jengibre', r'ginger', r'nuez.*moscada', r'nutmeg'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.ADDITIVES: {
                'patterns': [
                    r'polvo.*hornear', r'baking.*powder', r'bicarbonato', r'soda',
                    r'cremor.*tartaro', r'cream.*tartar', r'lecitina', r'lecithin',
                    r'conservante', r'preservative', r'emulsificante', r'emulsifier'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.PACKAGING: {
                'patterns': [
                    r'bolsa', r'bag', r'envase', r'container', r'papel', r'paper',
                    r'plastico', r'plastic', r'carton', r'cardboard'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1825
            }
        }

        # Finished product patterns
        self.product_patterns = {
            ProductCategory.BREAD: {
                'patterns': [
                    # Anchored on both sides so "mazapan" is not read as bread.
                    r'\bpan\b', r'bread', r'baguette', r'hogaza', r'loaf', r'molde',
                    r'integral', r'whole.*grain', r'centeno', r'rye.*bread'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'display_life': 24  # hours
            },
            ProductCategory.CROISSANTS: {
                'patterns': [
                    r'croissant', r'cruasan', r'napolitana', r'palmera', r'palmier'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 12
            },
            ProductCategory.PASTRIES: {
                'patterns': [
                    r'pastel', r'pastry', r'hojaldre', r'puff.*pastry', r'empanada',
                    r'milhojas', r'napoleon', r'eclair', r'profiterol'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 24,
                'refrigeration': True
            },
            ProductCategory.CAKES: {
                'patterns': [
                    r'tarta', r'cake', r'bizcocho', r'sponge', r'cheesecake',
                    r'tiramisu', r'mousse', r'torta'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'refrigeration': True
            },
            ProductCategory.COOKIES: {
                'patterns': [
                    r'galleta', r'cookie', r'biscuit', r'mantecada', r'madeleine'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 14
            },
            ProductCategory.MUFFINS: {
                'patterns': [
                    r'muffin', r'magdalena', r'cupcake', r'fairy.*cake'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3
            },
            ProductCategory.SANDWICHES: {
                'patterns': [
                    r'sandwich', r'bocadillo', r'tostada', r'toast', r'bagel'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1,
                'display_life': 6,
                'refrigeration': True
            },
            ProductCategory.BEVERAGES: {
                'patterns': [
                    # "te" anchored on both sides; a trailing-only boundary
                    # also matched "tomate", "chocolate", "baguette", ...
                    r'cafe', r'coffee', r'\bte\b', r'tea', r'chocolate.*caliente',
                    r'hot.*chocolate', r'zumo', r'juice', r'batido', r'smoothie'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1
            }
        }

        # Seasonal indicators
        self.seasonal_patterns = {
            'christmas': [r'navidad', r'christmas', r'turron', r'polvoron', r'roscon'],
            'easter': [r'pascua', r'easter', r'mona', r'torrija'],
            'summer': [r'helado', r'ice.*cream', r'granizado', r'sorbete']
        }

    def classify_product(self, product_name: str,
                         sales_volume: Optional[float] = None) -> "ProductSuggestion":
        """Classify a single product name into an inventory suggestion.

        Args:
            product_name: Raw product name from the sales data.
            sales_volume: Accepted for interface compatibility; not used by
                the classification logic in this method.

        Returns:
            The ingredient suggestion when its confidence is at least 0.7,
            otherwise the finished-product suggestion if any pattern
            matched, otherwise a low-confidence fallback suggestion.
        """
        normalized_name = self._normalize_name(product_name)

        # Ingredients win only when the match is confident enough.
        ingredient_result = self._classify_as_ingredient(normalized_name, product_name)
        if ingredient_result and ingredient_result.confidence_score >= 0.7:
            return ingredient_result

        product_result = self._classify_as_finished_product(normalized_name, product_name)
        if product_result:
            return product_result

        # Fallback: generic finished product with low confidence
        return self._create_fallback_suggestion(product_name, normalized_name)

    def classify_products_batch(self, product_names: List[str],
                                sales_volumes: Optional[Dict[str, float]] = None
                                ) -> List["ProductSuggestion"]:
        """Classify multiple products and run the business-model analysis.

        Args:
            product_names: Product names to classify, in order.
            sales_volumes: Optional mapping of product name to sales volume.

        Returns:
            One suggestion per input name, in the same order. The
            business-model analysis result is logged by
            ``_analyze_business_model`` and not returned from here.
        """
        volumes = sales_volumes or {}
        suggestions = [
            self.classify_product(name, volumes.get(name))
            for name in product_names
        ]

        self._analyze_business_model(suggestions)
        return suggestions

    def _normalize_name(self, name: str) -> str:
        """Normalize a product name for pattern matching.

        Lower-cases and trims the name, folds accented vowels to ASCII
        (the rule patterns are unaccented), strips a leading Spanish
        article, replaces punctuation with spaces and collapses runs of
        whitespace. Returns ``""`` for an empty or ``None`` name.
        """
        if not name:
            return ""

        normalized = name.lower().strip().translate(self._ACCENT_FOLD)

        for prefix in self._LEADING_ARTICLES:
            if normalized.startswith(prefix):
                normalized = normalized[len(prefix):]

        # \w is Unicode-aware for str patterns, so letters such as "ñ" survive.
        normalized = re.sub(r'[^\w\s]', ' ', normalized)
        return re.sub(r'\s+', ' ', normalized).strip()

    def _best_pattern_match(self, rules: Dict[Any, Dict[str, Any]],
                            normalized_name: str
                            ) -> Optional[Tuple[Any, Dict[str, Any], float]]:
        """Return ``(category, config, score)`` for the best-scoring rule.

        Every pattern of every category in ``rules`` is searched
        case-insensitively in ``normalized_name``. On equal scores the
        first match found is kept. Returns ``None`` when nothing matches.
        """
        best = None
        best_score = 0.0

        for category, config in rules.items():
            for pattern in config['patterns']:
                if not re.search(pattern, normalized_name, re.IGNORECASE):
                    continue
                score = self._calculate_confidence_score(pattern, normalized_name)
                if score > best_score:
                    best_score = score
                    best = (category, config)

        if best is None:
            return None
        return best[0], best[1], best_score

    def _classify_as_ingredient(self, normalized_name: str,
                                original_name: str) -> Optional["ProductSuggestion"]:
        """Try to classify the name as an ingredient.

        Returns a suggestion when the best ingredient match scores at
        least 0.6, otherwise ``None``.
        """
        match = self._best_pattern_match(self.ingredient_patterns, normalized_name)
        if match is None or match[2] < 0.6:
            return None

        category, config, score = match
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.INGREDIENT,
            category=category.value,
            unit_of_measure=config['unit'],
            confidence_score=score,
            estimated_shelf_life_days=config.get('shelf_life'),
            requires_refrigeration=config.get('refrigeration', False),
            requires_freezing=config.get('freezing', False),
            suggested_supplier=self._suggest_supplier(
                normalized_name, config.get('supplier_hints', [])),
            notes=f"Auto-classified as {category.value} ingredient"
        )

    def _classify_as_finished_product(self, normalized_name: str,
                                      original_name: str) -> Optional["ProductSuggestion"]:
        """Try to classify the name as a finished product.

        Any matching pattern is accepted regardless of score. Returns
        ``None`` when no finished-product pattern matches.
        """
        match = self._best_pattern_match(self.product_patterns, normalized_name)
        if match is None:
            return None

        category, config, score = match
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.FINISHED_PRODUCT,
            category=category.value,
            unit_of_measure=config['unit'],
            confidence_score=score,
            estimated_shelf_life_days=config.get('shelf_life'),
            requires_refrigeration=config.get('refrigeration', False),
            requires_freezing=config.get('freezing', False),
            is_seasonal=self._is_seasonal_product(normalized_name),
            notes=f"Auto-classified as {category.value}"
        )

    def _create_fallback_suggestion(self, original_name: str,
                                    normalized_name: str) -> "ProductSuggestion":
        """Create a low-confidence (0.3) suggestion for an unclassified product."""
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.FINISHED_PRODUCT,
            category=ProductCategory.OTHER_PRODUCTS.value,
            unit_of_measure=UnitOfMeasure.UNITS,
            confidence_score=0.3,
            estimated_shelf_life_days=3,
            notes="Needs manual classification - defaulted to finished product"
        )

    def _calculate_confidence_score(self, pattern: str, normalized_name: str) -> float:
        """Calculate a confidence score for ``pattern`` matching ``normalized_name``.

        Scoring:
            * 0.95 when the pattern matches the whole name.
            * Otherwise 0.8, plus 0.1 when the match sits on word
              boundaries, minus 0.2 when the matched text covers less
              than half of the name.
            * The result is clamped to the range 0.3 - 0.95.
        """
        # Whole-name match. Comparing the regex source to the name would
        # never succeed for patterns containing metacharacters (r'\bpan\b').
        if re.fullmatch(pattern, normalized_name, re.IGNORECASE):
            return 0.95

        score = 0.8

        # Boost score for word boundary matches
        if re.search(r'\b' + pattern + r'\b', normalized_name, re.IGNORECASE):
            score += 0.1

        # Measure the matched text, not the regex source, so anchors and
        # wildcards do not inflate the apparent specificity of a pattern.
        found = re.search(pattern, normalized_name, re.IGNORECASE)
        matched_length = len(found.group()) if found else len(pattern)
        if matched_length < len(normalized_name) / 2:
            score -= 0.2

        return min(0.95, max(0.3, score))

    def _suggest_clean_name(self, original_name: str, normalized_name: str) -> str:
        """Suggest a cleaned, capitalized version of the product name.

        Each whitespace-separated word is capitalized, except all-caps
        words of up to three characters, which are kept as abbreviations.
        ``normalized_name`` is accepted for interface compatibility and not
        used. Returns ``""`` for an empty or ``None`` name.
        """
        if not original_name:
            return ""

        return ' '.join(
            word if (word.isupper() and len(word) <= 3) else word.capitalize()
            for word in original_name.split()
        )

    def _suggest_supplier(self, normalized_name: str,
                          supplier_hints: List[str]) -> Optional[str]:
        """Return ``"Suggested: <Hint>"`` for the first hint found in the name, else ``None``."""
        return next(
            (f"Suggested: {hint.title()}" for hint in supplier_hints
             if hint in normalized_name),
            None,
        )

    def _is_seasonal_product(self, normalized_name: str) -> bool:
        """Return True when the name matches any seasonal indicator pattern."""
        return any(
            re.search(pattern, normalized_name, re.IGNORECASE)
            for patterns in self.seasonal_patterns.values()
            for pattern in patterns
        )

    def _analyze_business_model(self, suggestions: List["ProductSuggestion"]) -> Dict[str, Any]:
        """Analyze the business model from the share of ingredient suggestions.

        Returns:
            ``{"model": "unknown", "confidence": 0.0}`` for an empty list.
            Otherwise a dict with ``model`` ("production" when at least 70%
            of the suggestions are ingredients, "retail" when at most 30%,
            else "hybrid"), ``confidence``, ``ingredient_ratio`` and
            ``recommendations``. The result is also logged.
        """
        total = len(suggestions)
        if total == 0:
            return {"model": "unknown", "confidence": 0.0}

        ingredient_count = sum(
            1 for s in suggestions if s.product_type == ProductType.INGREDIENT)
        finished_count = sum(
            1 for s in suggestions if s.product_type == ProductType.FINISHED_PRODUCT)

        ingredient_ratio = ingredient_count / total

        if ingredient_ratio >= 0.7:
            model = "production"  # Production bakery
        elif ingredient_ratio <= 0.3:
            model = "retail"  # Retail/Distribution bakery
        else:
            model = "hybrid"  # Mixed model

        # Distance of the ratio from an even split, scaled to 0..1, floor 0.1.
        confidence = max(abs(ingredient_ratio - 0.5) * 2, 0.1)

        logger.info("Business model analysis",
                    model=model, confidence=confidence,
                    ingredient_count=ingredient_count,
                    finished_count=finished_count)

        return {
            "model": model,
            "confidence": confidence,
            "ingredient_ratio": ingredient_ratio,
            "recommendations": self._get_model_recommendations(model)
        }

    def _get_model_recommendations(self, model: str) -> List[str]:
        """Return setup recommendations for a business model; ``[]`` if unknown."""
        recommendations = {
            "production": [
                "Focus on ingredient inventory management",
                "Set up recipe cost calculation",
                "Configure supplier relationships",
                "Enable production planning features"
            ],
            "retail": [
                "Configure central baker relationships",
                "Set up delivery schedule tracking",
                "Enable finished product freshness monitoring",
                "Focus on sales forecasting"
            ],
            "hybrid": [
                "Configure both ingredient and finished product management",
                "Set up flexible inventory categories",
                "Enable both production and retail features"
            ]
        }

        return recommendations.get(model, [])
|
|
|
|
|
|
# Dependency injection
|
|
def get_product_classifier() -> ProductClassifierService:
    """Dependency-injection provider: build and return a new classifier service."""
    service = ProductClassifierService()
    return service