Create new services: inventory, recipes, suppliers
This commit is contained in:
467
services/inventory/app/services/product_classifier.py
Normal file
467
services/inventory/app/services/product_classifier.py
Normal file
@@ -0,0 +1,467 @@
|
||||
# services/inventory/app/services/product_classifier.py
|
||||
"""
|
||||
AI Product Classification Service
|
||||
Automatically classifies products from sales data during onboarding
|
||||
"""
|
||||
|
||||
import re
|
||||
import structlog
|
||||
from typing import Dict, Any, List, Optional, Tuple
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
|
||||
from app.models.inventory import ProductType, IngredientCategory, ProductCategory, UnitOfMeasure
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
@dataclass
class ProductSuggestion:
    """Inventory item proposed for one product name found in sales data.

    The first six fields are required (positional order matters to callers);
    the remaining fields are optional hints with neutral defaults.
    """

    # --- required ---------------------------------------------------------
    # Product name exactly as it was passed to the classifier.
    original_name: str
    # Cleaned-up display name proposed for the inventory item.
    suggested_name: str
    # Whether the item is treated as an ingredient or a finished product.
    product_type: ProductType
    # Value of the matched ingredient_category or product_category.
    category: str
    # Unit in which the item should be tracked.
    unit_of_measure: UnitOfMeasure
    # Confidence of the classification, from 0.0 to 1.0.
    confidence_score: float

    # --- optional hints ---------------------------------------------------
    # Expected shelf life in days, when the matched rule provides one.
    estimated_shelf_life_days: Optional[int] = None
    # Storage requirements taken from the matched rule.
    requires_refrigeration: bool = False
    requires_freezing: bool = False
    # True when the name matches one of the seasonal patterns.
    is_seasonal: bool = False
    # Free-text supplier hint, if any.
    suggested_supplier: Optional[str] = None
    # Free-text explanation of how the suggestion was produced.
    notes: Optional[str] = None
|
||||
|
||||
|
||||
class ProductClassifierService:
    """Rule-based product classification for onboarding automation.

    Product names coming from sales data are normalized and matched against
    regular-expression rule tables (ingredients, finished products, seasonal
    indicators). Each match is scored and the best one is turned into a
    ``ProductSuggestion``.
    """

    def __init__(self):
        self._load_classification_rules()

    def _load_classification_rules(self):
        """Load classification patterns and rules.

        Populates ``self.ingredient_patterns``, ``self.product_patterns`` and
        ``self.seasonal_patterns``. All patterns are lowercase and unaccented;
        ``_normalize_name`` brings product names into that same form.
        """

        # Ingredient patterns with high confidence
        self.ingredient_patterns = {
            IngredientCategory.FLOUR: {
                'patterns': [
                    r'harina', r'flour', r'trigo', r'wheat', r'integral', r'whole.*wheat',
                    r'centeno', r'rye', r'avena', r'oat', r'maiz', r'corn'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 365,
                'supplier_hints': ['molinos', 'harinera', 'mill']
            },
            IngredientCategory.YEAST: {
                'patterns': [
                    r'levadura', r'yeast', r'fermento', r'baker.*yeast', r'instant.*yeast'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730,
                'refrigeration': True
            },
            IngredientCategory.DAIRY: {
                'patterns': [
                    r'leche', r'milk', r'nata', r'cream', r'mantequilla', r'butter',
                    r'queso', r'cheese', r'yogur', r'yogurt'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 7,
                'refrigeration': True
            },
            IngredientCategory.EGGS: {
                'patterns': [
                    r'huevo', r'egg', r'clara', r'white', r'yema', r'yolk'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 28,
                'refrigeration': True
            },
            IngredientCategory.SUGAR: {
                'patterns': [
                    r'azucar', r'sugar', r'edulcorante', r'sweetener', r'miel', r'honey',
                    r'jarabe', r'syrup', r'mascabado', r'brown.*sugar'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 730
            },
            IngredientCategory.FATS: {
                'patterns': [
                    r'aceite', r'oil', r'grasa', r'fat', r'margarina', r'margarine',
                    r'manteca', r'lard', r'oliva', r'olive'
                ],
                'unit': UnitOfMeasure.LITERS,
                'shelf_life': 365
            },
            IngredientCategory.SALT: {
                'patterns': [
                    r'sal', r'salt', r'sodium', r'sodio'
                ],
                'unit': UnitOfMeasure.KILOGRAMS,
                'shelf_life': 1825  # 5 years
            },
            IngredientCategory.SPICES: {
                'patterns': [
                    r'canela', r'cinnamon', r'vainilla', r'vanilla', r'cacao', r'cocoa',
                    r'chocolate', r'anis', r'anise', r'cardamomo', r'cardamom',
                    r'jengibre', r'ginger', r'nuez.*moscada', r'nutmeg'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.ADDITIVES: {
                'patterns': [
                    r'polvo.*hornear', r'baking.*powder', r'bicarbonato', r'soda',
                    r'cremor.*tartaro', r'cream.*tartar', r'lecitina', r'lecithin',
                    r'conservante', r'preservative', r'emulsificante', r'emulsifier'
                ],
                'unit': UnitOfMeasure.GRAMS,
                'shelf_life': 730
            },
            IngredientCategory.PACKAGING: {
                'patterns': [
                    r'bolsa', r'bag', r'envase', r'container', r'papel', r'paper',
                    r'plastico', r'plastic', r'carton', r'cardboard'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1825
            }
        }

        # Finished product patterns
        self.product_patterns = {
            ProductCategory.BREAD: {
                'patterns': [
                    r'pan\b', r'bread', r'baguette', r'hogaza', r'loaf', r'molde',
                    r'integral', r'whole.*grain', r'centeno', r'rye.*bread'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'display_life': 24  # hours
            },
            ProductCategory.CROISSANTS: {
                'patterns': [
                    r'croissant', r'cruasan', r'napolitana', r'palmera', r'palmier'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 12
            },
            ProductCategory.PASTRIES: {
                'patterns': [
                    r'pastel', r'pastry', r'hojaldre', r'puff.*pastry', r'empanada',
                    r'milhojas', r'napoleon', r'eclair', r'profiterol'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 2,
                'display_life': 24,
                'refrigeration': True
            },
            ProductCategory.CAKES: {
                'patterns': [
                    r'tarta', r'cake', r'bizcocho', r'sponge', r'cheesecake',
                    r'tiramisu', r'mousse', r'torta'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3,
                'refrigeration': True
            },
            ProductCategory.COOKIES: {
                'patterns': [
                    r'galleta', r'cookie', r'biscuit', r'mantecada', r'madeleine'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 14
            },
            ProductCategory.MUFFINS: {
                'patterns': [
                    r'muffin', r'magdalena', r'cupcake', r'fairy.*cake'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 3
            },
            ProductCategory.SANDWICHES: {
                'patterns': [
                    r'sandwich', r'bocadillo', r'tostada', r'toast', r'bagel'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1,
                'display_life': 6,
                'refrigeration': True
            },
            ProductCategory.BEVERAGES: {
                'patterns': [
                    r'cafe', r'coffee', r'te\b', r'tea', r'chocolate.*caliente',
                    r'hot.*chocolate', r'zumo', r'juice', r'batido', r'smoothie'
                ],
                'unit': UnitOfMeasure.UNITS,
                'shelf_life': 1
            }
        }

        # Seasonal indicators
        self.seasonal_patterns = {
            'christmas': [r'navidad', r'christmas', r'turron', r'polvoron', r'roscon'],
            'easter': [r'pascua', r'easter', r'mona', r'torrija'],
            'summer': [r'helado', r'ice.*cream', r'granizado', r'sorbete']
        }

    def classify_product(self, product_name: str, sales_volume: Optional[float] = None) -> ProductSuggestion:
        """Classify a single product name into an inventory suggestion.

        Args:
            product_name: Raw product name from the sales data.
            sales_volume: Accepted for interface compatibility; not used by
                the current rules.

        Returns:
            The ingredient suggestion when its confidence is at least 0.7,
            otherwise the best finished-product suggestion, otherwise a
            low-confidence fallback suggestion.
        """
        normalized_name = self._normalize_name(product_name)

        # Ingredients are tried first and win outright at >= 0.7.
        # NOTE(review): a finished product that mentions an ingredient
        # (e.g. "tarta de chocolate") can therefore be returned as an
        # ingredient - confirm whether this precedence is intended.
        ingredient_result = self._classify_as_ingredient(normalized_name, product_name)
        if ingredient_result is not None and ingredient_result.confidence_score >= 0.7:
            return ingredient_result

        product_result = self._classify_as_finished_product(normalized_name, product_name)
        if product_result is not None:
            return product_result

        # Fallback: create generic finished product with low confidence
        return self._create_fallback_suggestion(product_name, normalized_name)

    def classify_products_batch(self, product_names: List[str],
                                sales_volumes: Optional[Dict[str, float]] = None) -> List[ProductSuggestion]:
        """Classify multiple products and run the business-model analysis.

        Args:
            product_names: Raw product names to classify, in order.
            sales_volumes: Optional mapping of product name to sales volume.

        Returns:
            One suggestion per input name, in the same order. The
            business-model analysis result is only logged, not returned.
        """
        volumes = sales_volumes or {}
        suggestions = [
            self.classify_product(name, volumes.get(name))
            for name in product_names
        ]

        # Analyze business model based on classification results (logged only).
        self._analyze_business_model(suggestions)

        return suggestions

    def _normalize_name(self, name: str) -> str:
        """Normalize a product name for pattern matching.

        Lowercases, strips leading Spanish articles, removes diacritics and
        punctuation, and collapses whitespace. Diacritics are removed because
        every rule pattern is written without accents; keeping them would
        make names such as "Azúcar" or "Café" unmatchable.

        Returns:
            The normalized name, or "" for a falsy input.
        """
        if not name:
            return ""

        import unicodedata  # function-scope so this block needs no file-level import change

        normalized = name.lower().strip()

        # Remove leading articles
        for prefix in ('el ', 'la ', 'los ', 'las ', 'un ', 'una '):
            if normalized.startswith(prefix):
                normalized = normalized[len(prefix):]

        # Strip accents: decompose, then drop the combining marks.
        decomposed = unicodedata.normalize('NFD', normalized)
        normalized = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))

        # Replace anything that is not a word character or whitespace.
        normalized = re.sub(r'[^\w\s]', ' ', normalized)

        # Normalize multiple spaces
        return re.sub(r'\s+', ' ', normalized).strip()

    def _find_best_match(self, rules: Dict[Any, Dict[str, Any]], normalized_name: str):
        """Return ``(category, config, score)`` for the best-scoring rule, or None.

        Every pattern of every category in ``rules`` is searched in
        ``normalized_name``; on equal scores the first match found is kept.
        """
        best = None
        best_score = 0.0

        for category, config in rules.items():
            for pattern in config['patterns']:
                if not re.search(pattern, normalized_name, re.IGNORECASE):
                    continue
                score = self._calculate_confidence_score(pattern, normalized_name)
                if score > best_score:
                    best_score = score
                    best = (category, config, score)

        return best

    def _classify_as_ingredient(self, normalized_name: str, original_name: str) -> Optional[ProductSuggestion]:
        """Try to classify as ingredient.

        Returns:
            An ingredient suggestion when the best match scores at least 0.6,
            otherwise None.
        """
        best = self._find_best_match(self.ingredient_patterns, normalized_name)
        if best is None or best[2] < 0.6:
            return None

        category, config, best_score = best
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.INGREDIENT,
            category=category.value,
            unit_of_measure=config['unit'],
            confidence_score=best_score,
            estimated_shelf_life_days=config.get('shelf_life'),
            requires_refrigeration=config.get('refrigeration', False),
            requires_freezing=config.get('freezing', False),
            suggested_supplier=self._suggest_supplier(normalized_name, config.get('supplier_hints', [])),
            notes=f"Auto-classified as {category.value} ingredient"
        )

    def _classify_as_finished_product(self, normalized_name: str, original_name: str) -> Optional[ProductSuggestion]:
        """Try to classify as finished product.

        Returns:
            A finished-product suggestion for the best match (no minimum
            score is applied), or None when no pattern matches.
        """
        best = self._find_best_match(self.product_patterns, normalized_name)
        if best is None:
            return None

        category, config, best_score = best
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.FINISHED_PRODUCT,
            category=category.value,
            unit_of_measure=config['unit'],
            confidence_score=best_score,
            estimated_shelf_life_days=config.get('shelf_life'),
            requires_refrigeration=config.get('refrigeration', False),
            requires_freezing=config.get('freezing', False),
            is_seasonal=self._is_seasonal_product(normalized_name),
            notes=f"Auto-classified as {category.value}"
        )

    def _create_fallback_suggestion(self, original_name: str, normalized_name: str) -> ProductSuggestion:
        """Create a low-confidence (0.3) suggestion for an unclassified product."""
        return ProductSuggestion(
            original_name=original_name,
            suggested_name=self._suggest_clean_name(original_name, normalized_name),
            product_type=ProductType.FINISHED_PRODUCT,
            category=ProductCategory.OTHER_PRODUCTS.value,
            unit_of_measure=UnitOfMeasure.UNITS,
            confidence_score=0.3,
            estimated_shelf_life_days=3,
            notes="Needs manual classification - defaulted to finished product"
        )

    def _calculate_confidence_score(self, pattern: str, normalized_name: str) -> float:
        """Calculate the confidence score for a pattern that matched a name.

        Args:
            pattern: Regular expression from the rule tables.
            normalized_name: Name as produced by ``_normalize_name``.

        Returns:
            0.95 when the pattern matches the whole name; otherwise 0.8,
            plus 0.1 if the match sits on word boundaries, minus 0.2 if the
            matched text covers less than half of the name, clamped to
            the range 0.3-0.95.
        """
        # Patterns are regexes (e.g. r'pan\b'), so an exact match has to be
        # tested with fullmatch rather than by comparing the pattern source.
        if re.fullmatch(pattern, normalized_name, re.IGNORECASE):
            return 0.95

        score = 0.8

        # Boost score for word boundary matches
        if re.search(r'\b(?:' + pattern + r')\b', normalized_name, re.IGNORECASE):
            score += 0.1

        # Reduce score for partial matches. Measure the matched text, not the
        # pattern source, so regex metacharacters do not count as coverage.
        match = re.search(pattern, normalized_name, re.IGNORECASE)
        matched_length = len(match.group(0)) if match else len(pattern)
        if matched_length < len(normalized_name) / 2:
            score -= 0.2

        return min(0.95, max(0.3, score))

    def _suggest_clean_name(self, original_name: str, normalized_name: str) -> str:
        """Suggest a cleaned version of the product name.

        Each whitespace-separated word is capitalized, except all-uppercase
        words of up to three characters, which are kept as abbreviations.
        ``normalized_name`` is accepted for interface compatibility and is
        not used. A ``None`` name yields "".
        """
        words = (original_name or "").split()
        return ' '.join(
            word if (word.isupper() and len(word) <= 3) else word.capitalize()
            for word in words
        )

    def _suggest_supplier(self, normalized_name: str, supplier_hints: List[str]) -> Optional[str]:
        """Return a supplier hint for the first hint contained in the name, else None."""
        for hint in supplier_hints:
            if hint in normalized_name:
                return f"Suggested: {hint.title()}"

        return None

    def _is_seasonal_product(self, normalized_name: str) -> bool:
        """Check whether the name matches any seasonal pattern."""
        return any(
            re.search(pattern, normalized_name, re.IGNORECASE)
            for patterns in self.seasonal_patterns.values()
            for pattern in patterns
        )

    def _analyze_business_model(self, suggestions: List[ProductSuggestion]) -> Dict[str, Any]:
        """Analyze the business model based on product classifications.

        Returns:
            ``{"model": "unknown", "confidence": 0.0}`` for an empty list;
            otherwise a dict with ``model`` ("production" when at least 70%
            of suggestions are ingredients, "retail" when at most 30% are,
            else "hybrid"), ``confidence``, ``ingredient_ratio`` and
            ``recommendations``. The result is also logged.
        """
        total = len(suggestions)
        if total == 0:
            return {"model": "unknown", "confidence": 0.0}

        ingredient_count = sum(1 for s in suggestions if s.product_type == ProductType.INGREDIENT)
        finished_count = sum(1 for s in suggestions if s.product_type == ProductType.FINISHED_PRODUCT)

        ingredient_ratio = ingredient_count / total

        if ingredient_ratio >= 0.7:
            model = "production"  # Production bakery
        elif ingredient_ratio <= 0.3:
            model = "retail"  # Retail/Distribution bakery
        else:
            model = "hybrid"  # Mixed model

        # Distance of the ratio from an even split, floored at 0.1.
        # NOTE(review): this makes a perfectly even "hybrid" mix the least
        # confident result - confirm that is the intended meaning.
        confidence = max(abs(ingredient_ratio - 0.5) * 2, 0.1)

        logger.info("Business model analysis",
                    model=model, confidence=confidence,
                    ingredient_count=ingredient_count,
                    finished_count=finished_count)

        return {
            "model": model,
            "confidence": confidence,
            "ingredient_ratio": ingredient_ratio,
            "recommendations": self._get_model_recommendations(model)
        }

    def _get_model_recommendations(self, model: str) -> List[str]:
        """Get recommendations for a detected business model ([] if unknown)."""
        recommendations = {
            "production": [
                "Focus on ingredient inventory management",
                "Set up recipe cost calculation",
                "Configure supplier relationships",
                "Enable production planning features"
            ],
            "retail": [
                "Configure central baker relationships",
                "Set up delivery schedule tracking",
                "Enable finished product freshness monitoring",
                "Focus on sales forecasting"
            ],
            "hybrid": [
                "Configure both ingredient and finished product management",
                "Set up flexible inventory categories",
                "Enable both production and retail features"
            ]
        }

        return recommendations.get(model, [])
|
||||
|
||||
|
||||
# Dependency injection
|
||||
def get_product_classifier() -> ProductClassifierService:
    """Dependency-injection provider: build and return a new classifier service."""
    classifier = ProductClassifierService()
    return classifier
|
||||
Reference in New Issue
Block a user