Fix new services implementation 5

Urtzi Alfaro
2025-08-15 17:53:59 +02:00
parent 03b4d4185d
commit f7de9115d1
43 changed files with 1714 additions and 891 deletions

View File

@@ -26,28 +26,6 @@ def get_sales_service():
"""Dependency injection for SalesService"""
return SalesService()
@router.get("/tenants/{tenant_id}/sales/products")
async def get_products_list(
tenant_id: UUID = Path(..., description="Tenant ID"),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
sales_service: SalesService = Depends(get_sales_service)
):
"""Get list of products using repository pattern"""
try:
logger.debug("Getting products list with repository pattern", tenant_id=tenant_id)
products = await sales_service.get_products_list(str(tenant_id))
logger.debug("Products list retrieved using repository",
count=len(products),
tenant_id=tenant_id)
return products
except Exception as e:
logger.error("Failed to get products list",
error=str(e),
tenant_id=tenant_id)
raise HTTPException(status_code=500, detail=f"Failed to get products list: {str(e)}")
@router.post("/tenants/{tenant_id}/sales", response_model=SalesDataResponse)
async def create_sales_record(
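The removed endpoint relied on the FastAPI dependency-injection hook shown at the top of the hunk. For reference, a minimal, self-contained sketch of that pattern; the service and route names here are hypothetical stand-ins, not part of this commit:

from typing import Any, Dict, List
from fastapi import APIRouter, Depends, HTTPException

router = APIRouter()

class DemoService:
    # Hypothetical stand-in for SalesService
    async def list_items(self) -> List[Dict[str, Any]]:
        return []

def get_demo_service() -> DemoService:
    # FastAPI invokes this once per request and injects the result via Depends
    return DemoService()

@router.get("/items")
async def list_items(service: DemoService = Depends(get_demo_service)):
    try:
        return await service.list_items()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to list items: {e}")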

View File

@@ -97,8 +97,9 @@ class SalesRepository(BaseRepository[SalesData, SalesDataCreate, SalesDataUpdate]):
# Apply pagination
stmt = stmt.offset(query_params.offset).limit(query_params.limit)
else:
# Default ordering
stmt = stmt.order_by(desc(SalesData.date)).limit(50)
# Default ordering with safety limit for direct repository calls
# Note: API calls always provide query_params, so this only applies to direct usage
stmt = stmt.order_by(desc(SalesData.date)).limit(10000)
result = await self.session.execute(stmt)
records = result.scalars().all()
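For context, a minimal sketch of the pagination-or-default branch above in SQLAlchemy 2.0 async style; the model is a cut-down stand-in for the real SalesData, and QueryParams is assumed to carry the caller's offset/limit:

from dataclasses import dataclass
from datetime import date as date_type
from typing import Optional
from sqlalchemy import Date, String, desc, select
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column

class Base(DeclarativeBase):
    pass

class SalesData(Base):
    # Cut-down stand-in for the real model in this repository
    __tablename__ = "sales_data"
    id: Mapped[int] = mapped_column(primary_key=True)
    tenant_id: Mapped[str] = mapped_column(String)
    date: Mapped[date_type] = mapped_column(Date)

@dataclass
class QueryParams:
    offset: int = 0
    limit: int = 50

async def list_sales(session, tenant_id: str, query_params: Optional[QueryParams] = None):
    stmt = select(SalesData).where(SalesData.tenant_id == tenant_id)
    if query_params is not None:
        # API callers always supply pagination, so they hit this branch
        stmt = stmt.offset(query_params.offset).limit(query_params.limit)
    else:
        # Direct repository usage falls back to newest-first with a hard cap
        stmt = stmt.order_by(desc(SalesData.date)).limit(10_000)
    result = await session.execute(stmt)
    return result.scalars().all()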
@@ -279,24 +280,3 @@ class SalesRepository(BaseRepository[SalesData, SalesDataCreate, SalesDataUpdate]):
logger.error("Failed to validate sales record", error=str(e), record_id=record_id)
raise
async def get_product_statistics(self, tenant_id: str) -> List[Dict[str, Any]]:
"""Get product statistics for tenant"""
try:
# Note: product_name field was removed - product info now managed via inventory service
# This method should be updated to query products from inventory service
# For now, return inventory_product_ids to avoid breaking existing code
stmt = select(SalesData.inventory_product_id).where(
and_(
SalesData.tenant_id == tenant_id,
SalesData.inventory_product_id.is_not(None)
)
).distinct()
result = await self.session.execute(stmt)
products = [str(row[0]) for row in result if row[0]]
return sorted(products)
except Exception as e:
logger.error("Failed to get product categories", error=str(e), tenant_id=tenant_id)
raise

View File

@@ -286,14 +286,67 @@ class AIOnboardingService:
)
suggestions.append(suggestion)
business_model = BusinessModelAnalysis(
model=business_model_raw.get("model", "unknown"),
confidence=business_model_raw.get("confidence", 0.0),
ingredient_count=business_model_raw.get("ingredient_count", 0),
finished_product_count=business_model_raw.get("finished_product_count", 0),
ingredient_ratio=business_model_raw.get("ingredient_ratio", 0.0),
recommendations=business_model_raw.get("recommendations", [])
)
# Check if enhanced business intelligence data is available
bi_data = product_analysis.get('__business_intelligence__')
if bi_data and bi_data.get('confidence_score', 0) > 0.6:
# Use enhanced business intelligence analysis
business_type = bi_data.get('business_type', 'bakery')
business_model_detected = bi_data.get('business_model', 'individual')
# Map business intelligence results to existing model format
model_mapping = {
'individual': 'individual_bakery',
'central_distribution': 'central_baker_satellite',
'central_bakery': 'central_baker_satellite',
'hybrid': 'hybrid_bakery'
}
mapped_model = model_mapping.get(business_model_detected, 'individual_bakery')
# Count ingredients vs finished products from suggestions
ingredient_count = sum(1 for s in suggestions if s.product_type == 'ingredient')
finished_product_count = sum(1 for s in suggestions if s.product_type == 'finished_product')
total_products = len(suggestions)
ingredient_ratio = ingredient_count / total_products if total_products > 0 else 0.0
# Enhanced recommendations based on BI analysis
enhanced_recommendations = bi_data.get('recommendations', [])
# Add business type specific recommendations
if business_type == 'coffee_shop':
enhanced_recommendations.extend([
"Configure beverage inventory management",
"Set up quick-service item tracking",
"Enable all-day service optimization"
])
business_model = BusinessModelAnalysis(
model=mapped_model,
confidence=bi_data.get('confidence_score', 0.0),
ingredient_count=ingredient_count,
finished_product_count=finished_product_count,
ingredient_ratio=ingredient_ratio,
recommendations=enhanced_recommendations[:6] # Limit to top 6 recommendations
)
logger.info("Using enhanced business intelligence for model analysis",
detected_type=business_type,
detected_model=business_model_detected,
mapped_model=mapped_model,
confidence=bi_data.get('confidence_score'))
else:
# Fallback to basic inventory service analysis
business_model = BusinessModelAnalysis(
model=business_model_raw.get("model", "unknown"),
confidence=business_model_raw.get("confidence", 0.0),
ingredient_count=business_model_raw.get("ingredient_count", 0),
finished_product_count=business_model_raw.get("finished_product_count", 0),
ingredient_ratio=business_model_raw.get("ingredient_ratio", 0.0),
recommendations=business_model_raw.get("recommendations", [])
)
logger.info("Using basic inventory service business model analysis")
# Calculate confidence metrics
high_confidence_count = sum(1 for s in suggestions if s.confidence_score >= 0.7)
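The mapping step above normalizes whatever label the BI service detects onto the model names the rest of onboarding expects, with dict.get supplying the fallback. The same idea in isolation (the helper name is hypothetical; the mapping values are copied from the hunk):

MODEL_MAPPING = {
    'individual': 'individual_bakery',
    'central_distribution': 'central_baker_satellite',
    'central_bakery': 'central_baker_satellite',
    'hybrid': 'hybrid_bakery',
}

def map_business_model(detected: str) -> str:
    # Unknown labels degrade gracefully to the most common model
    return MODEL_MAPPING.get(detected, 'individual_bakery')

assert map_business_model('hybrid') == 'hybrid_bakery'
assert map_business_model('food_truck') == 'individual_bakery'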
@@ -674,6 +727,85 @@ class AIOnboardingService:
"avg_unit_price": avg_unit_price
}
# Add enhanced business intelligence analysis
try:
from app.services.business_intelligence_service import BusinessIntelligenceService
bi_service = BusinessIntelligenceService()
# Convert parsed data to format expected by BI service
sales_data = []
product_data = []
for row in rows:
# Create sales record from CSV row
sales_record = {
'date': row.get(date_column, ''),
'product_name': row.get(product_column, ''),
'name': row.get(product_column, ''),
'quantity_sold': 0,
'revenue': 0,
'location_id': row.get('location', 'main'),
'sales_channel': row.get('channel', 'in_store'),
'supplier_name': row.get('supplier', ''),
'brand': row.get('brand', '')
}
# Parse quantity
if quantity_column:
try:
qty_raw = row.get(quantity_column, 1)
if qty_raw and str(qty_raw).strip():
sales_record['quantity_sold'] = int(float(str(qty_raw).replace(',', '.')))
except (ValueError, TypeError):
sales_record['quantity_sold'] = 1
# Parse revenue
if revenue_column:
try:
rev_raw = row.get(revenue_column)
if rev_raw and str(rev_raw).strip():
sales_record['revenue'] = float(str(rev_raw).replace(',', '.').replace('€', '').replace('$', '').strip())
except (ValueError, TypeError):
pass
sales_data.append(sales_record)
# Create product data entry
product_data.append({
'name': sales_record['product_name'],
'supplier_name': sales_record.get('supplier_name', ''),
'brand': sales_record.get('brand', '')
})
# Run business intelligence analysis
if sales_data:
detection_result = await bi_service.analyze_business_from_sales_data(
sales_data=sales_data,
product_data=product_data
)
# Store business intelligence results in product_analysis
product_analysis['__business_intelligence__'] = {
"business_type": detection_result.business_type,
"business_model": detection_result.business_model,
"confidence_score": detection_result.confidence_score,
"indicators": detection_result.indicators,
"recommendations": detection_result.recommendations,
"analysis_summary": f"{detection_result.business_type.title()} - {detection_result.business_model.replace('_', ' ').title()}"
}
logger.info("Enhanced business intelligence analysis completed",
business_type=detection_result.business_type,
business_model=detection_result.business_model,
confidence=detection_result.confidence_score)
else:
logger.warning("No sales data available for business intelligence analysis")
except Exception as bi_error:
logger.warning("Business intelligence analysis failed", error=str(bi_error))
# Continue with basic analysis even if BI fails
return product_analysis
except Exception as e:
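The quantity and revenue parsing above tolerates European decimal commas and embedded currency symbols. A self-contained version of that normalization; the helper names are hypothetical, not from the commit:

def parse_quantity(raw, default: int = 1) -> int:
    # "3,5" (decimal comma) becomes 3.5, then truncates to 3
    try:
        text = str(raw).strip()
        return int(float(text.replace(',', '.'))) if text else default
    except (ValueError, TypeError):
        return default

def parse_revenue(raw) -> float:
    try:
        text = str(raw).strip()
        if not text:
            return 0.0
        # Strip currency symbols before converting the decimal comma
        for symbol in ('€', '$'):
            text = text.replace(symbol, '')
        return float(text.replace(',', '.').strip())
    except (ValueError, TypeError):
        return 0.0

assert parse_quantity('3,5') == 3
assert parse_revenue('€12,50') == 12.5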

View File

@@ -428,7 +428,7 @@ class DataImportService:
repository: SalesRepository,
filename: Optional[str] = None
) -> Dict[str, Any]:
"""Enhanced CSV processing with better data handling"""
"""Enhanced CSV processing with batch product resolution for better reliability"""
try:
reader = csv.DictReader(io.StringIO(csv_content))
rows = list(reader)
@@ -445,22 +445,41 @@ class DataImportService:
# Enhanced column mapping
column_mapping = self._detect_columns(list(rows[0].keys()))
records_created = 0
errors = []
warnings = []
# Pre-process to extract unique products for batch creation
unique_products = set()
parsed_rows = []
logger.info(f"Processing {len(rows)} records from CSV with enhanced mapping")
logger.info(f"Pre-processing {len(rows)} records to identify unique products")
for index, row in enumerate(rows):
try:
# Enhanced data parsing and validation
parsed_data = await self._parse_row_data(row, column_mapping, index + 1)
if parsed_data.get("skip"):
errors.extend(parsed_data.get("errors", []))
warnings.extend(parsed_data.get("warnings", []))
continue
# Resolve product name to inventory_product_id
if not parsed_data.get("skip"):
unique_products.add((
parsed_data["product_name"],
parsed_data.get("product_category", "general")
))
parsed_rows.append((index, parsed_data))
except Exception as e:
logger.warning(f"Failed to parse row {index + 1}: {e}")
continue
logger.info(f"Found {len(unique_products)} unique products, attempting batch resolution")
# Try to resolve/create all unique products in batch
await self._batch_resolve_products(unique_products, tenant_id)
# Now process the actual sales records
records_created = 0
errors = []
warnings = []
logger.info(f"Processing {len(parsed_rows)} validated records for sales creation")
for index, parsed_data in parsed_rows:
try:
# Resolve product name to inventory_product_id (should be cached now)
inventory_product_id = await self._resolve_product_to_inventory_id(
parsed_data["product_name"],
parsed_data.get("product_category"),
@@ -914,47 +933,57 @@ class DataImportService:
logger.info("Import cache cleared for new session")
async def _resolve_product_to_inventory_id(self, product_name: str, product_category: Optional[str], tenant_id: UUID) -> Optional[UUID]:
"""Resolve a product name to an inventory_product_id via the inventory service with caching and rate limiting"""
"""Resolve a product name to an inventory_product_id via the inventory service with improved error handling and fallback"""
# Check cache first
if product_name in self.product_cache:
logger.debug("Product resolved from cache", product_name=product_name, tenant_id=tenant_id)
return self.product_cache[product_name]
# Skip if this product already failed to resolve
# Skip if this product already failed to resolve after all attempts
if product_name in self.failed_products:
logger.debug("Skipping previously failed product", product_name=product_name, tenant_id=tenant_id)
return None
max_retries = 3
base_delay = 1.0 # Start with 1 second delay
max_retries = 5 # Increased retries
base_delay = 2.0 # Increased base delay
fallback_retry_delay = 10.0 # Longer delay for fallback attempts
for attempt in range(max_retries):
try:
# Add delay before API calls to avoid rate limiting
# Add progressive delay to avoid rate limiting
if attempt > 0:
delay = base_delay * (2 ** (attempt - 1)) # Exponential backoff
# Use longer delays for later attempts
if attempt >= 3:
delay = fallback_retry_delay # Use fallback delay for later attempts
else:
delay = base_delay * (2 ** (attempt - 1)) # Exponential backoff
logger.info(f"Retrying product resolution after {delay}s delay",
product_name=product_name, attempt=attempt, tenant_id=tenant_id)
await asyncio.sleep(delay)
# First try to search for existing product by name
products = await self.inventory_client.search_products(product_name, tenant_id)
try:
products = await self.inventory_client.search_products(product_name, tenant_id)
if products:
# Return the first matching product's ID
product_id = products[0].get('id')
if product_id:
uuid_id = UUID(str(product_id))
self.product_cache[product_name] = uuid_id # Cache for future use
logger.info("Resolved product to existing inventory ID",
product_name=product_name, product_id=product_id, tenant_id=tenant_id)
return uuid_id
except Exception as search_error:
logger.warning("Product search failed, trying direct creation",
product_name=product_name, error=str(search_error), tenant_id=tenant_id)
if products:
# Return the first matching product's ID
product_id = products[0].get('id')
if product_id:
uuid_id = UUID(str(product_id))
self.product_cache[product_name] = uuid_id # Cache for future use
logger.info("Resolved product to existing inventory ID",
product_name=product_name, product_id=product_id, tenant_id=tenant_id)
return uuid_id
# Add delay before creation attempt to avoid hitting rate limits
await asyncio.sleep(1.0)
# Add small delay before creation attempt to avoid hitting rate limits
await asyncio.sleep(0.5)
# If not found, create a new ingredient/product in inventory
# If not found or search failed, create a new ingredient/product in inventory
ingredient_data = {
'name': product_name,
'type': 'finished_product', # Assuming sales are of finished products
@@ -965,36 +994,133 @@ class DataImportService:
'category': product_category or 'general'
}
created_product = await self.inventory_client.create_ingredient(ingredient_data, str(tenant_id))
if created_product and created_product.get('id'):
product_id = created_product['id']
uuid_id = UUID(str(product_id))
self.product_cache[product_name] = uuid_id # Cache for future use
logger.info("Created new inventory product for sales data",
product_name=product_name, product_id=product_id, tenant_id=tenant_id)
return uuid_id
try:
created_product = await self.inventory_client.create_ingredient(ingredient_data, str(tenant_id))
if created_product and created_product.get('id'):
product_id = created_product['id']
uuid_id = UUID(str(product_id))
self.product_cache[product_name] = uuid_id # Cache for future use
logger.info("Created new inventory product for sales data",
product_name=product_name, product_id=product_id, tenant_id=tenant_id)
return uuid_id
except Exception as creation_error:
logger.warning("Product creation failed",
product_name=product_name, error=str(creation_error), tenant_id=tenant_id)
logger.warning("Failed to resolve or create product in inventory",
product_name=product_name, tenant_id=tenant_id, attempt=attempt)
except Exception as e:
error_str = str(e)
if "429" in error_str or "rate limit" in error_str.lower():
logger.warning("Rate limit hit, retrying",
if "429" in error_str or "rate limit" in error_str.lower() or "too many requests" in error_str.lower():
logger.warning("Rate limit or service overload detected, retrying with longer delay",
product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
if attempt < max_retries - 1:
continue # Retry with exponential backoff
elif "503" in error_str or "502" in error_str or "service unavailable" in error_str.lower():
logger.warning("Service unavailable, retrying with backoff",
product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
if attempt < max_retries - 1:
continue # Retry for service unavailable errors
elif "timeout" in error_str.lower() or "connection" in error_str.lower():
logger.warning("Network issue detected, retrying",
product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
if attempt < max_retries - 1:
continue # Retry for network issues
else:
logger.error("Error resolving product to inventory ID",
logger.error("Non-retryable error resolving product to inventory ID",
error=error_str, product_name=product_name, tenant_id=tenant_id)
break # Don't retry for non-rate-limit errors
if attempt < max_retries - 1:
# Still retry even for other errors, in case it's transient
continue
else:
break # Don't retry on final attempt
# If all retries failed, mark as failed and return None
# If all retries failed, log detailed error but don't mark as permanently failed yet
# Instead, we'll implement a fallback mechanism
logger.error("Failed to resolve product after all retries, attempting fallback",
product_name=product_name, tenant_id=tenant_id)
# FALLBACK: Try to create a temporary product with minimal data
try:
# Use a simplified approach with minimal data
fallback_data = {
'name': product_name,
'type': 'finished_product',
'unit': 'unit',
'current_stock': 0,
'cost_per_unit': 0
}
logger.info("Attempting fallback product creation with minimal data",
product_name=product_name, tenant_id=tenant_id)
created_product = await self.inventory_client.create_ingredient(fallback_data, str(tenant_id))
if created_product and created_product.get('id'):
product_id = created_product['id']
uuid_id = UUID(str(product_id))
self.product_cache[product_name] = uuid_id
logger.info("SUCCESS: Fallback product creation succeeded",
product_name=product_name, product_id=product_id, tenant_id=tenant_id)
return uuid_id
except Exception as fallback_error:
logger.error("Fallback product creation also failed",
product_name=product_name, error=str(fallback_error), tenant_id=tenant_id)
# Only mark as permanently failed after all attempts including fallback
self.failed_products.add(product_name)
logger.error("Failed to resolve product after all retries",
logger.error("CRITICAL: Permanently failed to resolve product - this will result in missing training data",
product_name=product_name, tenant_id=tenant_id)
return None
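The retry loop above yields a delay schedule of 2 s, 4 s, then a flat 10 s for the last two attempts. The schedule factored out for clarity; the constants are copied from the method:

BASE_DELAY = 2.0             # seconds
FALLBACK_RETRY_DELAY = 10.0  # ceiling for late attempts

def retry_delay(attempt: int) -> float:
    # Delay slept before `attempt`; attempt 0 runs immediately
    if attempt == 0:
        return 0.0
    if attempt >= 3:
        return FALLBACK_RETRY_DELAY
    return BASE_DELAY * (2 ** (attempt - 1))

assert [retry_delay(a) for a in range(5)] == [0.0, 2.0, 4.0, 10.0, 10.0]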
async def _batch_resolve_products(self, unique_products: set, tenant_id: UUID) -> None:
"""Batch resolve/create products to reduce API calls and improve success rate"""
if not unique_products:
return
logger.info(f"Starting batch product resolution for {len(unique_products)} unique products")
# Convert set to list for easier handling
products_list = list(unique_products)
batch_size = 5 # Process in smaller batches to avoid overwhelming the inventory service
for i in range(0, len(products_list), batch_size):
batch = products_list[i:i + batch_size]
logger.info(f"Processing batch {i//batch_size + 1}/{(len(products_list) + batch_size - 1)//batch_size}")
# Process each product in the batch with retry logic
for product_name, product_category in batch:
try:
# Skip if already in cache or failed list
if product_name in self.product_cache or product_name in self.failed_products:
continue
# Try to resolve the product
await self._resolve_product_to_inventory_id(product_name, product_category, tenant_id)
# Add small delay between products to be gentle on the API
await asyncio.sleep(0.5)
except Exception as e:
logger.warning(f"Failed to batch process product {product_name}: {e}")
continue
# Add delay between batches
if i + batch_size < len(products_list):
logger.info("Waiting between batches to avoid rate limiting...")
await asyncio.sleep(2.0)
successful_resolutions = len([p for p, _ in products_list if p in self.product_cache])
failed_resolutions = len([p for p, _ in products_list if p in self.failed_products])
logger.info(f"Batch product resolution completed: {successful_resolutions} successful, {failed_resolutions} failed")
if failed_resolutions > 0:
logger.warning(f"ATTENTION: {failed_resolutions} products failed to resolve - these will be missing from training data")
return
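The batch walk above is plain fixed-size chunking with pacing sleeps between chunks; the progress log uses ceiling division for the batch count. The chunking in isolation:

def chunks(items, size):
    # Yield consecutive slices of at most `size` elements
    for i in range(0, len(items), size):
        yield items[i:i + size]

products = ['croissant', 'baguette', 'brioche', 'madeleine', 'canele', 'eclair', 'kouign-amann']
batches = list(chunks(products, 5))
# Same ceiling division as the "batch X/Y" log line above
assert len(batches) == (len(products) + 5 - 1) // 5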
def _structure_messages(self, messages: List[Union[str, Dict]]) -> List[Dict[str, Any]]:
"""Convert string messages to structured format"""
structured = []

View File

@@ -285,26 +285,6 @@ class SalesService:
# Don't fail the main operation for auxiliary actions
logger.warning("Failed to execute post-create actions", error=str(e), record_id=record.id)
async def get_products_list(self, tenant_id: str) -> List[Dict[str, Any]]:
"""Get list of all products with sales data for tenant using repository pattern"""
try:
async with get_db_transaction() as db:
repository = SalesRepository(db)
# Use repository method for product statistics
products = await repository.get_product_statistics(tenant_id)
logger.debug("Products list retrieved successfully",
tenant_id=tenant_id,
product_count=len(products))
return products
except Exception as e:
logger.error("Failed to get products list",
error=str(e),
tenant_id=tenant_id)
raise DatabaseError(f"Failed to get products list: {str(e)}")
# New inventory integration methods
async def search_inventory_products(self, search_term: str, tenant_id: UUID,