Fix new services implementation 3

Urtzi Alfaro
2025-08-14 16:47:34 +02:00
parent 0951547e92
commit 03737430ee
51 changed files with 657 additions and 982 deletions


@@ -62,7 +62,7 @@ class EnhancedBakeryDataProcessor:
sales_data: pd.DataFrame,
weather_data: pd.DataFrame,
traffic_data: pd.DataFrame,
- product_name: str,
+ inventory_product_id: str,
tenant_id: str = None,
job_id: str = None,
session=None) -> pd.DataFrame:
@@ -73,7 +73,7 @@ class EnhancedBakeryDataProcessor:
sales_data: Historical sales data for the product
weather_data: Weather data
traffic_data: Traffic data
- product_name: Product name for logging
+ inventory_product_id: Inventory product UUID for logging
tenant_id: Optional tenant ID for tracking
job_id: Optional job ID for tracking
@@ -82,7 +82,7 @@ class EnhancedBakeryDataProcessor:
"""
try:
logger.info("Preparing enhanced training data using repository pattern",
- product_name=product_name,
+ inventory_product_id=inventory_product_id,
tenant_id=tenant_id,
job_id=job_id)
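
The keyword-argument logging calls in this diff (logger.info("...", inventory_product_id=..., tenant_id=..., job_id=...)) follow a structured-logging pattern. A minimal sketch of such a logger, assuming structlog is the library in use (the logger setup itself is not part of this commit, and all values below are placeholders):

import structlog

# Assumed setup; the project's real logging configuration may differ.
logger = structlog.get_logger(__name__)

logger.info("Preparing enhanced training data using repository pattern",
            inventory_product_id="00000000-0000-0000-0000-000000000000",  # placeholder UUID
            tenant_id="tenant-a",  # placeholder
            job_id="job-1")        # placeholder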
@@ -93,11 +93,11 @@ class EnhancedBakeryDataProcessor:
# Log data preparation start if we have tracking info
if job_id and tenant_id:
await repos['training_log'].update_log_progress(
job_id, 15, f"preparing_data_{product_name}", "running"
job_id, 15, f"preparing_data_{inventory_product_id}", "running"
)
# Step 1: Convert and validate sales data
- sales_clean = await self._process_sales_data(sales_data, product_name)
+ sales_clean = await self._process_sales_data(sales_data, inventory_product_id)
# FIX: Ensure timezone awareness before any operations
sales_clean = self._ensure_timezone_aware(sales_clean)
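
The call to self._ensure_timezone_aware(sales_clean) above addresses naive-versus-aware timestamp mismatches, but its implementation is not shown in this diff. A minimal sketch of what such a helper could look like, assuming a 'date' column and UTC as the target zone:

import pandas as pd

def _ensure_timezone_aware(self, df: pd.DataFrame, column: str = "date") -> pd.DataFrame:
    # Sketch only: localize naive timestamps to UTC and convert aware ones,
    # so later comparisons and merges do not raise timezone errors.
    out = df.copy()
    ts = pd.to_datetime(out[column])
    out[column] = ts.dt.tz_localize("UTC") if ts.dt.tz is None else ts.dt.tz_convert("UTC")
    return out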
@@ -129,32 +129,32 @@ class EnhancedBakeryDataProcessor:
# Step 9: Store processing metadata if we have a tenant
if tenant_id:
await self._store_processing_metadata(
- repos, tenant_id, product_name, prophet_data, job_id
+ repos, tenant_id, inventory_product_id, prophet_data, job_id
)
logger.info("Enhanced training data prepared successfully",
- product_name=product_name,
+ inventory_product_id=inventory_product_id,
data_points=len(prophet_data))
return prophet_data
except Exception as e:
logger.error("Error preparing enhanced training data",
- product_name=product_name,
+ inventory_product_id=inventory_product_id,
error=str(e))
raise
async def _store_processing_metadata(self,
repos: Dict,
tenant_id: str,
- product_name: str,
+ inventory_product_id: str,
processed_data: pd.DataFrame,
job_id: str = None):
"""Store data processing metadata using repository"""
try:
# Create processing metadata
metadata = {
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"data_points": len(processed_data),
"date_range": {
"start": processed_data['ds'].min().isoformat(),
@@ -167,7 +167,7 @@ class EnhancedBakeryDataProcessor:
# Log processing completion
if job_id:
await repos['training_log'].update_log_progress(
job_id, 25, f"data_prepared_{product_name}", "running"
job_id, 25, f"data_prepared_{inventory_product_id}", "running"
)
except Exception as e:
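
The repos['training_log'].update_log_progress(job_id, percent, step, status) calls above imply a small repository interface for progress tracking. A hypothetical sketch of that interface, for orientation only (the real repository lives elsewhere in the codebase and may differ):

class TrainingLogRepository:
    """Hypothetical illustration of the interface implied by the calls above."""

    def __init__(self, session):
        self.session = session  # e.g. an async database session

    async def update_log_progress(self, job_id: str, progress: int,
                                  step: str, status: str) -> None:
        # Persist the current progress percentage, step name, and status for the job.
        ...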
@@ -270,7 +270,7 @@ class EnhancedBakeryDataProcessor:
logger.warning("Date alignment failed, using original data", error=str(e))
return sales_data
- async def _process_sales_data(self, sales_data: pd.DataFrame, product_name: str) -> pd.DataFrame:
+ async def _process_sales_data(self, sales_data: pd.DataFrame, inventory_product_id: str) -> pd.DataFrame:
"""Process and clean sales data with enhanced validation"""
sales_clean = sales_data.copy()
@@ -305,9 +305,9 @@ class EnhancedBakeryDataProcessor:
sales_clean = sales_clean.dropna(subset=['quantity'])
sales_clean = sales_clean[sales_clean['quantity'] >= 0] # No negative sales
- # Filter for the specific product if product_name column exists
- if 'product_name' in sales_clean.columns:
-     sales_clean = sales_clean[sales_clean['product_name'] == product_name]
+ # Filter for the specific product if inventory_product_id column exists
+ if 'inventory_product_id' in sales_clean.columns:
+     sales_clean = sales_clean[sales_clean['inventory_product_id'] == inventory_product_id]
# Remove duplicate dates (keep the one with highest quantity)
sales_clean = sales_clean.sort_values(['date', 'quantity'], ascending=[True, False])
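
The sort above prepares for duplicate-date removal: sorting quantity in descending order within each date puts the highest-quantity row first. A sketch of the pandas pattern this comment describes, assuming the follow-up line (truncated here) drops duplicates on the date column:

sales_clean = sales_clean.sort_values(['date', 'quantity'], ascending=[True, False])
sales_clean = sales_clean.drop_duplicates(subset='date', keep='first')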