Fix new services implementation 3
@@ -28,22 +28,22 @@ router = APIRouter()

 training_service = TrainingService()

-@router.get("/tenants/{tenant_id}/models/{product_name}/active")
+@router.get("/tenants/{tenant_id}/models/{inventory_product_id}/active")
 async def get_active_model(
     tenant_id: str = Path(..., description="Tenant ID"),
-    product_name: str = Path(..., description="Product name"),
+    inventory_product_id: str = Path(..., description="Inventory product UUID"),
     db: AsyncSession = Depends(get_db)
 ):
     """
     Get the active model for a product - used by forecasting service
     """
     try:
-        logger.debug("Getting active model", tenant_id=tenant_id, product_name=product_name)
+        logger.debug("Getting active model", tenant_id=tenant_id, inventory_product_id=inventory_product_id)

         # ✅ FIX: Wrap SQL with text() for SQLAlchemy 2.0 and add case-insensitive product name matching
         query = text("""
             SELECT * FROM trained_models
             WHERE tenant_id = :tenant_id
-            AND LOWER(product_name) = LOWER(:product_name)
+            AND inventory_product_id = :inventory_product_id
             AND is_active = true
             AND is_production = true
             ORDER BY created_at DESC
@@ -52,16 +52,16 @@ async def get_active_model(

         result = await db.execute(query, {
             "tenant_id": tenant_id,
-            "product_name": product_name
+            "inventory_product_id": inventory_product_id
         })

         model_record = result.fetchone()

         if not model_record:
-            logger.info("No active model found", tenant_id=tenant_id, product_name=product_name)
+            logger.info("No active model found", tenant_id=tenant_id, inventory_product_id=inventory_product_id)
             raise HTTPException(
                 status_code=status.HTTP_404_NOT_FOUND,
-                detail=f"No active model found for product {product_name}"
+                detail=f"No active model found for product {inventory_product_id}"
             )

         # ✅ FIX: Wrap update query with text() too
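Note on the `text()` fix above: SQLAlchemy 2.0 no longer accepts raw SQL strings in `AsyncSession.execute()`; textual SQL must be wrapped in `sqlalchemy.text()` with `:name` placeholders bound as parameters. A minimal sketch of the pattern used in this hunk (the `LIMIT 1` is an assumption, since the hunk is truncated before the end of the query):

```python
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

async def fetch_active_model(db: AsyncSession, tenant_id: str, inventory_product_id: str):
    # SQLAlchemy 2.0 requires textual SQL to be wrapped in text();
    # :name placeholders are bound safely (no string interpolation).
    query = text("""
        SELECT * FROM trained_models
        WHERE tenant_id = :tenant_id
          AND inventory_product_id = :inventory_product_id
          AND is_active = true
          AND is_production = true
        ORDER BY created_at DESC
        LIMIT 1
    """)
    result = await db.execute(query, {
        "tenant_id": tenant_id,
        "inventory_product_id": inventory_product_id,
    })
    return result.fetchone()  # Row or None
```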
@@ -99,11 +99,11 @@ async def get_active_model(
         raise
     except Exception as e:
         error_msg = str(e) if str(e) else f"{type(e).__name__}: {repr(e)}"
-        logger.error(f"Failed to get active model: {error_msg}", tenant_id=tenant_id, product_name=product_name)
+        logger.error(f"Failed to get active model: {error_msg}", tenant_id=tenant_id, inventory_product_id=inventory_product_id)

         # Handle client disconnection gracefully
         if "EndOfStream" in str(type(e)) or "WouldBlock" in str(type(e)):
-            logger.info("Client disconnected during model retrieval", tenant_id=tenant_id, product_name=product_name)
+            logger.info("Client disconnected during model retrieval", tenant_id=tenant_id, inventory_product_id=inventory_product_id)
             raise HTTPException(
                 status_code=status.HTTP_408_REQUEST_TIMEOUT,
                 detail="Request connection closed"
@@ -205,7 +205,7 @@ async def list_models(
         models.append({
             "model_id": str(record.id),
             "tenant_id": str(record.tenant_id),
-            "product_name": record.product_name,
+            "inventory_product_id": str(record.inventory_product_id),
             "model_type": record.model_type,
             "model_path": record.model_path,
             "version": 1,  # Default version
@@ -291,12 +291,12 @@ async def execute_enhanced_training_job_background(
                 job_id=job_id)


-@router.post("/tenants/{tenant_id}/training/products/{product_name}", response_model=TrainingJobResponse)
+@router.post("/tenants/{tenant_id}/training/products/{inventory_product_id}", response_model=TrainingJobResponse)
 @track_execution_time("enhanced_single_product_training_duration_seconds", "training-service")
 async def start_enhanced_single_product_training(
     request: SingleProductTrainingRequest,
     tenant_id: str = Path(..., description="Tenant ID"),
-    product_name: str = Path(..., description="Product name"),
+    inventory_product_id: str = Path(..., description="Inventory product UUID"),
     request_obj: Request = None,
     current_tenant: str = Depends(get_current_tenant_id_dep),
     enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
@@ -323,7 +323,7 @@ async def start_enhanced_single_product_training(
         )

     logger.info("Starting enhanced single product training",
-                product_name=product_name,
+                inventory_product_id=inventory_product_id,
                 tenant_id=tenant_id)

     # Record metrics
@@ -331,12 +331,12 @@ async def start_enhanced_single_product_training(
     metrics.increment_counter("enhanced_single_product_training_total")

     # Generate enhanced job ID
-    job_id = f"enhanced_single_{tenant_id}_{product_name}_{uuid.uuid4().hex[:8]}"
+    job_id = f"enhanced_single_{tenant_id}_{inventory_product_id}_{uuid.uuid4().hex[:8]}"

     # Delegate to enhanced training service (single product method to be implemented)
     result = await enhanced_training_service.start_single_product_training(
         tenant_id=tenant_id,
-        product_name=product_name,
+        inventory_product_id=inventory_product_id,
         job_id=job_id,
         bakery_location=request.bakery_location or (40.4168, -3.7038)
     )
@@ -345,7 +345,7 @@ async def start_enhanced_single_product_training(
     metrics.increment_counter("enhanced_single_product_training_success_total")

     logger.info("Enhanced single product training completed",
-                product_name=product_name,
+                inventory_product_id=inventory_product_id,
                 job_id=job_id)

     return TrainingJobResponse(**result)
@@ -355,7 +355,7 @@ async def start_enhanced_single_product_training(
     metrics.increment_counter("enhanced_single_product_validation_errors_total")
     logger.error("Enhanced single product training validation error",
                  error=str(e),
-                 product_name=product_name)
+                 inventory_product_id=inventory_product_id)
     raise HTTPException(
         status_code=status.HTTP_400_BAD_REQUEST,
         detail=str(e)
@@ -365,7 +365,7 @@ async def start_enhanced_single_product_training(
     metrics.increment_counter("enhanced_single_product_training_errors_total")
     logger.error("Enhanced single product training failed",
                  error=str(e),
-                 product_name=product_name)
+                 inventory_product_id=inventory_product_id)
     raise HTTPException(
         status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
         detail="Enhanced single product training failed"
@@ -62,7 +62,7 @@ class EnhancedBakeryDataProcessor:
                                     sales_data: pd.DataFrame,
                                     weather_data: pd.DataFrame,
                                     traffic_data: pd.DataFrame,
-                                    product_name: str,
+                                    inventory_product_id: str,
                                     tenant_id: str = None,
                                     job_id: str = None,
                                     session=None) -> pd.DataFrame:
@@ -73,7 +73,7 @@ class EnhancedBakeryDataProcessor:
             sales_data: Historical sales data for the product
             weather_data: Weather data
             traffic_data: Traffic data
-            product_name: Product name for logging
+            inventory_product_id: Inventory product UUID for logging
             tenant_id: Optional tenant ID for tracking
             job_id: Optional job ID for tracking

@@ -82,7 +82,7 @@ class EnhancedBakeryDataProcessor:
         """
         try:
             logger.info("Preparing enhanced training data using repository pattern",
-                        product_name=product_name,
+                        inventory_product_id=inventory_product_id,
                         tenant_id=tenant_id,
                         job_id=job_id)

@@ -93,11 +93,11 @@ class EnhancedBakeryDataProcessor:
             # Log data preparation start if we have tracking info
             if job_id and tenant_id:
                 await repos['training_log'].update_log_progress(
-                    job_id, 15, f"preparing_data_{product_name}", "running"
+                    job_id, 15, f"preparing_data_{inventory_product_id}", "running"
                 )

             # Step 1: Convert and validate sales data
-            sales_clean = await self._process_sales_data(sales_data, product_name)
+            sales_clean = await self._process_sales_data(sales_data, inventory_product_id)

             # FIX: Ensure timezone awareness before any operations
             sales_clean = self._ensure_timezone_aware(sales_clean)
@@ -129,32 +129,32 @@ class EnhancedBakeryDataProcessor:
             # Step 9: Store processing metadata if we have a tenant
             if tenant_id:
                 await self._store_processing_metadata(
-                    repos, tenant_id, product_name, prophet_data, job_id
+                    repos, tenant_id, inventory_product_id, prophet_data, job_id
                 )

             logger.info("Enhanced training data prepared successfully",
-                        product_name=product_name,
+                        inventory_product_id=inventory_product_id,
                         data_points=len(prophet_data))

             return prophet_data

         except Exception as e:
             logger.error("Error preparing enhanced training data",
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             raise

     async def _store_processing_metadata(self,
                                          repos: Dict,
                                          tenant_id: str,
-                                         product_name: str,
+                                         inventory_product_id: str,
                                          processed_data: pd.DataFrame,
                                          job_id: str = None):
         """Store data processing metadata using repository"""
         try:
             # Create processing metadata
             metadata = {
-                "product_name": product_name,
+                "inventory_product_id": inventory_product_id,
                 "data_points": len(processed_data),
                 "date_range": {
                     "start": processed_data['ds'].min().isoformat(),
@@ -167,7 +167,7 @@ class EnhancedBakeryDataProcessor:
             # Log processing completion
             if job_id:
                 await repos['training_log'].update_log_progress(
-                    job_id, 25, f"data_prepared_{product_name}", "running"
+                    job_id, 25, f"data_prepared_{inventory_product_id}", "running"
                 )

         except Exception as e:
@@ -270,7 +270,7 @@ class EnhancedBakeryDataProcessor:
             logger.warning("Date alignment failed, using original data", error=str(e))
             return sales_data

-    async def _process_sales_data(self, sales_data: pd.DataFrame, product_name: str) -> pd.DataFrame:
+    async def _process_sales_data(self, sales_data: pd.DataFrame, inventory_product_id: str) -> pd.DataFrame:
         """Process and clean sales data with enhanced validation"""
         sales_clean = sales_data.copy()

@@ -305,9 +305,9 @@ class EnhancedBakeryDataProcessor:
         sales_clean = sales_clean.dropna(subset=['quantity'])
         sales_clean = sales_clean[sales_clean['quantity'] >= 0]  # No negative sales

-        # Filter for the specific product if product_name column exists
-        if 'product_name' in sales_clean.columns:
-            sales_clean = sales_clean[sales_clean['product_name'] == product_name]
+        # Filter for the specific product if inventory_product_id column exists
+        if 'inventory_product_id' in sales_clean.columns:
+            sales_clean = sales_clean[sales_clean['inventory_product_id'] == inventory_product_id]

         # Remove duplicate dates (keep the one with highest quantity)
         sales_clean = sales_clean.sort_values(['date', 'quantity'], ascending=[True, False])
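One subtlety in the filter above: if the `inventory_product_id` column holds `uuid.UUID` objects (as they come back from the database) while the function receives a string, the equality comparison silently matches nothing and yields an empty frame. A defensive sketch — normalizing both sides to strings is an assumption, not part of this diff:

```python
import uuid
import pandas as pd

def filter_product(sales_clean: pd.DataFrame, inventory_product_id: str) -> pd.DataFrame:
    if 'inventory_product_id' in sales_clean.columns:
        # Normalize both sides to strings so UUID objects and string
        # UUIDs compare equal instead of silently matching nothing.
        wanted = str(inventory_product_id)
        mask = sales_clean['inventory_product_id'].astype(str) == wanted
        sales_clean = sales_clean[mask]
    return sales_clean

# Example: a row keyed by uuid.UUID still matches a string lookup
df = pd.DataFrame({'inventory_product_id': [uuid.uuid4()], 'quantity': [3]})
assert len(filter_product(df, str(df['inventory_product_id'][0]))) == 1
```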
@@ -52,7 +52,7 @@ class BakeryProphetManager:

     async def train_bakery_model(self,
                                  tenant_id: str,
-                                 product_name: str,
+                                 inventory_product_id: str,
                                  df: pd.DataFrame,
                                  job_id: str) -> Dict[str, Any]:
         """
@@ -60,10 +60,10 @@ class BakeryProphetManager:
         Same interface as before - optimization happens automatically.
         """
         try:
-            logger.info(f"Training optimized bakery model for {product_name}")
+            logger.info(f"Training optimized bakery model for {inventory_product_id}")

             # Validate input data
-            await self._validate_training_data(df, product_name)
+            await self._validate_training_data(df, inventory_product_id)

             # Prepare data for Prophet
             prophet_data = await self._prepare_prophet_data(df)
@@ -72,8 +72,8 @@ class BakeryProphetManager:
             regressor_columns = self._extract_regressor_columns(prophet_data)

             # Automatically optimize hyperparameters (this is the new part)
-            logger.info(f"Optimizing hyperparameters for {product_name}...")
-            best_params = await self._optimize_hyperparameters(prophet_data, product_name, regressor_columns)
+            logger.info(f"Optimizing hyperparameters for {inventory_product_id}...")
+            best_params = await self._optimize_hyperparameters(prophet_data, inventory_product_id, regressor_columns)

             # Create optimized Prophet model
             model = self._create_optimized_prophet_model(best_params, regressor_columns)
@@ -92,7 +92,7 @@ class BakeryProphetManager:
             # Store model and metrics - Generate proper UUID for model_id
             model_id = str(uuid.uuid4())
             model_path = await self._store_model(
-                tenant_id, product_name, model, model_id, prophet_data, regressor_columns, best_params, training_metrics
+                tenant_id, inventory_product_id, model, model_id, prophet_data, regressor_columns, best_params, training_metrics
             )

             # Return same format as before, but with optimization info
@@ -112,17 +112,17 @@ class BakeryProphetManager:
                 }
             }

-            logger.info(f"Optimized model trained successfully for {product_name}. "
+            logger.info(f"Optimized model trained successfully for {inventory_product_id}. "
                         f"MAPE: {training_metrics.get('optimized_mape', 'N/A')}%")
             return model_info

         except Exception as e:
-            logger.error(f"Failed to train optimized bakery model for {product_name}: {str(e)}")
+            logger.error(f"Failed to train optimized bakery model for {inventory_product_id}: {str(e)}")
             raise

     async def _optimize_hyperparameters(self,
                                         df: pd.DataFrame,
-                                        product_name: str,
+                                        inventory_product_id: str,
                                         regressor_columns: List[str]) -> Dict[str, Any]:
         """
         Automatically optimize Prophet hyperparameters using Bayesian optimization.
@@ -130,7 +130,7 @@ class BakeryProphetManager:
         """

         # Determine product category automatically
-        product_category = self._classify_product(product_name, df)
+        product_category = self._classify_product(inventory_product_id, df)

         # Set optimization parameters based on category
         n_trials = {
@@ -140,7 +140,7 @@ class BakeryProphetManager:
             'intermittent': 15  # Reduced from 25
         }.get(product_category, 25)

-        logger.info(f"Product {product_name} classified as {product_category}, using {n_trials} trials")
+        logger.info(f"Product {inventory_product_id} classified as {product_category}, using {n_trials} trials")

         # Check data quality and adjust strategy
         total_sales = df['y'].sum()
@@ -148,12 +148,12 @@ class BakeryProphetManager:
         mean_sales = df['y'].mean()
         non_zero_days = len(df[df['y'] > 0])

-        logger.info(f"Data analysis for {product_name}: total_sales={total_sales:.1f}, "
+        logger.info(f"Data analysis for {inventory_product_id}: total_sales={total_sales:.1f}, "
                     f"zero_ratio={zero_ratio:.2f}, mean_sales={mean_sales:.2f}, non_zero_days={non_zero_days}")

         # Adjust strategy based on data characteristics
         if zero_ratio > 0.8 or non_zero_days < 30:
-            logger.warning(f"Very sparse data for {product_name}, using minimal optimization")
+            logger.warning(f"Very sparse data for {inventory_product_id}, using minimal optimization")
             return {
                 'changepoint_prior_scale': 0.001,
                 'seasonality_prior_scale': 0.01,
@@ -166,7 +166,7 @@ class BakeryProphetManager:
                 'uncertainty_samples': 100  # ✅ FIX: Minimal uncertainty sampling for very sparse data
             }
         elif zero_ratio > 0.6:
-            logger.info(f"Moderate sparsity for {product_name}, using conservative optimization")
+            logger.info(f"Moderate sparsity for {inventory_product_id}, using conservative optimization")
             return {
                 'changepoint_prior_scale': 0.01,
                 'seasonality_prior_scale': 0.1,
@@ -180,7 +180,7 @@ class BakeryProphetManager:
             }

         # Use unique seed for each product to avoid identical results
-        product_seed = hash(product_name) % 10000
+        product_seed = hash(str(inventory_product_id)) % 10000

         def objective(trial):
             try:
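Worth noting for the seed line above: Python's built-in `hash()` on strings is randomized per process (`PYTHONHASHSEED`), so `hash(str(inventory_product_id)) % 10000` produces a different seed on every run — the "unique seed per product" is not reproducible across restarts. A hedged alternative using a stable digest (a sketch, not part of the diff):

```python
import hashlib

def stable_product_seed(inventory_product_id: str, modulus: int = 10000) -> int:
    """Derive a per-product seed that is identical across processes and runs,
    unlike built-in hash(), which is salted per interpreter session."""
    digest = hashlib.sha256(str(inventory_product_id).encode("utf-8")).digest()
    return int.from_bytes(digest[:4], "big") % modulus

# The same UUID string always yields the same seed:
assert stable_product_seed("c0ffee00-1234-5678-9abc-def012345678") == \
       stable_product_seed("c0ffee00-1234-5678-9abc-def012345678")
```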
@@ -284,13 +284,13 @@ class BakeryProphetManager:
                         cv_scores.append(mape_like)

                     except Exception as fold_error:
-                        logger.debug(f"Fold failed for {product_name} trial {trial.number}: {str(fold_error)}")
+                        logger.debug(f"Fold failed for {inventory_product_id} trial {trial.number}: {str(fold_error)}")
                         continue

                 return np.mean(cv_scores) if len(cv_scores) > 0 else 100.0

             except Exception as trial_error:
-                logger.debug(f"Trial {trial.number} failed for {product_name}: {str(trial_error)}")
+                logger.debug(f"Trial {trial.number} failed for {inventory_product_id}: {str(trial_error)}")
                 return 100.0

         # Run optimization with product-specific seed
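The surrounding hunks sketch the Optuna flow: an `objective(trial)` that cross-validates a Prophet configuration and returns a MAPE-like score (100.0 on any failure), then a seeded study minimizing it. A minimal self-contained version of that pattern — the search space shown is illustrative, since the diff only reveals `changepoint_prior_scale`, `seasonality_prior_scale`, and `uncertainty_samples` among the tuned parameters:

```python
import numpy as np
import optuna

def run_product_optimization(objective, product_seed: int, n_trials: int) -> dict:
    # Seeded sampler so each product explores a different but repeatable
    # trajectory, avoiding identical results across products.
    sampler = optuna.samplers.TPESampler(seed=product_seed)
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(objective, n_trials=n_trials)
    return study.best_params

def example_objective(trial: optuna.Trial) -> float:
    # Illustrative search space; the real objective fits Prophet per fold
    # and returns a MAPE-like score, falling back to 100.0 on failure.
    cps = trial.suggest_float("changepoint_prior_scale", 1e-3, 0.5, log=True)
    sps = trial.suggest_float("seasonality_prior_scale", 0.01, 10.0, log=True)
    try:
        cv_scores = [abs(np.log(cps) + np.log(sps))]  # stand-in for CV folds
        return float(np.mean(cv_scores)) if cv_scores else 100.0
    except Exception:
        return 100.0

best = run_product_optimization(example_objective, product_seed=1234, n_trials=5)
```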
@@ -304,19 +304,19 @@ class BakeryProphetManager:
         best_params = study.best_params
         best_score = study.best_value

-        logger.info(f"Optimization completed for {product_name}. Best score: {best_score:.2f}%. "
+        logger.info(f"Optimization completed for {inventory_product_id}. Best score: {best_score:.2f}%. "
                     f"Parameters: {best_params}")

         # ✅ FIX: Log uncertainty sampling configuration for debugging confidence intervals
         uncertainty_samples = best_params.get('uncertainty_samples', 500)
-        logger.info(f"Prophet model will use {uncertainty_samples} uncertainty samples for {product_name} "
+        logger.info(f"Prophet model will use {uncertainty_samples} uncertainty samples for {inventory_product_id} "
                     f"(category: {product_category}, zero_ratio: {zero_ratio:.2f})")

         return best_params

-    def _classify_product(self, product_name: str, sales_data: pd.DataFrame) -> str:
+    def _classify_product(self, inventory_product_id: str, sales_data: pd.DataFrame) -> str:
         """Automatically classify product for optimization strategy - improved for bakery data"""
-        product_lower = product_name.lower()
+        product_lower = str(inventory_product_id).lower()

         # Calculate sales statistics
         total_sales = sales_data['y'].sum()
@@ -324,7 +324,7 @@ class BakeryProphetManager:
         zero_ratio = (sales_data['y'] == 0).sum() / len(sales_data)
         non_zero_days = len(sales_data[sales_data['y'] > 0])

-        logger.info(f"Product classification for {product_name}: total_sales={total_sales:.1f}, "
+        logger.info(f"Product classification for {inventory_product_id}: total_sales={total_sales:.1f}, "
                     f"mean_sales={mean_sales:.2f}, zero_ratio={zero_ratio:.2f}, non_zero_days={non_zero_days}")

         # Improved classification logic for bakery products
@@ -499,7 +499,7 @@ class BakeryProphetManager:

     async def _store_model(self,
                            tenant_id: str,
-                           product_name: str,
+                           inventory_product_id: str,
                            model: Prophet,
                            model_id: str,
                            training_data: pd.DataFrame,
@@ -520,7 +520,7 @@ class BakeryProphetManager:
         metadata = {
             "model_id": model_id,
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "regressor_columns": regressor_columns,
             "training_samples": len(training_data),
             "data_period": {
@@ -539,7 +539,7 @@ class BakeryProphetManager:
             json.dump(metadata, f, indent=2, default=str)

         # Store in memory
-        model_key = f"{tenant_id}:{product_name}"
+        model_key = f"{tenant_id}:{inventory_product_id}"
         self.models[model_key] = model
         self.model_metadata[model_key] = metadata

@@ -547,13 +547,13 @@ class BakeryProphetManager:
         try:
             async with self.database_manager.get_session() as db_session:
                 # Deactivate previous models for this product
-                await self._deactivate_previous_models_with_session(db_session, tenant_id, product_name)
+                await self._deactivate_previous_models_with_session(db_session, tenant_id, inventory_product_id)

                 # Create new database record
                 db_model = TrainedModel(
                     id=model_id,
                     tenant_id=tenant_id,
-                    product_name=product_name,
+                    inventory_product_id=inventory_product_id,
                     model_type="prophet_optimized",
                     job_id=model_id.split('_')[0],  # Extract job_id from model_id
                     model_path=str(model_path),
@@ -587,23 +587,23 @@ class BakeryProphetManager:
         logger.info(f"Optimized model stored at: {model_path}")
         return str(model_path)

-    async def _deactivate_previous_models_with_session(self, db_session, tenant_id: str, product_name: str):
+    async def _deactivate_previous_models_with_session(self, db_session, tenant_id: str, inventory_product_id: str):
         """Deactivate previous models for the same product using provided session"""
         try:
             # ✅ FIX: Wrap SQL string with text() for SQLAlchemy 2.0
             query = text("""
                 UPDATE trained_models
                 SET is_active = false, is_production = false
-                WHERE tenant_id = :tenant_id AND product_name = :product_name
+                WHERE tenant_id = :tenant_id AND inventory_product_id = :inventory_product_id
             """)

             await db_session.execute(query, {
                 "tenant_id": tenant_id,
-                "product_name": product_name
+                "inventory_product_id": inventory_product_id
             })

             # Note: Don't commit here, let the calling method handle the transaction
-            logger.info(f"Successfully deactivated previous models for {product_name}")
+            logger.info(f"Successfully deactivated previous models for {inventory_product_id}")

         except Exception as e:
             logger.error(f"Failed to deactivate previous models: {str(e)}")
@@ -630,14 +630,14 @@ class BakeryProphetManager:
             logger.error(f"Failed to generate forecast: {str(e)}")
             raise

-    async def _validate_training_data(self, df: pd.DataFrame, product_name: str):
+    async def _validate_training_data(self, df: pd.DataFrame, inventory_product_id: str):
         """Validate training data quality (unchanged)"""
         if df.empty:
-            raise ValueError(f"No training data available for {product_name}")
+            raise ValueError(f"No training data available for {inventory_product_id}")

         if len(df) < settings.MIN_TRAINING_DATA_DAYS:
             raise ValueError(
-                f"Insufficient training data for {product_name}: "
+                f"Insufficient training data for {inventory_product_id}: "
                 f"{len(df)} days, minimum required: {settings.MIN_TRAINING_DATA_DAYS}"
             )
@@ -91,7 +91,7 @@ class EnhancedBakeryMLTrainer:
         await self._validate_input_data(sales_df, tenant_id)

         # Get unique products from the sales data
-        products = sales_df['product_name'].unique().tolist()
+        products = sales_df['inventory_product_id'].unique().tolist()
         logger.info("Training enhanced models",
                     products_count=len(products),
                     products=products)
@@ -183,17 +183,17 @@ class EnhancedBakeryMLTrainer:
         """Process data for all products using enhanced processor with repository tracking"""
         processed_data = {}

-        for product_name in products:
+        for inventory_product_id in products:
             try:
                 logger.info("Processing data for product using enhanced processor",
-                            product_name=product_name)
+                            inventory_product_id=inventory_product_id)

                 # Filter sales data for this product
-                product_sales = sales_df[sales_df['product_name'] == product_name].copy()
+                product_sales = sales_df[sales_df['inventory_product_id'] == inventory_product_id].copy()

                 if product_sales.empty:
                     logger.warning("No sales data found for product",
-                                   product_name=product_name)
+                                   inventory_product_id=inventory_product_id)
                     continue

                 # Use enhanced data processor with repository tracking
@@ -201,19 +201,19 @@ class EnhancedBakeryMLTrainer:
                     sales_data=product_sales,
                     weather_data=weather_df,
                     traffic_data=traffic_df,
-                    product_name=product_name,
+                    inventory_product_id=inventory_product_id,
                     tenant_id=tenant_id,
                     job_id=job_id
                 )

-                processed_data[product_name] = processed_product_data
+                processed_data[inventory_product_id] = processed_product_data
                 logger.info("Enhanced processing completed",
-                            product_name=product_name,
+                            inventory_product_id=inventory_product_id,
                             data_points=len(processed_product_data))

             except Exception as e:
                 logger.error("Failed to process data using enhanced processor",
-                             product_name=product_name,
+                             inventory_product_id=inventory_product_id,
                              error=str(e))
                 continue

@@ -231,15 +231,15 @@ class EnhancedBakeryMLTrainer:
         base_progress = 45
         max_progress = 85

-        for product_name, product_data in processed_data.items():
+        for inventory_product_id, product_data in processed_data.items():
             product_start_time = time.time()
             try:
                 logger.info("Training enhanced model",
-                            product_name=product_name)
+                            inventory_product_id=inventory_product_id)

                 # Check if we have enough data
                 if len(product_data) < settings.MIN_TRAINING_DATA_DAYS:
-                    training_results[product_name] = {
+                    training_results[inventory_product_id] = {
                         'status': 'skipped',
                         'reason': 'insufficient_data',
                         'data_points': len(product_data),
@@ -247,7 +247,7 @@ class EnhancedBakeryMLTrainer:
                         'message': f'Need at least {settings.MIN_TRAINING_DATA_DAYS} data points, got {len(product_data)}'
                     }
                     logger.warning("Skipping product due to insufficient data",
-                                   product_name=product_name,
+                                   inventory_product_id=inventory_product_id,
                                    data_points=len(product_data),
                                    min_required=settings.MIN_TRAINING_DATA_DAYS)
                     continue
@@ -255,24 +255,24 @@ class EnhancedBakeryMLTrainer:
                 # Train the model using Prophet manager
                 model_info = await self.prophet_manager.train_bakery_model(
                     tenant_id=tenant_id,
-                    product_name=product_name,
+                    inventory_product_id=inventory_product_id,
                     df=product_data,
                     job_id=job_id
                 )

                 # Store model record using repository
                 model_record = await self._create_model_record(
-                    repos, tenant_id, product_name, model_info, job_id, product_data
+                    repos, tenant_id, inventory_product_id, model_info, job_id, product_data
                 )

                 # Create performance metrics record
                 if model_info.get('training_metrics'):
                     await self._create_performance_metrics(
                         repos, model_record.id if model_record else None,
-                        tenant_id, product_name, model_info['training_metrics']
+                        tenant_id, inventory_product_id, model_info['training_metrics']
                     )

-                training_results[product_name] = {
+                training_results[inventory_product_id] = {
                     'status': 'success',
                     'model_info': model_info,
                     'model_record_id': model_record.id if model_record else None,
@@ -282,7 +282,7 @@ class EnhancedBakeryMLTrainer:
                 }

                 logger.info("Successfully trained enhanced model",
-                            product_name=product_name,
+                            inventory_product_id=inventory_product_id,
                             model_record_id=model_record.id if model_record else None)

                 completed_products = i + 1
@@ -295,15 +295,15 @@ class EnhancedBakeryMLTrainer:
                 await self.status_publisher.progress_update(
                     progress=progress,
                     step="model_training",
-                    current_product=product_name,
-                    step_details=f"Enhanced training completed for {product_name}"
+                    current_product=inventory_product_id,
+                    step_details=f"Enhanced training completed for {inventory_product_id}"
                 )

             except Exception as e:
                 logger.error("Failed to train enhanced model",
-                             product_name=product_name,
+                             inventory_product_id=inventory_product_id,
                              error=str(e))
-                training_results[product_name] = {
+                training_results[inventory_product_id] = {
                     'status': 'error',
                     'error_message': str(e),
                     'data_points': len(product_data) if product_data is not None else 0,
@@ -320,8 +320,8 @@ class EnhancedBakeryMLTrainer:
                 await self.status_publisher.progress_update(
                     progress=progress,
                     step="model_training",
-                    current_product=product_name,
-                    step_details=f"Enhanced training failed for {product_name}: {str(e)}"
+                    current_product=inventory_product_id,
+                    step_details=f"Enhanced training failed for {inventory_product_id}: {str(e)}"
                 )

         return training_results
@@ -329,7 +329,7 @@ class EnhancedBakeryMLTrainer:
     async def _create_model_record(self,
                                    repos: Dict,
                                    tenant_id: str,
-                                   product_name: str,
+                                   inventory_product_id: str,
                                    model_info: Dict,
                                    job_id: str,
                                    processed_data: pd.DataFrame):
@@ -337,7 +337,7 @@ class EnhancedBakeryMLTrainer:
         try:
             model_data = {
                 "tenant_id": tenant_id,
-                "product_name": product_name,
+                "inventory_product_id": inventory_product_id,
                 "job_id": job_id,
                 "model_type": "enhanced_prophet",
                 "model_path": model_info.get("model_path"),
@@ -357,7 +357,7 @@ class EnhancedBakeryMLTrainer:

             model_record = await repos['model'].create_model(model_data)
             logger.info("Created enhanced model record",
-                        product_name=product_name,
+                        inventory_product_id=inventory_product_id,
                         model_id=model_record.id)

             # Create artifacts for model files
@@ -374,7 +374,7 @@ class EnhancedBakeryMLTrainer:

         except Exception as e:
             logger.error("Failed to create enhanced model record",
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             return None

@@ -382,14 +382,14 @@ class EnhancedBakeryMLTrainer:
                                           repos: Dict,
                                           model_id: str,
                                           tenant_id: str,
-                                          product_name: str,
+                                          inventory_product_id: str,
                                           metrics: Dict):
         """Create performance metrics record using repository"""
         try:
             metric_data = {
                 "model_id": str(model_id),
                 "tenant_id": tenant_id,
-                "product_name": product_name,
+                "inventory_product_id": inventory_product_id,
                 "mae": metrics.get("mae"),
                 "mse": metrics.get("mse"),
                 "rmse": metrics.get("rmse"),
@@ -401,12 +401,12 @@ class EnhancedBakeryMLTrainer:

             await repos['performance'].create_performance_metric(metric_data)
             logger.info("Created enhanced performance metrics",
-                        product_name=product_name,
+                        inventory_product_id=inventory_product_id,
                         model_id=model_id)

         except Exception as e:
             logger.error("Failed to create enhanced performance metrics",
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))

     async def _calculate_enhanced_training_summary(self,
@@ -532,7 +532,7 @@ class EnhancedBakeryMLTrainer:

     async def evaluate_model_performance_enhanced(self,
                                                   tenant_id: str,
-                                                  product_name: str,
+                                                  inventory_product_id: str,
                                                   model_path: str,
                                                   test_dataset: TrainingDataSet) -> Dict[str, Any]:
         """
@@ -553,17 +553,17 @@ class EnhancedBakeryMLTrainer:
         test_traffic_df = pd.DataFrame(test_dataset.traffic_data)

         # Filter for specific product
-        product_test_sales = test_sales_df[test_sales_df['product_name'] == product_name].copy()
+        product_test_sales = test_sales_df[test_sales_df['inventory_product_id'] == inventory_product_id].copy()

         if product_test_sales.empty:
-            raise ValueError(f"No test data found for product: {product_name}")
+            raise ValueError(f"No test data found for product: {inventory_product_id}")

         # Process test data using enhanced processor
         processed_test_data = await self.enhanced_data_processor.prepare_training_data(
             sales_data=product_test_sales,
             weather_data=test_weather_df,
             traffic_data=test_traffic_df,
-            product_name=product_name,
+            inventory_product_id=inventory_product_id,
             tenant_id=tenant_id
         )

@@ -608,16 +608,16 @@ class EnhancedBakeryMLTrainer:
             metrics["mape"] = 100.0

         # Store evaluation metrics in repository
-        model_records = await repos['model'].get_models_by_product(tenant_id, product_name)
+        model_records = await repos['model'].get_models_by_product(tenant_id, inventory_product_id)
         if model_records:
             latest_model = max(model_records, key=lambda x: x.created_at)
             await self._create_performance_metrics(
-                repos, latest_model.id, tenant_id, product_name, metrics
+                repos, latest_model.id, tenant_id, inventory_product_id, metrics
             )

         result = {
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "enhanced_evaluation_metrics": metrics,
             "test_samples": len(processed_test_data),
             "prediction_samples": len(forecast),
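The evaluation hunk above falls back to `metrics["mape"] = 100.0` when MAPE cannot be computed. For reference, a hedged sketch of a MAPE that tolerates zero actuals — the exact guard used upstream is not shown in this diff:

```python
import numpy as np

def safe_mape(actual: np.ndarray, predicted: np.ndarray, fallback: float = 100.0) -> float:
    """MAPE over non-zero actuals; returns `fallback` when every actual is zero,
    mirroring the diff's metrics["mape"] = 100.0 failure path."""
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    nonzero = actual != 0
    if not nonzero.any():
        return fallback
    return float(np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100.0)

print(safe_mape(np.array([10, 0, 20]), np.array([12, 1, 18])))  # ~15.0
```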
@@ -46,7 +46,7 @@ class ModelPerformanceMetric(Base):
     id = Column(Integer, primary_key=True, index=True)
     model_id = Column(String(255), index=True, nullable=False)
     tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
-    product_name = Column(String(255), index=True, nullable=False)
+    inventory_product_id = Column(UUID(as_uuid=True), index=True, nullable=False)

     # Performance metrics
     mae = Column(Float, nullable=True)  # Mean Absolute Error
@@ -128,7 +128,7 @@ class TrainedModel(Base):
     # Primary identification - Updated to use UUID properly
     id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
     tenant_id = Column(UUID(as_uuid=True), nullable=False, index=True)
-    product_name = Column(String, nullable=False, index=True)
+    inventory_product_id = Column(UUID(as_uuid=True), nullable=False, index=True)

     # Model information
     model_type = Column(String, default="prophet_optimized")
@@ -174,7 +174,7 @@ class TrainedModel(Base):
             "id": str(self.id),
             "model_id": str(self.id),
             "tenant_id": str(self.tenant_id),
-            "product_name": self.product_name,
+            "inventory_product_id": str(self.inventory_product_id),
             "model_type": self.model_type,
             "model_version": self.model_version,
             "model_path": self.model_path,
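With `UUID(as_uuid=True)`, SQLAlchemy returns `uuid.UUID` instances rather than strings, which is why `to_dict()` above wraps the value in `str()`: `uuid.UUID` is not JSON serializable. A minimal standalone illustration (not the project's model):

```python
import json
import uuid

record_id = uuid.uuid4()  # what UUID(as_uuid=True) columns hand back

try:
    json.dumps({"inventory_product_id": record_id})
except TypeError as exc:
    print(f"raw UUID fails: {exc}")

# str() at the serialization boundary, as in to_dict() above:
payload = json.dumps({"inventory_product_id": str(record_id)})
```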
@@ -29,7 +29,7 @@ class ModelRepository(TrainingBaseRepository):
         # Validate model data
         validation_result = self._validate_training_data(
             model_data,
-            ["tenant_id", "product_name", "model_path", "job_id"]
+            ["tenant_id", "inventory_product_id", "model_path", "job_id"]
         )

         if not validation_result["is_valid"]:
@@ -38,7 +38,7 @@ class ModelRepository(TrainingBaseRepository):
         # Check for duplicate active models for same tenant+product
         existing_model = await self.get_active_model_for_product(
             model_data["tenant_id"],
-            model_data["product_name"]
+            model_data["inventory_product_id"]
         )

         # If there's an existing active model, we may want to deactivate it
@@ -46,7 +46,7 @@ class ModelRepository(TrainingBaseRepository):
             logger.info("Deactivating previous production model",
                         previous_model_id=existing_model.id,
                         tenant_id=model_data["tenant_id"],
-                        product_name=model_data["product_name"])
+                        inventory_product_id=model_data["inventory_product_id"])
             await self.update(existing_model.id, {"is_production": False})

         # Create new model
@@ -55,7 +55,7 @@ class ModelRepository(TrainingBaseRepository):
         logger.info("Trained model created successfully",
                     model_id=model.id,
                     tenant_id=model.tenant_id,
-                    product_name=model.product_name,
+                    inventory_product_id=str(model.inventory_product_id),
                     model_type=model.model_type)

         return model
@@ -65,21 +65,21 @@ class ModelRepository(TrainingBaseRepository):
     except Exception as e:
         logger.error("Failed to create trained model",
                      tenant_id=model_data.get("tenant_id"),
-                     product_name=model_data.get("product_name"),
+                     inventory_product_id=model_data.get("inventory_product_id"),
                      error=str(e))
         raise DatabaseError(f"Failed to create model: {str(e)}")

     async def get_model_by_tenant_and_product(
         self,
         tenant_id: str,
-        product_name: str
+        inventory_product_id: str
     ) -> List[TrainedModel]:
         """Get all models for a tenant and product"""
         try:
             return await self.get_multi(
                 filters={
                     "tenant_id": tenant_id,
-                    "product_name": product_name
+                    "inventory_product_id": inventory_product_id
                 },
                 order_by="created_at",
                 order_desc=True
@@ -87,21 +87,21 @@ class ModelRepository(TrainingBaseRepository):
         except Exception as e:
             logger.error("Failed to get models by tenant and product",
                          tenant_id=tenant_id,
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             raise DatabaseError(f"Failed to get models: {str(e)}")

     async def get_active_model_for_product(
         self,
         tenant_id: str,
-        product_name: str
+        inventory_product_id: str
     ) -> Optional[TrainedModel]:
         """Get the active production model for a product"""
         try:
             models = await self.get_multi(
                 filters={
                     "tenant_id": tenant_id,
-                    "product_name": product_name,
+                    "inventory_product_id": inventory_product_id,
                     "is_active": True,
                     "is_production": True
                 },
@@ -113,7 +113,7 @@ class ModelRepository(TrainingBaseRepository):
         except Exception as e:
             logger.error("Failed to get active model for product",
                          tenant_id=tenant_id,
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             raise DatabaseError(f"Failed to get active model: {str(e)}")

@@ -137,7 +137,7 @@ class ModelRepository(TrainingBaseRepository):
         # Deactivate other production models for the same tenant+product
         await self._deactivate_other_production_models(
             model.tenant_id,
-            model.product_name,
+            str(model.inventory_product_id),
             model_id
         )

@@ -150,7 +150,7 @@ class ModelRepository(TrainingBaseRepository):
         logger.info("Model promoted to production",
                     model_id=model_id,
                     tenant_id=model.tenant_id,
-                    product_name=model.product_name)
+                    inventory_product_id=str(model.inventory_product_id))

         return updated_model
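The promote-to-production path above follows a two-step pattern: demote every other production model for the same tenant and product, then flag the chosen one. A condensed sketch of that flow — method names are taken from the diff, but the `repo.get` accessor and the transaction wrapper are assumptions:

```python
async def promote_to_production(repo, model_id: str) -> None:
    model = await repo.get(model_id)  # assumed base-repository getter
    # Step 1: demote competing production models for this tenant+product.
    await repo._deactivate_other_production_models(
        model.tenant_id,
        str(model.inventory_product_id),  # normalize UUID -> str for the query
        model_id,
    )
    # Step 2: flag the chosen model. Both steps should share one transaction
    # so a failure cannot leave the product with zero production models.
    await repo.update(model_id, {"is_production": True, "is_active": True})
```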
@@ -223,16 +223,16 @@ class ModelRepository(TrainingBaseRepository):

         # Get models by product using raw query
         product_query = text("""
-            SELECT product_name, COUNT(*) as count
+            SELECT inventory_product_id, COUNT(*) as count
             FROM trained_models
             WHERE tenant_id = :tenant_id
             AND is_active = true
-            GROUP BY product_name
+            GROUP BY inventory_product_id
             ORDER BY count DESC
         """)

         result = await self.session.execute(product_query, {"tenant_id": tenant_id})
-        product_stats = {row.product_name: row.count for row in result.fetchall()}
+        product_stats = {row.inventory_product_id: row.count for row in result.fetchall()}

         # Recent activity (models created in last 30 days)
         thirty_days_ago = datetime.utcnow() - timedelta(days=30)
@@ -274,7 +274,7 @@ class ModelRepository(TrainingBaseRepository):
     async def _deactivate_other_production_models(
         self,
         tenant_id: str,
-        product_name: str,
+        inventory_product_id: str,
         exclude_model_id: str
     ) -> int:
         """Deactivate other production models for the same tenant+product"""
@@ -283,14 +283,14 @@ class ModelRepository(TrainingBaseRepository):
             UPDATE trained_models
             SET is_production = false
             WHERE tenant_id = :tenant_id
-            AND product_name = :product_name
+            AND inventory_product_id = :inventory_product_id
             AND id != :exclude_model_id
             AND is_production = true
         """)

         result = await self.session.execute(query, {
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "exclude_model_id": exclude_model_id
         })

@@ -299,7 +299,7 @@ class ModelRepository(TrainingBaseRepository):
         except Exception as e:
             logger.error("Failed to deactivate other production models",
                          tenant_id=tenant_id,
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             raise DatabaseError(f"Failed to deactivate models: {str(e)}")

@@ -313,7 +313,7 @@ class ModelRepository(TrainingBaseRepository):
         return {
             "model_id": model.id,
             "tenant_id": model.tenant_id,
-            "product_name": model.product_name,
+            "inventory_product_id": str(model.inventory_product_id),
             "model_type": model.model_type,
             "metrics": {
                 "mape": model.mape,
@@ -29,7 +29,7 @@ class PerformanceRepository(TrainingBaseRepository):
         # Validate metric data
         validation_result = self._validate_training_data(
             metric_data,
-            ["model_id", "tenant_id", "product_name"]
+            ["model_id", "tenant_id", "inventory_product_id"]
         )

         if not validation_result["is_valid"]:
@@ -45,7 +45,7 @@ class PerformanceRepository(TrainingBaseRepository):
         logger.info("Performance metric created",
                     model_id=metric.model_id,
                     tenant_id=metric.tenant_id,
-                    product_name=metric.product_name)
+                    inventory_product_id=str(metric.inventory_product_id))

         return metric

@@ -97,7 +97,7 @@ class PerformanceRepository(TrainingBaseRepository):
     async def get_metrics_by_tenant_and_product(
         self,
         tenant_id: str,
-        product_name: str,
+        inventory_product_id: str,
         skip: int = 0,
         limit: int = 100
     ) -> List[ModelPerformanceMetric]:
@@ -106,7 +106,7 @@ class PerformanceRepository(TrainingBaseRepository):
         return await self.get_multi(
             filters={
                 "tenant_id": tenant_id,
-                "product_name": product_name
+                "inventory_product_id": inventory_product_id
             },
             skip=skip,
             limit=limit,
@@ -116,7 +116,7 @@ class PerformanceRepository(TrainingBaseRepository):
         except Exception as e:
             logger.error("Failed to get metrics by tenant and product",
                          tenant_id=tenant_id,
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             raise DatabaseError(f"Failed to get metrics: {str(e)}")

@@ -172,7 +172,7 @@ class PerformanceRepository(TrainingBaseRepository):
     async def get_performance_trends(
         self,
         tenant_id: str,
-        product_name: str = None,
+        inventory_product_id: str = None,
         days: int = 30
     ) -> Dict[str, Any]:
         """Get performance trends for analysis"""
@@ -184,13 +184,13 @@ class PerformanceRepository(TrainingBaseRepository):
         conditions = ["tenant_id = :tenant_id", "measured_at >= :start_date"]
         params = {"tenant_id": tenant_id, "start_date": start_date}

-        if product_name:
-            conditions.append("product_name = :product_name")
-            params["product_name"] = product_name
+        if inventory_product_id:
+            conditions.append("inventory_product_id = :inventory_product_id")
+            params["inventory_product_id"] = inventory_product_id

         query_text = f"""
             SELECT
-                product_name,
+                inventory_product_id,
                 AVG(mae) as avg_mae,
                 AVG(mse) as avg_mse,
                 AVG(rmse) as avg_rmse,
@@ -202,7 +202,7 @@ class PerformanceRepository(TrainingBaseRepository):
                 MAX(measured_at) as last_measurement
             FROM model_performance_metrics
             WHERE {' AND '.join(conditions)}
-            GROUP BY product_name
+            GROUP BY inventory_product_id
             ORDER BY avg_accuracy DESC
         """

@@ -211,7 +211,7 @@ class PerformanceRepository(TrainingBaseRepository):
         trends = []
         for row in result.fetchall():
             trends.append({
-                "product_name": row.product_name,
+                "inventory_product_id": row.inventory_product_id,
                 "metrics": {
                     "avg_mae": float(row.avg_mae) if row.avg_mae else None,
                     "avg_mse": float(row.avg_mse) if row.avg_mse else None,
@@ -230,7 +230,7 @@ class PerformanceRepository(TrainingBaseRepository):

         return {
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "trends": trends,
             "period_days": days,
             "total_products": len(trends)
@@ -239,11 +239,11 @@ class PerformanceRepository(TrainingBaseRepository):
         except Exception as e:
             logger.error("Failed to get performance trends",
                          tenant_id=tenant_id,
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             return {
                 "tenant_id": tenant_id,
-                "product_name": product_name,
+                "inventory_product_id": inventory_product_id,
                 "trends": [],
                 "period_days": days,
                 "total_products": 0
@@ -268,16 +268,16 @@ class PerformanceRepository(TrainingBaseRepository):
         order_direction = "DESC" if order_desc else "ASC"

         query_text = f"""
-            SELECT DISTINCT ON (product_name, model_id)
+            SELECT DISTINCT ON (inventory_product_id, model_id)
                 model_id,
-                product_name,
+                inventory_product_id,
                 {metric_type},
                 measured_at,
                 evaluation_samples
             FROM model_performance_metrics
             WHERE tenant_id = :tenant_id
             AND {metric_type} IS NOT NULL
-            ORDER BY product_name, model_id, measured_at DESC, {metric_type} {order_direction}
+            ORDER BY inventory_product_id, model_id, measured_at DESC, {metric_type} {order_direction}
             LIMIT :limit
         """
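Two notes on the query above. PostgreSQL's `DISTINCT ON (cols)` keeps the first row per group according to the `ORDER BY`, so leading the sort with `inventory_product_id, model_id, measured_at DESC` yields the latest measurement per product/model pair. And because `{metric_type}` is interpolated into the SQL string rather than bound as a parameter, it should be checked against an allowlist before formatting. A hedged sketch of that guard (the allowlist contents are inferred from the metric columns visible in this diff):

```python
# Column names cannot be bound parameters, so validate them explicitly
# before f-string interpolation to rule out SQL injection.
ALLOWED_METRIC_COLUMNS = {"mae", "mse", "rmse", "mape", "accuracy_percentage"}

def checked_metric_column(metric_type: str) -> str:
    if metric_type not in ALLOWED_METRIC_COLUMNS:
        raise ValueError(f"Unsupported metric column: {metric_type!r}")
    return metric_type
```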
@@ -290,7 +290,7 @@ class PerformanceRepository(TrainingBaseRepository):
         for row in result.fetchall():
             best_models.append({
                 "model_id": row.model_id,
-                "product_name": row.product_name,
+                "inventory_product_id": row.inventory_product_id,
                 "metric_value": float(getattr(row, metric_type)),
                 "metric_type": metric_type,
                 "measured_at": row.measured_at.isoformat() if row.measured_at else None,
@@ -319,12 +319,12 @@ class PerformanceRepository(TrainingBaseRepository):
         # Get metrics by product using raw query
         product_query = text("""
             SELECT
-                product_name,
+                inventory_product_id,
                 COUNT(*) as metric_count,
                 AVG(accuracy_percentage) as avg_accuracy
             FROM model_performance_metrics
             WHERE tenant_id = :tenant_id
-            GROUP BY product_name
+            GROUP BY inventory_product_id
             ORDER BY avg_accuracy DESC
         """)

@@ -332,7 +332,7 @@ class PerformanceRepository(TrainingBaseRepository):
         product_stats = {}

         for row in result.fetchall():
-            product_stats[row.product_name] = {
+            product_stats[row.inventory_product_id] = {
                 "metric_count": row.metric_count,
                 "avg_accuracy": float(row.avg_accuracy) if row.avg_accuracy else None
             }
@@ -383,7 +383,7 @@ class PerformanceRepository(TrainingBaseRepository):
         query_text = f"""
             SELECT
                 model_id,
-                product_name,
+                inventory_product_id,
                 AVG({metric_type}) as avg_metric,
                 MIN({metric_type}) as min_metric,
                 MAX({metric_type}) as max_metric,
@@ -392,7 +392,7 @@ class PerformanceRepository(TrainingBaseRepository):
             FROM model_performance_metrics
             WHERE model_id IN ('{model_ids_str}')
             AND {metric_type} IS NOT NULL
-            GROUP BY model_id, product_name
+            GROUP BY model_id, inventory_product_id
             ORDER BY avg_metric DESC
         """

@@ -402,7 +402,7 @@ class PerformanceRepository(TrainingBaseRepository):
         for row in result.fetchall():
             comparisons.append({
                 "model_id": row.model_id,
-                "product_name": row.product_name,
+                "inventory_product_id": row.inventory_product_id,
                 "avg_metric": float(row.avg_metric),
                 "min_metric": float(row.min_metric),
                 "max_metric": float(row.max_metric),
@@ -54,7 +54,7 @@ class DataSummary(BaseModel):

 class ProductTrainingResult(BaseModel):
     """Schema for individual product training results"""
-    product_name: str = Field(..., description="Product name")
+    inventory_product_id: UUID = Field(..., description="Inventory product UUID")
     status: str = Field(..., description="Training status for this product")
     model_id: Optional[str] = Field(None, description="Trained model identifier")
     data_points: int = Field(..., description="Number of data points used for training")
@@ -188,7 +188,7 @@ class ModelInfo(BaseModel):

 class ProductTrainingResult(BaseModel):
     """Schema for individual product training result"""
-    product_name: str = Field(..., description="Product name")
+    inventory_product_id: UUID = Field(..., description="Inventory product UUID")
     status: str = Field(..., description="Training status for this product")
     model_info: Optional[ModelInfo] = Field(None, description="Model information if successful")
     data_points: int = Field(..., description="Number of data points used")
@@ -281,7 +281,7 @@ class TrainedModelResponse(BaseModel):
     """Response schema for trained model information"""
     model_id: str = Field(..., description="Unique model identifier")
     tenant_id: str = Field(..., description="Tenant identifier")
-    product_name: str = Field(..., description="Product name")
+    inventory_product_id: UUID = Field(..., description="Inventory product UUID")
     model_type: str = Field(..., description="Type of ML model")
     model_path: str = Field(..., description="Path to stored model")
     version: int = Field(..., description="Model version")
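Typing these fields as `UUID` means Pydantic parses well-formed UUID strings automatically and rejects anything else at the API boundary — stronger validation than the old `str` fields. A quick standalone illustration:

```python
from uuid import UUID
from pydantic import BaseModel, Field, ValidationError

class Example(BaseModel):
    inventory_product_id: UUID = Field(..., description="Inventory product UUID")

ok = Example(inventory_product_id="12345678-1234-5678-1234-567812345678")
print(type(ok.inventory_product_id))  # <class 'uuid.UUID'> — string was coerced

try:
    Example(inventory_product_id="croissant")  # plain names no longer pass
except ValidationError as exc:
    print("rejected:", exc.errors()[0]["type"])
```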
@@ -262,7 +262,7 @@ async def publish_job_cancelled(job_id: str, tenant_id: str, reason: str = "User
 # PRODUCT-LEVEL TRAINING EVENTS
 # =========================================

-async def publish_product_training_started(job_id: str, tenant_id: str, product_name: str) -> bool:
+async def publish_product_training_started(job_id: str, tenant_id: str, inventory_product_id: str) -> bool:
     """Publish single product training started event"""
     return await training_publisher.publish_event(
         exchange_name="training.events",
@@ -274,7 +274,7 @@ async def publish_product_training_started(job_id: str, tenant_id: str, product_
         "data": {
             "job_id": job_id,
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "started_at": datetime.now().isoformat()
         }
     }
@@ -283,7 +283,7 @@ async def publish_product_training_started(job_id: str, tenant_id: str, product_
 async def publish_product_training_completed(
     job_id: str,
     tenant_id: str,
-    product_name: str,
+    inventory_product_id: str,
     model_id: str,
     metrics: Optional[Dict[str, float]] = None
 ) -> bool:
@@ -298,7 +298,7 @@ async def publish_product_training_completed(
         "data": {
             "job_id": job_id,
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "model_id": model_id,
             "metrics": metrics or {},
             "completed_at": datetime.now().isoformat()
@@ -309,7 +309,7 @@ async def publish_product_training_completed(
 async def publish_product_training_failed(
     job_id: str,
     tenant_id: str,
-    product_name: str,
+    inventory_product_id: str,
     error: str
 ) -> bool:
     """Publish single product training failed event"""
@@ -323,7 +323,7 @@ async def publish_product_training_failed(
         "data": {
             "job_id": job_id,
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "error": error,
             "failed_at": datetime.now().isoformat()
         }
@@ -334,7 +334,7 @@ async def publish_product_training_failed(
 # MODEL LIFECYCLE EVENTS
 # =========================================

-async def publish_model_trained(model_id: str, tenant_id: str, product_name: str, metrics: Dict[str, float]) -> bool:
+async def publish_model_trained(model_id: str, tenant_id: str, inventory_product_id: str, metrics: Dict[str, float]) -> bool:
     """Publish model trained event with safe metric serialization"""

     # Clean metrics to ensure JSON serialization
@@ -347,7 +347,7 @@ async def publish_model_trained(model_id: str, tenant_id: str, product_name: str
         "data": {
             "model_id": model_id,
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "training_metrics": clean_metrics,  # Now safe for JSON
             "trained_at": datetime.now().isoformat()
         }
@@ -360,7 +360,7 @@ async def publish_model_trained(model_id: str, tenant_id: str, product_name: str
     )


-async def publish_model_validated(model_id: str, tenant_id: str, product_name: str, validation_results: Dict[str, Any]) -> bool:
+async def publish_model_validated(model_id: str, tenant_id: str, inventory_product_id: str, validation_results: Dict[str, Any]) -> bool:
     """Publish model validation event"""
     return await training_publisher.publish_event(
         exchange_name="training.events",
@@ -372,14 +372,14 @@ async def publish_model_validated(model_id: str, tenant_id: str, product_name: s
         "data": {
             "model_id": model_id,
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "validation_results": validation_results,
             "validated_at": datetime.now().isoformat()
         }
     }
     )

-async def publish_model_saved(model_id: str, tenant_id: str, product_name: str, model_path: str) -> bool:
+async def publish_model_saved(model_id: str, tenant_id: str, inventory_product_id: str, model_path: str) -> bool:
     """Publish model saved event"""
     return await training_publisher.publish_event(
         exchange_name="training.events",
@@ -391,7 +391,7 @@ async def publish_model_saved(model_id: str, tenant_id: str, product_name: str,
         "data": {
             "model_id": model_id,
             "tenant_id": tenant_id,
-            "product_name": product_name,
+            "inventory_product_id": inventory_product_id,
             "model_path": model_path,
             "saved_at": datetime.now().isoformat()
         }
@@ -571,7 +571,7 @@ class TrainingStatusPublisher:

         return 0

-    async def product_completed(self, product_name: str, model_id: str, metrics: Optional[Dict] = None):
+    async def product_completed(self, inventory_product_id: str, model_id: str, metrics: Optional[Dict] = None):
         """Mark a product as completed and update progress"""
         self.products_completed += 1

@@ -579,7 +579,7 @@ class TrainingStatusPublisher:
         clean_metrics = safe_json_serialize(metrics) if metrics else None

         await publish_product_training_completed(
-            self.job_id, self.tenant_id, product_name, model_id, clean_metrics
+            self.job_id, self.tenant_id, inventory_product_id, model_id, clean_metrics
        )

         # Update overall progress
@@ -587,7 +587,7 @@ class TrainingStatusPublisher:
         progress = int((self.products_completed / self.products_total) * 90)  # Save 10% for final steps
         await self.progress_update(
             progress=progress,
-            step=f"Completed training for {product_name}",
+            step=f"Completed training for {inventory_product_id}",
             current_product=None
         )
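The publishers above route metrics through `safe_json_serialize` before emitting events because Prophet/NumPy metrics arrive as `np.float64`/`np.int64`, which `json.dumps` rejects. The project's helper is not shown in this diff; a plausible minimal version looks like:

```python
import json
import numpy as np

def safe_json_serialize(obj):
    """Recursively convert NumPy scalars (and NaN) into JSON-safe values.
    Sketch of the helper referenced in the diff; the real one may differ."""
    if isinstance(obj, dict):
        return {k: safe_json_serialize(v) for k, v in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [safe_json_serialize(v) for v in obj]
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return None if np.isnan(obj) else float(obj)
    return obj

print(json.dumps(safe_json_serialize({"mape": np.float64(15.5), "n": np.int64(100)})))
```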
@@ -234,7 +234,7 @@ class TrainingDataOrchestrator:

     def _validate_sales_record(self, record: Dict[str, Any]) -> bool:
         """Validate individual sales record"""
-        required_fields = ['date', 'product_name']
+        required_fields = ['date', 'inventory_product_id']
         quantity_fields = ['quantity', 'quantity_sold', 'sales', 'units_sold']

         # Check required fields
@@ -755,8 +755,8 @@ class TrainingDataOrchestrator:
         # Check data consistency
         unique_products = set()
         for record in dataset.sales_data:
-            if 'product_name' in record:
-                unique_products.add(record['product_name'])
+            if 'inventory_product_id' in record:
+                unique_products.add(record['inventory_product_id'])

         if len(unique_products) == 0:
             validation_results["errors"].append("No product names found in sales data")
@@ -822,7 +822,7 @@ class TrainingDataOrchestrator:
         "required": True,
         "priority": "high",
         "expected_records": "variable",
-        "data_points": ["date", "product_name", "quantity"],
+        "data_points": ["date", "inventory_product_id", "quantity"],
         "validation": "required_fields_check"
     }
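Based on the fields listed above, `_validate_sales_record` requires `date` and `inventory_product_id` and accepts any one of several quantity aliases. A self-contained sketch of that contract — the exact method body is not in the diff:

```python
from typing import Any, Dict

REQUIRED_FIELDS = ['date', 'inventory_product_id']
QUANTITY_FIELDS = ['quantity', 'quantity_sold', 'sales', 'units_sold']

def validate_sales_record(record: Dict[str, Any]) -> bool:
    # Every required field must be present and non-null...
    if any(record.get(field) is None for field in REQUIRED_FIELDS):
        return False
    # ...and at least one quantity alias must carry a value.
    return any(record.get(field) is not None for field in QUANTITY_FIELDS)

assert validate_sales_record({'date': '2024-01-01',
                              'inventory_product_id': 'abc-123',
                              'quantity_sold': 4})
assert not validate_sales_record({'date': '2024-01-01', 'quantity': 4})
```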
@@ -223,7 +223,7 @@ class EnhancedTrainingService:
             "training_results": training_results,
             "stored_models": [{
                 "id": str(model.id),
-                "product_name": model.product_name,
+                "inventory_product_id": str(model.inventory_product_id),
                 "model_type": model.model_type,
                 "model_path": model.model_path,
                 "is_active": model.is_active,
@@ -292,11 +292,11 @@ class EnhancedTrainingService:
                     models_trained_type=type(models_trained).__name__,
                     models_trained_keys=list(models_trained.keys()) if isinstance(models_trained, dict) else "not_dict")

-        for product_name, model_result in models_trained.items():
+        for inventory_product_id, model_result in models_trained.items():
             # Defensive check: ensure model_result is a dictionary
             if not isinstance(model_result, dict):
                 logger.warning("Skipping invalid model_result for product",
-                               product_name=product_name,
+                               inventory_product_id=inventory_product_id,
                                model_result_type=type(model_result).__name__,
                                model_result_value=str(model_result)[:100])
                 continue
@@ -306,12 +306,12 @@ class EnhancedTrainingService:
             metrics = model_result.get("metrics", {})
             if not isinstance(metrics, dict):
                 logger.warning("Invalid metrics object, using empty dict",
-                               product_name=product_name,
+                               inventory_product_id=inventory_product_id,
                                metrics_type=type(metrics).__name__)
                 metrics = {}
             model_data = {
                 "tenant_id": tenant_id,
-                "product_name": product_name,
+                "inventory_product_id": inventory_product_id,
                 "job_id": job_id,
                 "model_type": "prophet_optimized",
                 "model_path": model_result.get("model_path"),
@@ -371,14 +371,14 @@ class EnhancedTrainingService:
         """Create performance metrics for stored models"""
         try:
             for model in stored_models:
-                model_result = training_results.get("models_trained", {}).get(model.product_name)
+                model_result = training_results.get("models_trained", {}).get(str(model.inventory_product_id))
                 if model_result and model_result.get("metrics"):
                     metrics = model_result["metrics"]

                     metric_data = {
                         "model_id": str(model.id),
                         "tenant_id": tenant_id,
-                        "product_name": model.product_name,
+                        "inventory_product_id": str(model.inventory_product_id),
                         "mae": metrics.get("mae"),
                         "mse": metrics.get("mse"),
                         "rmse": metrics.get("rmse"),
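The `str(model.inventory_product_id)` conversion in the lookup above is the load-bearing detail: `models_trained` is keyed by the string IDs that came in from the DataFrame, while the ORM hands back `uuid.UUID` objects, so an unconverted `.get()` would always miss. A small demonstration of the failure mode:

```python
import uuid

product_id = uuid.uuid4()
models_trained = {str(product_id): {"metrics": {"mape": 12.3}}}  # string keys

assert models_trained.get(product_id) is None            # UUID key: silent miss
assert models_trained.get(str(product_id)) is not None   # normalized: hit
```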
@@ -556,14 +556,14 @@ class EnhancedTrainingService:

     async def start_single_product_training(self,
                                             tenant_id: str,
-                                            product_name: str,
+                                            inventory_product_id: str,
                                             job_id: str,
                                             bakery_location: tuple = (40.4168, -3.7038)) -> Dict[str, Any]:
         """Start enhanced single product training using repository pattern"""
         try:
             logger.info("Starting enhanced single product training",
                         tenant_id=tenant_id,
-                        product_name=product_name,
+                        inventory_product_id=inventory_product_id,
                         job_id=job_id)

             # This would use the data client to fetch data for the specific product
@@ -573,7 +573,7 @@ class EnhancedTrainingService:
             return {
                 "job_id": job_id,
                 "tenant_id": tenant_id,
-                "product_name": product_name,
+                "inventory_product_id": inventory_product_id,
                 "status": "completed",
                 "message": "Enhanced single product training completed successfully",
                 "created_at": datetime.now(),
@@ -582,9 +582,9 @@ class EnhancedTrainingService:
                 "successful_trainings": 1,
                 "failed_trainings": 0,
                 "products": [{
-                    "product_name": product_name,
+                    "inventory_product_id": inventory_product_id,
                     "status": "completed",
-                    "model_id": f"model_{product_name}_{job_id[:8]}",
+                    "model_id": f"model_{inventory_product_id}_{job_id[:8]}",
                     "data_points": 100,
                     "metrics": {"mape": 15.5, "mae": 2.3, "rmse": 3.1, "r2_score": 0.85}
                 }],
@@ -597,7 +597,7 @@ class EnhancedTrainingService:

         except Exception as e:
             logger.error("Enhanced single product training failed",
-                         product_name=product_name,
+                         inventory_product_id=inventory_product_id,
                          error=str(e))
             raise

@@ -611,7 +611,7 @@ class EnhancedTrainingService:
         products = []
         for model in stored_models:
             products.append({
-                "product_name": model.get("product_name"),
+                "inventory_product_id": model.get("inventory_product_id"),
                 "status": "completed",
                 "model_id": model.get("id"),
                 "data_points": model.get("training_samples", 0),