Fix new services implementation 3: identify products by inventory_product_id instead of product_name in BakeryProphetManager
This commit is contained in:
@@ -52,7 +52,7 @@ class BakeryProphetManager:
|
||||
|
||||
async def train_bakery_model(self,
|
||||
tenant_id: str,
|
||||
product_name: str,
|
||||
inventory_product_id: str,
|
||||
df: pd.DataFrame,
|
||||
job_id: str) -> Dict[str, Any]:
|
||||
"""
|
||||
@@ -60,10 +60,10 @@ class BakeryProphetManager:
|
||||
Same interface as before - optimization happens automatically.
|
||||
"""
|
||||
try:
|
||||
logger.info(f"Training optimized bakery model for {product_name}")
|
||||
logger.info(f"Training optimized bakery model for {inventory_product_id}")
|
||||
|
||||
# Validate input data
|
||||
await self._validate_training_data(df, product_name)
|
||||
await self._validate_training_data(df, inventory_product_id)
|
||||
|
||||
# Prepare data for Prophet
|
||||
prophet_data = await self._prepare_prophet_data(df)
|
||||
@@ -72,8 +72,8 @@ class BakeryProphetManager:
|
||||
regressor_columns = self._extract_regressor_columns(prophet_data)
|
||||
|
||||
# Automatically optimize hyperparameters (this is the new part)
|
||||
logger.info(f"Optimizing hyperparameters for {product_name}...")
|
||||
best_params = await self._optimize_hyperparameters(prophet_data, product_name, regressor_columns)
|
||||
logger.info(f"Optimizing hyperparameters for {inventory_product_id}...")
|
||||
best_params = await self._optimize_hyperparameters(prophet_data, inventory_product_id, regressor_columns)
|
||||
|
||||
# Create optimized Prophet model
|
||||
model = self._create_optimized_prophet_model(best_params, regressor_columns)
|
||||
@@ -92,7 +92,7 @@ class BakeryProphetManager:
|
||||
# Store model and metrics - Generate proper UUID for model_id
|
||||
model_id = str(uuid.uuid4())
|
||||
model_path = await self._store_model(
|
||||
tenant_id, product_name, model, model_id, prophet_data, regressor_columns, best_params, training_metrics
|
||||
tenant_id, inventory_product_id, model, model_id, prophet_data, regressor_columns, best_params, training_metrics
|
||||
)
|
||||
|
||||
# Return same format as before, but with optimization info
|
||||
@@ -112,17 +112,17 @@ class BakeryProphetManager:
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(f"Optimized model trained successfully for {product_name}. "
|
||||
logger.info(f"Optimized model trained successfully for {inventory_product_id}. "
|
||||
f"MAPE: {training_metrics.get('optimized_mape', 'N/A')}%")
|
||||
return model_info
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to train optimized bakery model for {product_name}: {str(e)}")
|
||||
logger.error(f"Failed to train optimized bakery model for {inventory_product_id}: {str(e)}")
|
||||
raise
|
||||
|
||||
async def _optimize_hyperparameters(self,
|
||||
df: pd.DataFrame,
|
||||
product_name: str,
|
||||
inventory_product_id: str,
|
||||
regressor_columns: List[str]) -> Dict[str, Any]:
|
||||
"""
|
||||
Automatically optimize Prophet hyperparameters using Bayesian optimization.
|
||||
@@ -130,7 +130,7 @@ class BakeryProphetManager:
|
||||
"""
|
||||
|
||||
# Determine product category automatically
|
||||
product_category = self._classify_product(product_name, df)
|
||||
product_category = self._classify_product(inventory_product_id, df)
|
||||
|
||||
# Set optimization parameters based on category
|
||||
n_trials = {
|
||||
@@ -140,7 +140,7 @@ class BakeryProphetManager:
|
||||
'intermittent': 15 # Reduced from 25
|
||||
}.get(product_category, 25)
|
||||
|
||||
logger.info(f"Product {product_name} classified as {product_category}, using {n_trials} trials")
|
||||
logger.info(f"Product {inventory_product_id} classified as {product_category}, using {n_trials} trials")
|
||||
|
||||
# Check data quality and adjust strategy
|
||||
total_sales = df['y'].sum()
|
||||
@@ -148,12 +148,12 @@ class BakeryProphetManager:
|
||||
mean_sales = df['y'].mean()
|
||||
non_zero_days = len(df[df['y'] > 0])
|
||||
|
||||
logger.info(f"Data analysis for {product_name}: total_sales={total_sales:.1f}, "
|
||||
logger.info(f"Data analysis for {inventory_product_id}: total_sales={total_sales:.1f}, "
|
||||
f"zero_ratio={zero_ratio:.2f}, mean_sales={mean_sales:.2f}, non_zero_days={non_zero_days}")
|
||||
|
||||
# Adjust strategy based on data characteristics
|
||||
if zero_ratio > 0.8 or non_zero_days < 30:
|
||||
logger.warning(f"Very sparse data for {product_name}, using minimal optimization")
|
||||
logger.warning(f"Very sparse data for {inventory_product_id}, using minimal optimization")
|
||||
return {
|
||||
'changepoint_prior_scale': 0.001,
|
||||
'seasonality_prior_scale': 0.01,
|
||||
@@ -166,7 +166,7 @@ class BakeryProphetManager:
|
||||
'uncertainty_samples': 100 # ✅ FIX: Minimal uncertainty sampling for very sparse data
|
||||
}
|
||||
elif zero_ratio > 0.6:
|
||||
logger.info(f"Moderate sparsity for {product_name}, using conservative optimization")
|
||||
logger.info(f"Moderate sparsity for {inventory_product_id}, using conservative optimization")
|
||||
return {
|
||||
'changepoint_prior_scale': 0.01,
|
||||
'seasonality_prior_scale': 0.1,
|
||||
@@ -180,7 +180,7 @@ class BakeryProphetManager:
|
||||
}
|
||||
|
||||
# Use unique seed for each product to avoid identical results
|
||||
product_seed = hash(product_name) % 10000
|
||||
product_seed = hash(str(inventory_product_id)) % 10000
|
||||
|
||||
def objective(trial):
|
||||
try:
|
||||
@@ -284,13 +284,13 @@ class BakeryProphetManager:
|
||||
cv_scores.append(mape_like)
|
||||
|
||||
except Exception as fold_error:
|
||||
logger.debug(f"Fold failed for {product_name} trial {trial.number}: {str(fold_error)}")
|
||||
logger.debug(f"Fold failed for {inventory_product_id} trial {trial.number}: {str(fold_error)}")
|
||||
continue
|
||||
|
||||
return np.mean(cv_scores) if len(cv_scores) > 0 else 100.0
|
||||
|
||||
except Exception as trial_error:
|
||||
logger.debug(f"Trial {trial.number} failed for {product_name}: {str(trial_error)}")
|
||||
logger.debug(f"Trial {trial.number} failed for {inventory_product_id}: {str(trial_error)}")
|
||||
return 100.0
|
||||
|
||||
# Run optimization with product-specific seed
|
||||
@@ -304,19 +304,19 @@ class BakeryProphetManager:
|
||||
best_params = study.best_params
|
||||
best_score = study.best_value
|
||||
|
||||
logger.info(f"Optimization completed for {product_name}. Best score: {best_score:.2f}%. "
|
||||
logger.info(f"Optimization completed for {inventory_product_id}. Best score: {best_score:.2f}%. "
|
||||
f"Parameters: {best_params}")
|
||||
|
||||
# ✅ FIX: Log uncertainty sampling configuration for debugging confidence intervals
|
||||
uncertainty_samples = best_params.get('uncertainty_samples', 500)
|
||||
logger.info(f"Prophet model will use {uncertainty_samples} uncertainty samples for {product_name} "
|
||||
logger.info(f"Prophet model will use {uncertainty_samples} uncertainty samples for {inventory_product_id} "
|
||||
f"(category: {product_category}, zero_ratio: {zero_ratio:.2f})")
|
||||
|
||||
return best_params
|
||||
|
||||
def _classify_product(self, product_name: str, sales_data: pd.DataFrame) -> str:
|
||||
def _classify_product(self, inventory_product_id: str, sales_data: pd.DataFrame) -> str:
|
||||
"""Automatically classify product for optimization strategy - improved for bakery data"""
|
||||
product_lower = product_name.lower()
|
||||
product_lower = str(inventory_product_id).lower()
|
||||
|
||||
# Calculate sales statistics
|
||||
total_sales = sales_data['y'].sum()
|
||||
@@ -324,7 +324,7 @@ class BakeryProphetManager:
|
||||
zero_ratio = (sales_data['y'] == 0).sum() / len(sales_data)
|
||||
non_zero_days = len(sales_data[sales_data['y'] > 0])
|
||||
|
||||
logger.info(f"Product classification for {product_name}: total_sales={total_sales:.1f}, "
|
||||
logger.info(f"Product classification for {inventory_product_id}: total_sales={total_sales:.1f}, "
|
||||
f"mean_sales={mean_sales:.2f}, zero_ratio={zero_ratio:.2f}, non_zero_days={non_zero_days}")
|
||||
|
||||
# Improved classification logic for bakery products
|
||||
@@ -499,7 +499,7 @@ class BakeryProphetManager:
|
||||
|
||||
async def _store_model(self,
|
||||
tenant_id: str,
|
||||
product_name: str,
|
||||
inventory_product_id: str,
|
||||
model: Prophet,
|
||||
model_id: str,
|
||||
training_data: pd.DataFrame,
|
||||
@@ -520,7 +520,7 @@ class BakeryProphetManager:
|
||||
metadata = {
|
||||
"model_id": model_id,
|
||||
"tenant_id": tenant_id,
|
||||
"product_name": product_name,
|
||||
"inventory_product_id": inventory_product_id,
|
||||
"regressor_columns": regressor_columns,
|
||||
"training_samples": len(training_data),
|
||||
"data_period": {
|
||||
@@ -539,7 +539,7 @@ class BakeryProphetManager:
|
||||
json.dump(metadata, f, indent=2, default=str)
|
||||
|
||||
# Store in memory
|
||||
model_key = f"{tenant_id}:{product_name}"
|
||||
model_key = f"{tenant_id}:{inventory_product_id}"
|
||||
self.models[model_key] = model
|
||||
self.model_metadata[model_key] = metadata
|
||||
|
||||
@@ -547,13 +547,13 @@ class BakeryProphetManager:
|
||||
try:
|
||||
async with self.database_manager.get_session() as db_session:
|
||||
# Deactivate previous models for this product
|
||||
await self._deactivate_previous_models_with_session(db_session, tenant_id, product_name)
|
||||
await self._deactivate_previous_models_with_session(db_session, tenant_id, inventory_product_id)
|
||||
|
||||
# Create new database record
|
||||
db_model = TrainedModel(
|
||||
id=model_id,
|
||||
tenant_id=tenant_id,
|
||||
product_name=product_name,
|
||||
inventory_product_id=inventory_product_id,
|
||||
model_type="prophet_optimized",
|
||||
job_id=model_id.split('_')[0], # Extract job_id from model_id
|
||||
model_path=str(model_path),
|
||||
@@ -587,23 +587,23 @@ class BakeryProphetManager:
|
||||
logger.info(f"Optimized model stored at: {model_path}")
|
||||
return str(model_path)
|
||||
|
||||
async def _deactivate_previous_models_with_session(self, db_session, tenant_id: str, product_name: str):
|
||||
async def _deactivate_previous_models_with_session(self, db_session, tenant_id: str, inventory_product_id: str):
|
||||
"""Deactivate previous models for the same product using provided session"""
|
||||
try:
|
||||
# ✅ FIX: Wrap SQL string with text() for SQLAlchemy 2.0
|
||||
query = text("""
|
||||
UPDATE trained_models
|
||||
SET is_active = false, is_production = false
|
||||
WHERE tenant_id = :tenant_id AND product_name = :product_name
|
||||
WHERE tenant_id = :tenant_id AND inventory_product_id = :inventory_product_id
|
||||
""")
|
||||
|
||||
await db_session.execute(query, {
|
||||
"tenant_id": tenant_id,
|
||||
"product_name": product_name
|
||||
"inventory_product_id": inventory_product_id
|
||||
})
|
||||
|
||||
# Note: Don't commit here, let the calling method handle the transaction
|
||||
logger.info(f"Successfully deactivated previous models for {product_name}")
|
||||
logger.info(f"Successfully deactivated previous models for {inventory_product_id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to deactivate previous models: {str(e)}")
|
||||
@@ -630,14 +630,14 @@ class BakeryProphetManager:
|
||||
logger.error(f"Failed to generate forecast: {str(e)}")
|
||||
raise
|
||||
|
||||
async def _validate_training_data(self, df: pd.DataFrame, product_name: str):
|
||||
async def _validate_training_data(self, df: pd.DataFrame, inventory_product_id: str):
|
||||
"""Validate training data quality (unchanged)"""
|
||||
if df.empty:
|
||||
raise ValueError(f"No training data available for {product_name}")
|
||||
raise ValueError(f"No training data available for {inventory_product_id}")
|
||||
|
||||
if len(df) < settings.MIN_TRAINING_DATA_DAYS:
|
||||
raise ValueError(
|
||||
f"Insufficient training data for {product_name}: "
|
||||
f"Insufficient training data for {inventory_product_id}: "
|
||||
f"{len(df)} days, minimum required: {settings.MIN_TRAINING_DATA_DAYS}"
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user