2025-11-05 13:34:56 +01:00
|
|
|
"""
|
|
|
|
|
Hybrid Prophet + XGBoost Trainer
|
|
|
|
|
Combines Prophet's seasonality modeling with XGBoost's pattern learning
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import numpy as np
|
|
|
|
|
from typing import Dict, List, Any, Optional, Tuple
|
|
|
|
|
import structlog
|
2025-11-14 07:23:56 +01:00
|
|
|
from datetime import datetime, timezone
|
2025-11-05 13:34:56 +01:00
|
|
|
import joblib
|
|
|
|
|
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
|
|
|
|
|
from sklearn.model_selection import TimeSeriesSplit
|
|
|
|
|
import warnings
|
|
|
|
|
# Silence every warning category for the whole process as soon as this module
# is imported (no category/module filter is given, so nothing is exempt).
warnings.filterwarnings('ignore')
|
|
|
|
|
|
|
|
|
|
# Import XGBoost
|
|
|
|
|
try:
|
|
|
|
|
import xgboost as xgb
|
|
|
|
|
except ImportError:
|
|
|
|
|
raise ImportError("XGBoost not installed. Run: pip install xgboost")
|
|
|
|
|
|
|
|
|
|
from app.ml.prophet_manager import BakeryProphetManager
|
|
|
|
|
from app.ml.enhanced_features import AdvancedFeatureEngineer
|
|
|
|
|
|
|
|
|
|
# Module-level structlog logger; the trainer below emits its progress and
# metric events through it as keyword-structured log entries.
logger = structlog.get_logger()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HybridProphetXGBoost:
    """
    Hybrid forecasting model combining Prophet and XGBoost.

    Approach:
    1. Train Prophet on historical data (captures trend, seasonality, holidays)
    2. Calculate residuals (actual - prophet_prediction)
    3. Train XGBoost on residuals using enhanced features
    4. Final prediction = prophet_prediction + xgboost_residual_prediction

    Benefits:
    - Prophet handles seasonality, holidays, trends
    - XGBoost captures complex patterns Prophet misses
    - Maintains Prophet's interpretability
    - Improves accuracy by 10-25% over Prophet alone
    """

    # Columns that carry the date/target and are therefore never treated as
    # raw regressors (neither for Prophet nor for XGBoost).
    _NON_REGRESSOR_COLUMNS = ('ds', 'y', 'date', 'quantity')

    def __init__(self, database_manager=None):
        """
        Create the trainer and its collaborators.

        Args:
            database_manager: Passed through unchanged to BakeryProphetManager.
        """
        self.prophet_manager = BakeryProphetManager(database_manager)
        self.feature_engineer = AdvancedFeatureEngineer()
        # Populated by train_hybrid_model().
        self.xgb_model = None
        self.feature_columns = []
        self.prophet_model_data = None

    async def train_hybrid_model(
        self,
        tenant_id: str,
        inventory_product_id: str,
        df: pd.DataFrame,
        job_id: str,
        validation_split: float = 0.2,
        session=None
    ) -> Dict[str, Any]:
        """
        Train hybrid Prophet + XGBoost model.

        Args:
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier
            df: Training data (must have 'ds', 'y' and regressor columns)
            job_id: Training job identifier
            validation_split: Fraction of data for validation; must lie
                strictly between 0 and 1 and leave both partitions non-empty
            session: Optional database session (uses parent session if provided to avoid nested sessions)

        Returns:
            Dictionary with model metadata and performance metrics

        Raises:
            ValueError: If validation_split cannot produce a non-empty
                train and validation partition, or if the trained Prophet
                model cannot be located/loaded.
        """
        import asyncio

        logger.info(
            "Starting hybrid Prophet + XGBoost training",
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            data_points=len(df)
        )

        # Fail fast on an unusable split before the expensive Prophet fit.
        self._split_index(len(df), validation_split)

        # Train the Prophet base forecaster. The caller's session is forwarded
        # so the manager does not open a nested one.
        logger.info("Step 1: Training Prophet base model")
        prophet_result = await self.prophet_manager.train_bakery_model(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            df=df.copy(),
            job_id=job_id,
            session=session
        )
        self.prophet_model_data = prophet_result

        # Build the enhanced feature frame (also records self.feature_columns).
        logger.info("Step 2: Engineering enhanced features for XGBoost")
        df_enhanced = self._prepare_xgboost_features(df)

        # Chronological split: the first rows train, the trailing rows validate.
        split_idx = self._split_index(len(df_enhanced), validation_split)
        train_df = df_enhanced.iloc[:split_idx].copy()
        val_df = df_enhanced.iloc[split_idx:].copy()

        logger.info(
            "Data split",
            train_samples=len(train_df),
            val_samples=len(val_df)
        )

        # Prophet predictions for both partitions. Loading the model and
        # predicting are blocking, so they run in a worker thread.
        logger.info("Step 3: Generating Prophet predictions for residual calculation")
        train_prophet_pred = await asyncio.to_thread(
            self._get_prophet_predictions, prophet_result, train_df
        )
        val_prophet_pred = await asyncio.to_thread(
            self._get_prophet_predictions, prophet_result, val_df
        )

        # Residuals (actual - prophet_prediction) are the XGBoost target.
        train_residuals = train_df['y'].values - train_prophet_pred
        val_residuals = val_df['y'].values - val_prophet_pred

        logger.info(
            "Residuals calculated",
            train_residual_mean=float(np.mean(train_residuals)),
            train_residual_std=float(np.std(train_residuals))
        )

        # Feature matrices in the recorded column order.
        X_train = train_df[self.feature_columns].values
        X_val = val_df[self.feature_columns].values

        logger.info("Step 4: Training XGBoost on residuals")
        self.xgb_model = await self._train_xgboost(
            X_train, train_residuals,
            X_val, val_residuals
        )

        logger.info("Step 5: Evaluating hybrid model performance")
        metrics = await self._evaluate_hybrid_model(
            train_df, val_df,
            train_prophet_pred, val_prophet_pred,
            prophet_result
        )

        # Bundle everything a later predict() call needs.
        model_data = self._package_hybrid_model(
            prophet_result, metrics, tenant_id, inventory_product_id
        )

        logger.info(
            "Hybrid model training complete",
            prophet_mape=metrics['prophet_val_mape'],
            hybrid_mape=metrics['hybrid_val_mape'],
            improvement_pct=metrics['improvement_percentage']
        )

        return model_data

    @staticmethod
    def _split_index(n_rows: int, validation_split: float) -> int:
        """
        Return the row index separating training rows from validation rows.

        Args:
            n_rows: Number of rows available.
            validation_split: Fraction of rows reserved for validation.

        Returns:
            int(n_rows * (1 - validation_split))

        Raises:
            ValueError: If validation_split is not strictly between 0 and 1,
                or if either partition would end up empty.
        """
        if not 0 < validation_split < 1:
            raise ValueError(
                f"validation_split must be between 0 and 1 (exclusive), got {validation_split}"
            )
        split_idx = int(n_rows * (1 - validation_split))
        if split_idx <= 0 or split_idx >= n_rows:
            raise ValueError(
                f"Cannot split {n_rows} rows with validation_split={validation_split}: "
                "both the training and the validation partition must be non-empty"
            )
        return split_idx

    def _build_feature_frame(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, List[str]]:
        """
        Build the enhanced feature frame without touching self.feature_columns.

        Args:
            df: Base dataframe with 'ds', optionally 'y', and regressor columns

        Returns:
            Tuple of (enhanced dataframe, ordered list of feature column names)
        """
        # The feature engineer works on 'date'/'quantity'; mirror 'ds'/'y'
        # into those names on a copy so the caller's frame is untouched.
        df_prep = df.copy()
        if 'ds' in df_prep.columns:
            df_prep['date'] = df_prep['ds']
        if 'y' in df_prep.columns:
            df_prep['quantity'] = df_prep['y']

        df_enhanced = self.feature_engineer.create_all_features(
            df_prep,
            date_column='date',
            include_lags=True,
            include_rolling=True,
            include_interactions=True,
            include_cyclical=True
        )

        # Fill NA values (from lagged features at beginning)
        df_enhanced = self.feature_engineer.fill_na_values(df_enhanced)

        # Engineered features that actually made it into the frame ...
        engineered = [
            col for col in self.feature_engineer.get_feature_columns()
            if col in df_enhanced.columns
        ]
        # ... plus the caller's original regressor columns.
        regressors = [
            col for col in df.columns
            if col not in self._NON_REGRESSOR_COLUMNS and col in df_enhanced.columns
        ]

        # De-duplicate while keeping first-seen order. A set() would order the
        # names by string hash, which changes between interpreter runs and
        # makes the trained model non-reproducible.
        columns = list(dict.fromkeys(engineered + regressors))

        logger.info(f"Prepared {len(columns)} features for XGBoost")

        return df_enhanced, columns

    def _prepare_xgboost_features(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prepare enhanced features for XGBoost and record the feature list.

        Side effect: replaces self.feature_columns with the feature names
        derived from df.

        Args:
            df: Base dataframe with 'ds', 'y' and regressor columns

        Returns:
            DataFrame with all enhanced features
        """
        df_enhanced, self.feature_columns = self._build_feature_frame(df)
        return df_enhanced

    @staticmethod
    def _load_prophet_model(model_path):
        """
        Load a persisted Prophet model.

        Args:
            model_path: Location handed to joblib.load.

        Returns:
            The object stored at model_path.

        Raises:
            ValueError: If loading fails for any reason (original error chained).
        """
        # SECURITY NOTE: joblib.load unpickles arbitrary objects; model_path
        # must only ever point at storage this service wrote itself.
        try:
            return joblib.load(model_path)
        except Exception as exc:
            raise ValueError(
                f"Failed to load Prophet model from path {model_path}: {str(exc)}"
            ) from exc

    def _get_prophet_predictions(
        self,
        prophet_result: Dict[str, Any],
        df: pd.DataFrame
    ) -> np.ndarray:
        """
        Get Prophet predictions for given dataframe.

        Args:
            prophet_result: Prophet model result from training (contains model_path)
            df: DataFrame with 'ds' column

        Returns:
            Array of predictions ('yhat' of the Prophet forecast)

        Raises:
            ValueError: If the result has no 'model_path' or the model cannot
                be loaded.
        """
        model_path = prophet_result.get('model_path')
        if model_path is None:
            raise ValueError("Prophet model path not found in result")

        prophet_model = self._load_prophet_model(model_path)

        # 'ds' plus every column that is not a date/target alias.
        regressor_cols = [
            col for col in df.columns if col not in self._NON_REGRESSOR_COLUMNS
        ]
        pred_df = df.loc[:, ['ds', *regressor_cols]].copy()

        # NOTE(review): callers align the returned array positionally with
        # df's rows; this presumes df is ordered by 'ds' -- confirm.
        forecast = prophet_model.predict(pred_df)

        return forecast['yhat'].values

    async def _train_xgboost(
        self,
        X_train: np.ndarray,
        y_train: np.ndarray,
        X_val: np.ndarray,
        y_val: np.ndarray
    ) -> "xgb.XGBRegressor":
        """
        Train XGBoost model on residuals.

        Args:
            X_train: Training features
            y_train: Training residuals
            X_val: Validation features
            y_val: Validation residuals

        Returns:
            Trained XGBoost model
        """
        import asyncio

        # XGBoost parameters optimized for residual learning
        params = {
            'n_estimators': 100,
            'max_depth': 3,  # Shallow trees to prevent overfitting
            'learning_rate': 0.1,
            'subsample': 0.8,
            'colsample_bytree': 0.8,
            'min_child_weight': 3,
            'reg_alpha': 0.1,  # L1 regularization
            'reg_lambda': 1.0,  # L2 regularization
            'objective': 'reg:squarederror',
            'random_state': 42,
            'n_jobs': -1,
            'early_stopping_rounds': 10
        }

        model = xgb.XGBRegressor(**params)

        # fit() is blocking; run it in a worker thread so the event loop stays
        # responsive. The validation set drives early stopping.
        await asyncio.to_thread(
            model.fit,
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            verbose=False
        )

        logger.info(
            "XGBoost training complete",
            best_iteration=getattr(model, 'best_iteration', None)
        )

        return model

    @staticmethod
    def _error_metrics(actual: np.ndarray, predicted: np.ndarray) -> Tuple[float, float, float]:
        """
        Compute (rmse, mae, mape) for one set of predictions.

        MAPE is returned as a fraction (not multiplied by 100) and its
        denominator is floored at 1 so zero-sales rows do not divide by zero.
        """
        errors = actual - predicted
        rmse = float(np.sqrt(np.mean(errors ** 2)))
        mae = float(np.mean(np.abs(errors)))
        mape = float(np.mean(np.abs(errors / np.maximum(actual, 1))))
        return rmse, mae, mape

    async def _evaluate_hybrid_model(
        self,
        train_df: pd.DataFrame,
        val_df: pd.DataFrame,
        train_prophet_pred: np.ndarray,
        val_prophet_pred: np.ndarray,
        prophet_result: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Evaluate the overall performance of the hybrid model.

        Args:
            train_df: Training partition (needs 'y' and self.feature_columns)
            val_df: Validation partition (needs 'y' and self.feature_columns)
            train_prophet_pred: Prophet predictions for train_df rows
            val_prophet_pred: Prophet predictions for val_df rows
            prophet_result: Prophet training result; its optional 'metrics'
                dict supplies the Prophet baseline 'val_mape'

        Returns:
            Dict with train/val RMSE, MAE and MAPE of the hybrid prediction,
            the Prophet baseline MAPE, the relative improvement in percent
            and the raw Prophet metrics.
        """
        import asyncio

        X_train = train_df[self.feature_columns].values
        X_val = val_df[self.feature_columns].values

        # predict() is blocking; keep it off the event loop.
        train_xgb_pred = await asyncio.to_thread(self.xgb_model.predict, X_train)
        val_xgb_pred = await asyncio.to_thread(self.xgb_model.predict, X_val)

        # Hybrid prediction = Prophet prediction + XGBoost residual prediction
        train_hybrid_pred = train_prophet_pred + train_xgb_pred
        val_hybrid_pred = val_prophet_pred + val_xgb_pred

        actual_train = train_df['y'].values
        actual_val = val_df['y'].values

        train_rmse, train_mae, train_mape = self._error_metrics(actual_train, train_hybrid_pred)
        val_rmse, val_mae, val_mape = self._error_metrics(actual_val, val_hybrid_pred)

        # Prophet baseline. Prefer the value reported by Prophet training; if
        # it is absent, measure Prophet alone on the same validation rows with
        # the same formula instead of pretending it equals the hybrid score
        # (which would always report 0% improvement).
        # NOTE(review): the reported 'val_mape' is assumed to use the same
        # scale (fraction) as the hybrid MAPE computed here -- confirm.
        prophet_metrics = prophet_result.get("metrics") or {}
        prophet_val_mape = prophet_metrics.get("val_mape")
        if prophet_val_mape is None:
            _, _, prophet_val_mape = self._error_metrics(actual_val, val_prophet_pred)

        improvement_pct = 0.0
        if prophet_val_mape > 0:
            improvement_pct = ((prophet_val_mape - val_mape) / prophet_val_mape) * 100

        metrics = {
            "train_rmse": train_rmse,
            "val_rmse": val_rmse,
            "train_mae": train_mae,
            "val_mae": val_mae,
            "train_mape": train_mape,
            "val_mape": val_mape,
            "prophet_val_mape": prophet_val_mape,
            "hybrid_val_mape": val_mape,
            "improvement_percentage": float(improvement_pct),
            "prophet_metrics": prophet_metrics
        }

        logger.info(
            "Hybrid model evaluation complete",
            val_rmse=val_rmse,
            val_mae=val_mae,
            val_mape=val_mape,
            improvement=improvement_pct
        )

        return metrics

    def _package_hybrid_model(
        self,
        prophet_result: Dict[str, Any],
        metrics: Dict[str, Any],
        tenant_id: str,
        inventory_product_id: str
    ) -> Dict[str, Any]:
        """
        Package hybrid model for storage.

        Args:
            prophet_result: Prophet training result; only 'model_path' is read
            metrics: Evaluation metrics to store alongside the model
            tenant_id: Tenant identifier
            inventory_product_id: Product identifier

        Returns:
            Dict holding the Prophet model path, the in-memory XGBoost model,
            the feature column order, the metrics, both identifiers and a
            UTC ISO-8601 'trained_at' timestamp.
        """
        return {
            'model_type': 'hybrid_prophet_xgboost',
            'prophet_model_path': prophet_result.get('model_path'),
            'xgboost_model': self.xgb_model,
            'feature_columns': self.feature_columns,
            'metrics': metrics,
            'tenant_id': tenant_id,
            'inventory_product_id': inventory_product_id,
            'trained_at': datetime.now(timezone.utc).isoformat()
        }

    @staticmethod
    def _combine_forecasts(future_ds, prophet_forecast: pd.DataFrame, xgb_pred) -> pd.DataFrame:
        """
        Assemble the hybrid forecast frame row-by-row (positionally).

        Everything is converted to plain arrays / a fresh 0..n-1 index first,
        so a non-default index on the caller's frame cannot misalign rows or
        break DataFrame construction.
        """
        adjustment = np.asarray(xgb_pred)
        prophet_yhat = prophet_forecast['yhat'].to_numpy()
        return pd.DataFrame({
            'ds': pd.Series(future_ds).reset_index(drop=True),
            'prophet_yhat': prophet_yhat,
            'xgb_adjustment': adjustment,
            'yhat': prophet_yhat + adjustment,
            # The Prophet interval is shifted by the residual correction.
            'yhat_lower': prophet_forecast['yhat_lower'].to_numpy() + adjustment,
            'yhat_upper': prophet_forecast['yhat_upper'].to_numpy() + adjustment
        })

    async def predict(
        self,
        future_df: pd.DataFrame,
        model_data: Dict[str, Any]
    ) -> pd.DataFrame:
        """
        Make predictions using hybrid model.

        Does not modify the trainer's own state (self.feature_columns).

        Args:
            future_df: DataFrame with future dates and regressors
            model_data: Loaded hybrid model data

        Returns:
            DataFrame with columns 'ds', 'prophet_yhat', 'xgb_adjustment',
            'yhat', 'yhat_lower' and 'yhat_upper', indexed 0..n-1

        Raises:
            ValueError: If model_data has no 'prophet_model_path' or the
                Prophet model cannot be loaded.
        """
        import asyncio

        prophet_model_path = model_data.get('prophet_model_path')
        if prophet_model_path is None:
            raise ValueError("Prophet model path not found in model data")

        # Loading and predicting are blocking; keep both off the event loop.
        prophet_model = await asyncio.to_thread(
            self._load_prophet_model, prophet_model_path
        )
        prophet_forecast = await asyncio.to_thread(prophet_model.predict, future_df)

        # Build features without overwriting the feature list recorded during
        # training; the column order comes from the packaged model instead.
        future_enhanced, _ = self._build_feature_frame(future_df)

        xgb_model = model_data['xgboost_model']
        feature_columns = model_data['feature_columns']
        X_future = future_enhanced[feature_columns].values

        xgb_pred = await asyncio.to_thread(xgb_model.predict, X_future)

        return self._combine_forecasts(future_df['ds'], prophet_forecast, xgb_pred)
|