Add MinIO support and frontend analytics
This commit is contained in:
@@ -5,6 +5,7 @@ Combines Prophet's seasonality modeling with XGBoost's pattern learning
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import io
|
||||
from typing import Dict, List, Any, Optional, Tuple
|
||||
import structlog
|
||||
from datetime import datetime, timezone
|
||||
@@ -110,8 +111,8 @@ class HybridProphetXGBoost:
|
||||
|
||||
# Step 4: Get Prophet predictions on training data
|
||||
logger.info("Step 3: Generating Prophet predictions for residual calculation")
|
||||
train_prophet_pred = self._get_prophet_predictions(prophet_result, train_df)
|
||||
val_prophet_pred = self._get_prophet_predictions(prophet_result, val_df)
|
||||
train_prophet_pred = await self._get_prophet_predictions(prophet_result, train_df)
|
||||
val_prophet_pred = await self._get_prophet_predictions(prophet_result, val_df)
|
||||
|
||||
# Step 5: Calculate residuals (actual - prophet_prediction)
|
||||
train_residuals = train_df['y'].values - train_prophet_pred
|
||||
@@ -207,7 +208,7 @@ class HybridProphetXGBoost:
|
||||
|
||||
return df_enhanced
|
||||
|
||||
def _get_prophet_predictions(
|
||||
async def _get_prophet_predictions(
|
||||
self,
|
||||
prophet_result: Dict[str, Any],
|
||||
df: pd.DataFrame
|
||||
@@ -230,8 +231,13 @@ class HybridProphetXGBoost:
|
||||
|
||||
# Load the actual Prophet model from the stored path
|
||||
try:
|
||||
import joblib
|
||||
prophet_model = joblib.load(model_path)
|
||||
if model_path.startswith("minio://"):
|
||||
# Use prophet_manager to load from MinIO
|
||||
prophet_model = await self.prophet_manager._load_model_from_minio(model_path)
|
||||
else:
|
||||
# Fallback to direct loading for local paths
|
||||
import joblib
|
||||
prophet_model = joblib.load(model_path)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to load Prophet model from path {model_path}: {str(e)}")
|
||||
|
||||
@@ -417,8 +423,13 @@ class HybridProphetXGBoost:
|
||||
|
||||
# Load the Prophet model from the stored path
|
||||
try:
|
||||
import joblib
|
||||
prophet_model = joblib.load(prophet_model_path)
|
||||
if prophet_model_path.startswith("minio://"):
|
||||
# Use prophet_manager to load from MinIO
|
||||
prophet_model = await self.prophet_manager._load_model_from_minio(prophet_model_path)
|
||||
else:
|
||||
# Fallback to direct loading for local paths
|
||||
import joblib
|
||||
prophet_model = joblib.load(prophet_model_path)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to load Prophet model from path {prophet_model_path}: {str(e)}")
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ from datetime import datetime, timedelta
|
||||
import uuid
|
||||
import os
|
||||
import joblib
|
||||
import io
|
||||
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
||||
from sklearn.model_selection import TimeSeriesSplit
|
||||
import json
|
||||
@@ -85,9 +86,24 @@ class BakeryProphetManager:
|
||||
self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "training-service")
|
||||
self.db_session = None # Will be set when session is available
|
||||
|
||||
# Ensure model storage directory exists
|
||||
os.makedirs(settings.MODEL_STORAGE_PATH, exist_ok=True)
|
||||
|
||||
# Initialize MinIO client and ensure bucket exists
|
||||
from shared.clients.minio_client import minio_client
|
||||
self.minio_client = minio_client
|
||||
self._ensure_minio_bucket()
|
||||
|
||||
def _ensure_minio_bucket(self):
    """Make sure the configured model bucket is present in MinIO.

    Reads the bucket name from ``settings.MINIO_MODEL_BUCKET`` and creates
    the bucket through ``self.minio_client`` when it is reported missing.
    Any exception is logged and swallowed, so this method never raises.
    """
    try:
        target_bucket = settings.MINIO_MODEL_BUCKET

        # Nothing to do when the bucket is already there.
        if self.minio_client.bucket_exists(target_bucket):
            logger.debug(f"MinIO bucket already exists: {target_bucket}")
            return

        self.minio_client.create_bucket(target_bucket)
        logger.info(f"Created MinIO bucket: {target_bucket}")
    except Exception as e:
        # Don't raise - bucket might be created by init job
        logger.error(f"Failed to ensure MinIO bucket exists: {e}")
|
||||
|
||||
async def train_bakery_model(self,
|
||||
tenant_id: str,
|
||||
inventory_product_id: str,
|
||||
@@ -706,18 +722,40 @@ class BakeryProphetManager:
|
||||
session = None) -> str:
|
||||
"""Store model with database integration"""
|
||||
|
||||
# Create model directory
|
||||
model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
|
||||
model_dir.mkdir(parents=True, exist_ok=True)
|
||||
# Store model in MinIO (clean implementation - MinIO only)
|
||||
# Use BytesIO buffer since joblib.dump() writes to file-like objects
|
||||
buffer = io.BytesIO()
|
||||
joblib.dump(model, buffer)
|
||||
model_data = buffer.getvalue()
|
||||
object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.pkl"
|
||||
|
||||
# Use MinIO client
|
||||
from shared.clients.minio_client import minio_client
|
||||
|
||||
# Upload model to MinIO
|
||||
success = minio_client.put_object(
|
||||
bucket_name="training-models",
|
||||
object_name=object_name,
|
||||
data=model_data,
|
||||
content_type="application/octet-stream",
|
||||
metadata={
|
||||
"model_id": model_id,
|
||||
"tenant_id": tenant_id,
|
||||
"inventory_product_id": inventory_product_id,
|
||||
"model_type": "prophet_optimized"
|
||||
}
|
||||
)
|
||||
|
||||
if not success:
|
||||
raise Exception("Failed to upload model to MinIO")
|
||||
|
||||
# Return MinIO object path
|
||||
model_path = f"minio://training-models/{object_name}"
|
||||
|
||||
# Calculate checksum for model data
|
||||
import hashlib
|
||||
model_checksum = hashlib.sha256(model_data).hexdigest()
|
||||
|
||||
# Store model file
|
||||
model_path = model_dir / f"{model_id}.pkl"
|
||||
joblib.dump(model, model_path)
|
||||
|
||||
# Calculate checksum for model file integrity
|
||||
checksummed_file = ChecksummedFile(str(model_path))
|
||||
model_checksum = checksummed_file.calculate_and_save_checksum()
|
||||
|
||||
# Enhanced metadata with checksum
|
||||
metadata = {
|
||||
"model_id": model_id,
|
||||
@@ -733,14 +771,23 @@ class BakeryProphetManager:
|
||||
"optimized_parameters": optimized_params or {},
|
||||
"created_at": datetime.now().isoformat(),
|
||||
"model_type": "prophet_optimized",
|
||||
"file_path": str(model_path),
|
||||
"minio_path": model_path,
|
||||
"checksum": model_checksum,
|
||||
"checksum_algorithm": "sha256"
|
||||
}
|
||||
|
||||
# Store metadata in MinIO as well
|
||||
metadata_json = json.dumps(metadata, indent=2, default=str)
|
||||
metadata_object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.json"
|
||||
minio_client.put_object(
|
||||
bucket_name="training-models",
|
||||
object_name=metadata_object_name,
|
||||
data=metadata_json,
|
||||
content_type="application/json"
|
||||
)
|
||||
|
||||
metadata_path = model_path.with_suffix('.json')
|
||||
with open(metadata_path, 'w') as f:
|
||||
json.dump(metadata, f, indent=2, default=str)
|
||||
# Define metadata_path for database record
|
||||
metadata_path = f"minio://training-models/{metadata_object_name}"
|
||||
|
||||
# Store in memory
|
||||
model_key = f"{tenant_id}:{inventory_product_id}"
|
||||
@@ -854,16 +901,10 @@ class BakeryProphetManager:
|
||||
model_path: str,
|
||||
future_dates: pd.DataFrame,
|
||||
regressor_columns: List[str]) -> pd.DataFrame:
|
||||
"""Generate forecast using stored model with checksum verification"""
|
||||
"""Generate forecast using stored model from MinIO"""
|
||||
try:
|
||||
# Verify model file integrity before loading
|
||||
checksummed_file = ChecksummedFile(model_path)
|
||||
if not checksummed_file.load_and_verify_checksum():
|
||||
logger.warning(f"Checksum verification failed for model: {model_path}")
|
||||
# Still load the model but log warning
|
||||
# In production, you might want to raise an exception instead
|
||||
|
||||
model = joblib.load(model_path)
|
||||
# Load model from MinIO
|
||||
model = await self._load_model_from_minio(model_path)
|
||||
|
||||
for regressor in regressor_columns:
|
||||
if regressor not in future_dates.columns:
|
||||
@@ -876,6 +917,33 @@ class BakeryProphetManager:
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to generate forecast: {str(e)}")
|
||||
raise
|
||||
|
||||
async def _load_model_from_minio(self, model_path: str):
    """Load a serialized model from MinIO storage.

    Args:
        model_path: Location of the model in the form
            ``minio://<bucket_name>/<object_name>``.

    Returns:
        The object produced by ``joblib.load`` from the downloaded bytes.

    Raises:
        ValueError: If ``model_path`` is not a well-formed MinIO path
            (wrong scheme, missing bucket, or missing object name), or if
            the client returns no data for it.
        Exception: Any other error raised while downloading or
            deserializing is logged and re-raised unchanged.
    """
    try:
        # Parse MinIO path: minio://bucket_name/object_path
        scheme = "minio://"
        if not model_path.startswith(scheme):
            raise ValueError(f"Invalid MinIO path: {model_path}")

        # partition() never fails, so a path without the "/" separator or
        # with an empty bucket/object name is rejected with a clear message
        # instead of a tuple-unpacking error or a request for an empty name.
        bucket_name, _, object_name = model_path[len(scheme):].partition("/")
        if not bucket_name or not object_name:
            raise ValueError(f"Invalid MinIO path: {model_path}")

        logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")

        # Download model data from MinIO
        # NOTE(review): this call is not awaited; if the client performs
        # blocking network I/O it will stall the event loop - confirm.
        model_data = self.minio_client.get_object(bucket_name, object_name)
        if not model_data:
            raise ValueError(f"Failed to download model from MinIO: {model_path}")

        # Deserialize model (using BytesIO since joblib.load reads from file-like objects)
        # NOTE(security): joblib.load unpickles the payload, which can execute
        # arbitrary code; only load objects from a trusted bucket.
        buffer = io.BytesIO(model_data)
        model = joblib.load(buffer)

        logger.info(f"Model loaded successfully from MinIO: {model_path}")
        return model

    except Exception as e:
        logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
        raise
|
||||
|
||||
async def _validate_training_data(self, df: pd.DataFrame, inventory_product_id: str):
|
||||
"""Validate training data quality (unchanged)"""
|
||||
|
||||
Reference in New Issue
Block a user