Add MinIO support and frontend analytics

Urtzi Alfaro
2026-01-17 22:42:40 +01:00
parent fbc670ddb3
commit 3c4b5c2a06
53 changed files with 3485 additions and 437 deletions

View File

@@ -5,6 +5,7 @@ Combines Prophet's seasonality modeling with XGBoost's pattern learning
 import pandas as pd
 import numpy as np
+import io
 from typing import Dict, List, Any, Optional, Tuple
 import structlog
 from datetime import datetime, timezone
@@ -110,8 +111,8 @@ class HybridProphetXGBoost:
         # Step 4: Get Prophet predictions on training data
         logger.info("Step 3: Generating Prophet predictions for residual calculation")
-        train_prophet_pred = self._get_prophet_predictions(prophet_result, train_df)
-        val_prophet_pred = self._get_prophet_predictions(prophet_result, val_df)
+        train_prophet_pred = await self._get_prophet_predictions(prophet_result, train_df)
+        val_prophet_pred = await self._get_prophet_predictions(prophet_result, val_df)

         # Step 5: Calculate residuals (actual - prophet_prediction)
         train_residuals = train_df['y'].values - train_prophet_pred
@@ -207,7 +208,7 @@ class HybridProphetXGBoost:
         return df_enhanced

-    def _get_prophet_predictions(
+    async def _get_prophet_predictions(
         self,
         prophet_result: Dict[str, Any],
         df: pd.DataFrame
@@ -230,8 +231,13 @@ class HybridProphetXGBoost:
         # Load the actual Prophet model from the stored path
         try:
-            import joblib
-            prophet_model = joblib.load(model_path)
+            if model_path.startswith("minio://"):
+                # Use prophet_manager to load from MinIO
+                prophet_model = await self.prophet_manager._load_model_from_minio(model_path)
+            else:
+                # Fallback to direct loading for local paths
+                import joblib
+                prophet_model = joblib.load(model_path)
         except Exception as e:
             raise ValueError(f"Failed to load Prophet model from path {model_path}: {str(e)}")
@@ -417,8 +423,13 @@ class HybridProphetXGBoost:
         # Load the Prophet model from the stored path
         try:
-            import joblib
-            prophet_model = joblib.load(prophet_model_path)
+            if prophet_model_path.startswith("minio://"):
+                # Use prophet_manager to load from MinIO
+                prophet_model = await self.prophet_manager._load_model_from_minio(prophet_model_path)
+            else:
+                # Fallback to direct loading for local paths
+                import joblib
+                prophet_model = joblib.load(prophet_model_path)
         except Exception as e:
             raise ValueError(f"Failed to load Prophet model from path {prophet_model_path}: {str(e)}")

View File

@@ -13,6 +13,7 @@ from datetime import datetime, timedelta
 import uuid
 import os
 import joblib
+import io
 from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 from sklearn.model_selection import TimeSeriesSplit
 import json
@@ -85,9 +86,24 @@ class BakeryProphetManager:
         self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "training-service")
         self.db_session = None  # Will be set when session is available

-        # Ensure model storage directory exists
-        os.makedirs(settings.MODEL_STORAGE_PATH, exist_ok=True)
+        # Initialize MinIO client and ensure bucket exists
+        from shared.clients.minio_client import minio_client
+        self.minio_client = minio_client
+        self._ensure_minio_bucket()
+
+    def _ensure_minio_bucket(self):
+        """Ensure the training-models bucket exists in MinIO"""
+        try:
+            bucket_name = settings.MINIO_MODEL_BUCKET
+            if not self.minio_client.bucket_exists(bucket_name):
+                self.minio_client.create_bucket(bucket_name)
+                logger.info(f"Created MinIO bucket: {bucket_name}")
+            else:
+                logger.debug(f"MinIO bucket already exists: {bucket_name}")
+        except Exception as e:
+            logger.error(f"Failed to ensure MinIO bucket exists: {e}")
+            # Don't raise - bucket might be created by init job

     async def train_bakery_model(self,
                                  tenant_id: str,
                                  inventory_product_id: str,
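
The bucket check above goes through the shared minio_client wrapper. For reference, a sketch of the equivalent check against the upstream minio SDK (endpoint and credentials are placeholders; the service reads them from settings):

from minio import Minio

# Placeholder connection values - the real ones come from service settings.
client = Minio("minio:9000", access_key="...", secret_key="...", secure=False)

bucket_name = "training-models"
if not client.bucket_exists(bucket_name):
    client.make_bucket(bucket_name)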
@@ -706,18 +722,40 @@ class BakeryProphetManager:
                              session = None) -> str:
         """Store model with database integration"""

-        # Create model directory
-        model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
-        model_dir.mkdir(parents=True, exist_ok=True)
+        # Store model in MinIO (clean implementation - MinIO only)
+        # Use BytesIO buffer since joblib.dump() writes to file-like objects
+        buffer = io.BytesIO()
+        joblib.dump(model, buffer)
+        model_data = buffer.getvalue()
+
+        object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.pkl"
+
+        # Use MinIO client
+        from shared.clients.minio_client import minio_client
+
+        # Upload model to MinIO
+        success = minio_client.put_object(
+            bucket_name="training-models",
+            object_name=object_name,
+            data=model_data,
+            content_type="application/octet-stream",
+            metadata={
+                "model_id": model_id,
+                "tenant_id": tenant_id,
+                "inventory_product_id": inventory_product_id,
+                "model_type": "prophet_optimized"
+            }
+        )
+
+        if not success:
+            raise Exception("Failed to upload model to MinIO")
+
+        # Return MinIO object path
+        model_path = f"minio://training-models/{object_name}"
+
+        # Calculate checksum for model data
+        import hashlib
+        model_checksum = hashlib.sha256(model_data).hexdigest()
-        # Store model file
-        model_path = model_dir / f"{model_id}.pkl"
-        joblib.dump(model, model_path)
-        # Calculate checksum for model file integrity
-        checksummed_file = ChecksummedFile(str(model_path))
-        model_checksum = checksummed_file.calculate_and_save_checksum()

         # Enhanced metadata with checksum
         metadata = {
             "model_id": model_id,
@@ -733,14 +771,23 @@ class BakeryProphetManager:
"optimized_parameters": optimized_params or {},
"created_at": datetime.now().isoformat(),
"model_type": "prophet_optimized",
"file_path": str(model_path),
"minio_path": model_path,
"checksum": model_checksum,
"checksum_algorithm": "sha256"
}
# Store metadata in MinIO as well
metadata_json = json.dumps(metadata, indent=2, default=str)
metadata_object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.json"
minio_client.put_object(
bucket_name="training-models",
object_name=metadata_object_name,
data=metadata_json,
content_type="application/json"
)
metadata_path = model_path.with_suffix('.json')
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2, default=str)
# Define metadata_path for database record
metadata_path = f"minio://training-models/{metadata_object_name}"
# Store in memory
model_key = f"{tenant_id}:{inventory_product_id}"
@@ -854,16 +901,10 @@ class BakeryProphetManager:
                                      model_path: str,
                                      future_dates: pd.DataFrame,
                                      regressor_columns: List[str]) -> pd.DataFrame:
-        """Generate forecast using stored model with checksum verification"""
+        """Generate forecast using stored model from MinIO"""
         try:
-            # Verify model file integrity before loading
-            checksummed_file = ChecksummedFile(model_path)
-            if not checksummed_file.load_and_verify_checksum():
-                logger.warning(f"Checksum verification failed for model: {model_path}")
-                # Still load the model but log warning
-                # In production, you might want to raise an exception instead
-            model = joblib.load(model_path)
+            # Load model from MinIO
+            model = await self._load_model_from_minio(model_path)

             for regressor in regressor_columns:
                 if regressor not in future_dates.columns:
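
Once the model is back in memory, forecasting only requires the future-date frame to carry every regressor column the model was trained with. A sketch (the 0.0 default for missing regressors is an assumption; the service may fill them differently):

import pandas as pd


def run_forecast(model, future_dates: pd.DataFrame, regressor_columns: list) -> pd.DataFrame:
    """Predict with a fitted Prophet model on an externally built future frame."""
    for regressor in regressor_columns:
        if regressor not in future_dates.columns:
            future_dates[regressor] = 0.0   # assumed neutral default
    forecast = model.predict(future_dates)  # adds yhat, yhat_lower, yhat_upper
    return forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]]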
@@ -876,6 +917,33 @@ class BakeryProphetManager:
         except Exception as e:
             logger.error(f"Failed to generate forecast: {str(e)}")
             raise

+    async def _load_model_from_minio(self, model_path: str):
+        """Load model from MinIO storage"""
+        try:
+            # Parse MinIO path: minio://bucket_name/object_path
+            if not model_path.startswith("minio://"):
+                raise ValueError(f"Invalid MinIO path: {model_path}")
+
+            _, bucket_and_path = model_path.split("://", 1)
+            bucket_name, object_name = bucket_and_path.split("/", 1)
+
+            logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")
+
+            # Download model data from MinIO
+            model_data = self.minio_client.get_object(bucket_name, object_name)
+            if not model_data:
+                raise ValueError(f"Failed to download model from MinIO: {model_path}")
+
+            # Deserialize model (using BytesIO since joblib.load reads from file-like objects)
+            buffer = io.BytesIO(model_data)
+            model = joblib.load(buffer)
+
+            logger.info(f"Model loaded successfully from MinIO: {model_path}")
+            return model
+
+        except Exception as e:
+            logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
+            raise
+
     async def _validate_training_data(self, df: pd.DataFrame, inventory_product_id: str):
         """Validate training data quality (unchanged)"""