Add MinIO support and frontend analytics

This commit is contained in:
Urtzi Alfaro
2026-01-17 22:42:40 +01:00
parent fbc670ddb3
commit 3c4b5c2a06
53 changed files with 3485 additions and 437 deletions

View File

@@ -13,6 +13,7 @@ from datetime import datetime, timedelta
import uuid
import os
import joblib
import io
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import TimeSeriesSplit
import json
@@ -85,9 +86,24 @@ class BakeryProphetManager:
self.database_manager = database_manager or create_database_manager(settings.DATABASE_URL, "training-service")
self.db_session = None # Will be set when session is available
# Ensure model storage directory exists
os.makedirs(settings.MODEL_STORAGE_PATH, exist_ok=True)
# Initialize MinIO client and ensure bucket exists
from shared.clients.minio_client import minio_client
self.minio_client = minio_client
self._ensure_minio_bucket()
def _ensure_minio_bucket(self):
    """Make sure the MinIO bucket for training models is present.

    Best-effort: any failure is logged rather than raised, since the
    bucket may also be provisioned externally by an init job.
    """
    try:
        bucket_name = settings.MINIO_MODEL_BUCKET
        if self.minio_client.bucket_exists(bucket_name):
            logger.debug(f"MinIO bucket already exists: {bucket_name}")
        else:
            self.minio_client.create_bucket(bucket_name)
            logger.info(f"Created MinIO bucket: {bucket_name}")
    except Exception as e:
        logger.error(f"Failed to ensure MinIO bucket exists: {e}")
        # Don't raise - bucket might be created by init job
async def train_bakery_model(self,
tenant_id: str,
inventory_product_id: str,
@@ -706,18 +722,40 @@ class BakeryProphetManager:
session = None) -> str:
"""Store model with database integration"""
# Create model directory
model_dir = Path(settings.MODEL_STORAGE_PATH) / tenant_id
model_dir.mkdir(parents=True, exist_ok=True)
# Store model in MinIO (clean implementation - MinIO only)
# Use BytesIO buffer since joblib.dump() writes to file-like objects
buffer = io.BytesIO()
joblib.dump(model, buffer)
model_data = buffer.getvalue()
object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.pkl"
# Use MinIO client
from shared.clients.minio_client import minio_client
# Upload model to MinIO
success = minio_client.put_object(
bucket_name="training-models",
object_name=object_name,
data=model_data,
content_type="application/octet-stream",
metadata={
"model_id": model_id,
"tenant_id": tenant_id,
"inventory_product_id": inventory_product_id,
"model_type": "prophet_optimized"
}
)
if not success:
raise Exception("Failed to upload model to MinIO")
# Return MinIO object path
model_path = f"minio://training-models/{object_name}"
# Calculate checksum for model data
import hashlib
model_checksum = hashlib.sha256(model_data).hexdigest()
# Store model file
model_path = model_dir / f"{model_id}.pkl"
joblib.dump(model, model_path)
# Calculate checksum for model file integrity
checksummed_file = ChecksummedFile(str(model_path))
model_checksum = checksummed_file.calculate_and_save_checksum()
# Enhanced metadata with checksum
metadata = {
"model_id": model_id,
@@ -733,14 +771,23 @@ class BakeryProphetManager:
"optimized_parameters": optimized_params or {},
"created_at": datetime.now().isoformat(),
"model_type": "prophet_optimized",
"file_path": str(model_path),
"minio_path": model_path,
"checksum": model_checksum,
"checksum_algorithm": "sha256"
}
# Store metadata in MinIO as well
metadata_json = json.dumps(metadata, indent=2, default=str)
metadata_object_name = f"models/{tenant_id}/{inventory_product_id}/{model_id}.json"
minio_client.put_object(
bucket_name="training-models",
object_name=metadata_object_name,
data=metadata_json,
content_type="application/json"
)
metadata_path = model_path.with_suffix('.json')
with open(metadata_path, 'w') as f:
json.dump(metadata, f, indent=2, default=str)
# Define metadata_path for database record
metadata_path = f"minio://training-models/{metadata_object_name}"
# Store in memory
model_key = f"{tenant_id}:{inventory_product_id}"
@@ -854,16 +901,10 @@ class BakeryProphetManager:
model_path: str,
future_dates: pd.DataFrame,
regressor_columns: List[str]) -> pd.DataFrame:
"""Generate forecast using stored model with checksum verification"""
"""Generate forecast using stored model from MinIO"""
try:
# Verify model file integrity before loading
checksummed_file = ChecksummedFile(model_path)
if not checksummed_file.load_and_verify_checksum():
logger.warning(f"Checksum verification failed for model: {model_path}")
# Still load the model but log warning
# In production, you might want to raise an exception instead
model = joblib.load(model_path)
# Load model from MinIO
model = await self._load_model_from_minio(model_path)
for regressor in regressor_columns:
if regressor not in future_dates.columns:
@@ -876,6 +917,33 @@ class BakeryProphetManager:
except Exception as e:
logger.error(f"Failed to generate forecast: {str(e)}")
raise
async def _load_model_from_minio(self, model_path: str):
    """Download a serialized model from MinIO and deserialize it.

    Args:
        model_path: Object location in the form ``minio://<bucket>/<object>``.

    Returns:
        The deserialized model object.

    Raises:
        ValueError: If the path is not a MinIO URI or the download yields
            no data.
        Exception: Re-raised from the underlying client after logging.
    """
    try:
        # Only minio:// URIs are supported here
        if not model_path.startswith("minio://"):
            raise ValueError(f"Invalid MinIO path: {model_path}")
        _, remainder = model_path.split("://", 1)
        bucket_name, object_name = remainder.split("/", 1)
        logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")
        # Fetch the raw serialized bytes from object storage
        model_data = self.minio_client.get_object(bucket_name, object_name)
        if not model_data:
            raise ValueError(f"Failed to download model from MinIO: {model_path}")
        # joblib.load reads from file-like objects, so wrap the bytes
        model = joblib.load(io.BytesIO(model_data))
        logger.info(f"Model loaded successfully from MinIO: {model_path}")
        return model
    except Exception as e:
        logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
        raise
async def _validate_training_data(self, df: pd.DataFrame, inventory_product_id: str):
"""Validate training data quality (unchanged)"""