Add MinIO support and frontend analytics

2026-01-17 22:42:40 +01:00
parent fbc670ddb3
commit 3c4b5c2a06
53 changed files with 3485 additions and 437 deletions
--- a/services/forecasting/Dockerfile
+++ b/services/forecasting/Dockerfile
@@ -1,10 +1,10 @@
-# Forecasting Dockerfile
-# Add this stage at the top of each service Dockerfile
+# Forecasting Service Dockerfile with MinIO Support
+# Multi-stage build for optimized production image
 FROM python:3.11-slim AS shared
 WORKDIR /shared
 COPY shared/ /shared/

-# Then your main service stage
+# Main service stage
 FROM python:3.11-slim

 WORKDIR /app
--- a/services/forecasting/app/core/config.py
+++ b/services/forecasting/app/core/config.py
@@ -49,6 +49,18 @@ class ForecastingSettings(BaseServiceSettings):
    PREDICTION_CACHE_TTL_HOURS: int = int(os.getenv("PREDICTION_CACHE_TTL_HOURS", "6"))
    FORECAST_BATCH_SIZE: int = int(os.getenv("FORECAST_BATCH_SIZE", "100"))
    
+    # MinIO Configuration
+    MINIO_ENDPOINT: str = os.getenv("MINIO_ENDPOINT", "minio.bakery-ia.svc.cluster.local:9000")
+    MINIO_ACCESS_KEY: str = os.getenv("FORECASTING_MINIO_ACCESS_KEY", "forecasting-service")
+    MINIO_SECRET_KEY: str = os.getenv("FORECASTING_MINIO_SECRET_KEY", "forecasting-secret-key")
+    MINIO_USE_SSL: bool = os.getenv("MINIO_USE_SSL", "true").lower() == "true"
+    MINIO_MODEL_BUCKET: str = os.getenv("MINIO_MODEL_BUCKET", "training-models")
+    MINIO_CONSOLE_PORT: str = os.getenv("MINIO_CONSOLE_PORT", "9001")
+    MINIO_API_PORT: str = os.getenv("MINIO_API_PORT", "9000")
+    MINIO_REGION: str = os.getenv("MINIO_REGION", "us-east-1")
+    MINIO_MODEL_LIFECYCLE_DAYS: int = int(os.getenv("MINIO_MODEL_LIFECYCLE_DAYS", "90"))
+    MINIO_CACHE_TTL_SECONDS: int = int(os.getenv("MINIO_CACHE_TTL_SECONDS", "3600"))
+    
    # Real-time Forecasting
    REALTIME_FORECASTING_ENABLED: bool = os.getenv("REALTIME_FORECASTING_ENABLED", "true").lower() == "true"
    FORECAST_UPDATE_INTERVAL_HOURS: int = int(os.getenv("FORECAST_UPDATE_INTERVAL_HOURS", "6"))
--- a/services/forecasting/app/services/prediction_service.py
+++ b/services/forecasting/app/services/prediction_service.py
@@ -16,6 +16,7 @@ import httpx
 from pathlib import Path
 import os
 import joblib
+import io

 from app.core.config import settings
 from shared.monitoring.metrics import MetricsCollector
@@ -578,118 +579,114 @@ class PredictionService:
        return adjusted

    async def _load_model(self, model_id: str, model_path: str):
-        """Load model from file with improved validation and error handling"""
-        
-        # Enhanced model file validation
-        if not await self._validate_model_file(model_path):
-            logger.error(f"Model file not valid: {model_path}")
-            return None
-        
+        """Load model from MinIO with improved validation and error handling"""
+
        # Check cache first
        if model_id in self.model_cache:
            cached_model, cached_time = self.model_cache[model_id]
            if (datetime.now() - cached_time).seconds < self.cache_ttl:
+                logger.debug(f"Model loaded from cache: {model_id}")
                return cached_model
-        
+
+        # Validate MinIO path format
+        if not await self._validate_model_file(model_path):
+            logger.error(f"Model path not valid: {model_path}")
+            return None
+
        try:
-            if os.path.exists(model_path):
-                # Try multiple loading methods for compatibility
-                model = await self._load_model_safely(model_path)
-                
-                if model is None:
-                    logger.error(f"Failed to load model from: {model_path}")
-                    return None
-                
-                # Cache the model
-                self.model_cache[model_id] = (model, datetime.now())
-                logger.info(f"Model loaded successfully: {model_path}")
-                return model
-            else:
-                logger.error(f"Model file not found: {model_path}")
+            # Load from MinIO
+            model = await self._load_model_safely(model_path)
+
+            if model is None:
+                logger.error(f"Failed to load model from MinIO: {model_path}")
                return None
-                
+
+            # Cache the model
+            self.model_cache[model_id] = (model, datetime.now())
+            logger.info(f"Model loaded successfully from MinIO: {model_path}")
+            return model
+
        except Exception as e:
-            logger.error(f"Error loading model: {e}")
+            logger.error(f"Error loading model from MinIO: {e}")
            return None
    
    async def _load_model_safely(self, model_path: str):
-        """Safely load model with multiple fallback methods"""
-        
-        # Method 1: Try joblib first (recommended for sklearn/Prophet models)
+        """Load model from MinIO storage (clean implementation - MinIO only)"""
        try:
-            logger.debug(f"Attempting to load model with joblib: {model_path}")
-            model = joblib.load(model_path)
-            logger.info(f"Model loaded successfully with joblib")
-            return model
+            # Parse MinIO path: minio://bucket_name/object_path
+            _, bucket_and_path = model_path.split("://", 1)
+            bucket_name, object_name = bucket_and_path.split("/", 1)
+            
+            logger.debug(f"Loading model from MinIO: {bucket_name}/{object_name}")
+            
+            # Use MinIO client
+            from shared.clients.minio_client import minio_client
+            
+            # Download model data
+            model_data = minio_client.get_object(bucket_name, object_name)
+            if not model_data:
+                logger.error(f"Failed to download model from MinIO: {model_path}")
+                return None
+            
+            # Try joblib first (using BytesIO since joblib.load reads from file-like objects)
+            try:
+                buffer = io.BytesIO(model_data)
+                model = joblib.load(buffer)
+                logger.info(f"Model loaded successfully from MinIO with joblib")
+                return model
+            except Exception as e:
+                logger.warning(f"Joblib loading from MinIO failed: {e}")
+            
+            # Try pickle as fallback
+            try:
+                model = pickle.loads(model_data)
+                logger.info(f"Model loaded successfully from MinIO with pickle")
+                return model
+            except Exception as e:
+                logger.warning(f"Pickle loading from MinIO failed: {e}")
+            
+            logger.error(f"All loading methods failed for MinIO object: {model_path}")
+            return None
+            
        except Exception as e:
-            logger.warning(f"Joblib loading failed: {e}")
-        
-        # Method 2: Try pickle as fallback
-        try:
-            logger.debug(f"Attempting to load model with pickle: {model_path}")
-            with open(model_path, 'rb') as f:
-                model = pickle.load(f)
-            logger.info(f"Model loaded successfully with pickle")
-            return model
-        except Exception as e:
-            logger.warning(f"Pickle loading failed: {e}")
-        
-        # Method 3: Try pandas pickle (for Prophet models saved with pandas)
-        try:
-            logger.debug(f"Attempting to load model with pandas: {model_path}")
-            import pandas as pd
-            model = pd.read_pickle(model_path)
-            logger.info(f"Model loaded successfully with pandas")
-            return model
-        except Exception as e:
-            logger.warning(f"Pandas loading failed: {e}")
-        
-        logger.error(f"All loading methods failed for: {model_path}")
-        return None
+            logger.error(f"Failed to load model from MinIO: {model_path}, error: {e}")
+            return None
        
    async def _validate_model_file(self, model_path: str) -> bool:
-        """Enhanced model file validation"""
+        """Validate MinIO model path and check object exists"""
        try:
-            if not os.path.exists(model_path):
-                logger.error(f"Model file not found: {model_path}")
+            # Validate MinIO path format
+            if not model_path.startswith("minio://"):
+                logger.error(f"Invalid model path format (expected minio://): {model_path}")
                return False
-                
-            # Check file size (should be > 1KB for a trained model)
-            file_size = os.path.getsize(model_path)
-            if file_size < 1024:
-                logger.warning(f"Model file too small ({file_size} bytes): {model_path}")
-                return False
-                
-            # More comprehensive file format detection
+
+            # Parse MinIO path
            try:
-                with open(model_path, 'rb') as f:
-                    header = f.read(16)  # Read more bytes for better detection
-                    
-                # Check for various pickle/joblib signatures
-                valid_signatures = [
-                    b']\x93PICKLE',     # Joblib
-                    b'\x80\x03',        # Pickle protocol 3
-                    b'\x80\x04',        # Pickle protocol 4  
-                    b'\x80\x05',        # Pickle protocol 5
-                    b'}\x94',           # Newer joblib format
-                    b'}\x93',           # Alternative joblib format
-                ]
-                
-                is_valid_format = any(header.startswith(sig) for sig in valid_signatures)
-                
-                if not is_valid_format:
-                    # Log header for debugging but don't fail validation
-                    logger.warning(f"Unrecognized file header: {header[:8]} for {model_path}")
-                    logger.info("Proceeding with loading attempt despite unrecognized header")
-                    # Return True to allow loading attempt - some valid files may have different headers
-                    return True
-                
-                return True
-                
-            except Exception as e:
-                logger.error(f"Error reading model file header: {e}")
+                _, bucket_and_path = model_path.split("://", 1)
+                bucket_name, object_name = bucket_and_path.split("/", 1)
+            except ValueError:
+                logger.error(f"Cannot parse MinIO path: {model_path}")
                return False
-                
+
+            # Check if object exists in MinIO
+            from shared.clients.minio_client import minio_client
+
+            if not minio_client.object_exists(bucket_name, object_name):
+                logger.error(f"Model object not found in MinIO: {bucket_name}/{object_name}")
+                return False
+
+            # Check object metadata for size validation
+            metadata = minio_client.get_object_metadata(bucket_name, object_name)
+            if metadata:
+                file_size = metadata.get("size", 0)
+                if file_size < 1024:
+                    logger.warning(f"Model object too small ({file_size} bytes): {model_path}")
+                    return False
+
+                logger.debug(f"Model validated in MinIO: {bucket_name}/{object_name}, size={file_size}")
+
+            return True
+
        except Exception as e:
            logger.error(f"Model validation error: {e}")
            return False
--- a/services/forecasting/requirements.txt
+++ b/services/forecasting/requirements.txt
@@ -31,6 +31,7 @@ scikit-learn==1.6.1
 pandas==2.2.3
 numpy==2.2.2
 joblib==1.4.2
+minio==7.2.2

 # Messaging
 aio-pika==9.4.3