REFACTOR production scheduler

This commit is contained in:
Urtzi Alfaro
2025-10-09 18:01:24 +02:00
parent 3c689b4f98
commit b420af32c5
13 changed files with 4046 additions and 6 deletions

View File

@@ -11,6 +11,7 @@ import uuid
from app.services.forecasting_service import EnhancedForecastingService
from app.services.prediction_service import PredictionService
from app.services.forecast_cache import get_forecast_cache_service
from app.schemas.forecasts import (
ForecastRequest, ForecastResponse, BatchForecastRequest,
BatchForecastResponse, MultiDayForecastResponse
@@ -53,7 +54,7 @@ async def generate_single_forecast(
current_user: dict = Depends(get_current_user_dep),
enhanced_forecasting_service: EnhancedForecastingService = Depends(get_enhanced_forecasting_service)
):
"""Generate a single product forecast"""
"""Generate a single product forecast with caching support"""
metrics = get_metrics_collector(request_obj)
try:
@@ -65,11 +66,41 @@ async def generate_single_forecast(
if metrics:
metrics.increment_counter("single_forecasts_total")
# Initialize cache service
cache_service = get_forecast_cache_service(settings.REDIS_URL)
# Check cache first
cached_forecast = await cache_service.get_cached_forecast(
tenant_id=uuid.UUID(tenant_id),
product_id=uuid.UUID(request.inventory_product_id),
forecast_date=request.forecast_date
)
if cached_forecast:
if metrics:
metrics.increment_counter("forecast_cache_hits_total")
logger.info("Returning cached forecast",
tenant_id=tenant_id,
forecast_id=cached_forecast.get('id'))
return ForecastResponse(**cached_forecast)
# Cache miss - generate forecast
if metrics:
metrics.increment_counter("forecast_cache_misses_total")
forecast = await enhanced_forecasting_service.generate_forecast(
tenant_id=tenant_id,
request=request
)
# Cache the result
await cache_service.cache_forecast(
tenant_id=uuid.UUID(tenant_id),
product_id=uuid.UUID(request.inventory_product_id),
forecast_date=request.forecast_date,
forecast_data=forecast.dict()
)
if metrics:
metrics.increment_counter("single_forecasts_success_total")

View File

@@ -0,0 +1,518 @@
# services/forecasting/app/services/forecast_cache.py
"""
Forecast Cache Service - Redis-based caching for forecast results
Provides service-level caching for forecast predictions to eliminate redundant
computations when multiple services (Orders, Production) request the same
forecast data within a short time window.
Cache Strategy:
- Key: forecast:{tenant_id}:{product_id}:{forecast_date}
- TTL: Until midnight of day after forecast_date
- Invalidation: On model retraining for specific products
- Metadata: Includes 'cached' flag for observability
"""
import hashlib
import json
import redis
from datetime import datetime, date, timedelta
from typing import Optional, Dict, Any, List
from uuid import UUID
import structlog
logger = structlog.get_logger()
class ForecastCacheService:
"""Service-level caching for forecast predictions"""
def __init__(self, redis_url: str):
"""
Initialize Redis connection for forecast caching
Args:
redis_url: Redis connection URL
"""
self.redis_url = redis_url
self._redis_client = None
self._connect()
def _connect(self):
"""Establish Redis connection with retry logic"""
try:
self._redis_client = redis.from_url(
self.redis_url,
decode_responses=True,
socket_keepalive=True,
# Per-option TCP keepalive tuning (socket_keepalive_options) is omitted
# here because the raw TCP_KEEP* option numbers vary across platforms.
retry_on_timeout=True,
max_connections=100, # Higher limit for forecast service
health_check_interval=30
)
# Test connection
self._redis_client.ping()
logger.info("Forecast cache Redis connection established")
except Exception as e:
logger.error("Failed to connect to forecast cache Redis", error=str(e))
self._redis_client = None
@property
def redis(self):
"""Get Redis client with connection check"""
if self._redis_client is None:
self._connect()
return self._redis_client
def is_available(self) -> bool:
"""Check if Redis cache is available"""
try:
return self.redis is not None and self.redis.ping()
except Exception:
return False
# ================================================================
# FORECAST CACHING
# ================================================================
def _get_forecast_key(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date
) -> str:
"""Generate cache key for forecast"""
return f"forecast:{tenant_id}:{product_id}:{forecast_date.isoformat()}"
def _get_batch_forecast_key(
self,
tenant_id: UUID,
product_ids: List[UUID],
forecast_date: date
) -> str:
"""Generate cache key for batch forecast"""
# Sort product IDs so the same set always yields the same key, and use a
# stable digest: built-in hash() is randomized per process (PYTHONHASHSEED),
# which would make keys unreachable across restarts and workers.
sorted_ids = sorted(str(pid) for pid in product_ids)
products_hash = hashlib.sha256(",".join(sorted_ids).encode()).hexdigest()[:16]
return f"forecast:batch:{tenant_id}:{products_hash}:{forecast_date.isoformat()}"
def _calculate_ttl(self, forecast_date: date) -> int:
"""
Calculate TTL for forecast cache entry
Forecasts expire at midnight of the day after forecast_date.
This ensures forecasts remain cached throughout the forecasted day
but don't become stale.
Args:
forecast_date: Date of the forecast
Returns:
TTL in seconds
"""
# Expire at midnight after forecast_date
expiry_datetime = datetime.combine(
forecast_date + timedelta(days=1),
datetime.min.time()
)
now = datetime.now()
ttl_seconds = int((expiry_datetime - now).total_seconds())
# Minimum TTL of 1 hour, maximum of 48 hours
return max(3600, min(ttl_seconds, 172800))
async def get_cached_forecast(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date
) -> Optional[Dict[str, Any]]:
"""
Retrieve cached forecast if available
Args:
tenant_id: Tenant identifier
product_id: Product identifier
forecast_date: Date of forecast
Returns:
Cached forecast data or None if not found
"""
if not self.is_available():
return None
try:
key = self._get_forecast_key(tenant_id, product_id, forecast_date)
cached_data = self.redis.get(key)
if cached_data:
forecast_data = json.loads(cached_data)
# Add cache hit metadata
forecast_data['cached'] = True
forecast_data['cache_hit_at'] = datetime.now().isoformat()
logger.info("Forecast cache HIT",
tenant_id=str(tenant_id),
product_id=str(product_id),
forecast_date=str(forecast_date))
return forecast_data
logger.debug("Forecast cache MISS",
tenant_id=str(tenant_id),
product_id=str(product_id),
forecast_date=str(forecast_date))
return None
except Exception as e:
logger.error("Error retrieving cached forecast",
error=str(e),
tenant_id=str(tenant_id))
return None
async def cache_forecast(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date,
forecast_data: Dict[str, Any]
) -> bool:
"""
Cache forecast prediction result
Args:
tenant_id: Tenant identifier
product_id: Product identifier
forecast_date: Date of forecast
forecast_data: Forecast prediction data to cache
Returns:
True if cached successfully, False otherwise
"""
if not self.is_available():
logger.warning("Redis not available, skipping forecast cache")
return False
try:
key = self._get_forecast_key(tenant_id, product_id, forecast_date)
ttl = self._calculate_ttl(forecast_date)
# Add caching metadata
cache_entry = {
**forecast_data,
'cached_at': datetime.now().isoformat(),
'cache_key': key,
'ttl_seconds': ttl
}
# Serialize and cache
self.redis.setex(
key,
ttl,
json.dumps(cache_entry, default=str)
)
logger.info("Forecast cached successfully",
tenant_id=str(tenant_id),
product_id=str(product_id),
forecast_date=str(forecast_date),
ttl_hours=round(ttl / 3600, 2))
return True
except Exception as e:
logger.error("Error caching forecast",
error=str(e),
tenant_id=str(tenant_id))
return False
async def get_cached_batch_forecast(
self,
tenant_id: UUID,
product_ids: List[UUID],
forecast_date: date
) -> Optional[Dict[str, Any]]:
"""
Retrieve cached batch forecast
Args:
tenant_id: Tenant identifier
product_ids: List of product identifiers
forecast_date: Date of forecast
Returns:
Cached batch forecast data or None
"""
if not self.is_available():
return None
try:
key = self._get_batch_forecast_key(tenant_id, product_ids, forecast_date)
cached_data = self.redis.get(key)
if cached_data:
forecast_data = json.loads(cached_data)
forecast_data['cached'] = True
forecast_data['cache_hit_at'] = datetime.now().isoformat()
logger.info("Batch forecast cache HIT",
tenant_id=str(tenant_id),
products_count=len(product_ids),
forecast_date=str(forecast_date))
return forecast_data
return None
except Exception as e:
logger.error("Error retrieving cached batch forecast", error=str(e))
return None
async def cache_batch_forecast(
self,
tenant_id: UUID,
product_ids: List[UUID],
forecast_date: date,
forecast_data: Dict[str, Any]
) -> bool:
"""Cache batch forecast result"""
if not self.is_available():
return False
try:
key = self._get_batch_forecast_key(tenant_id, product_ids, forecast_date)
ttl = self._calculate_ttl(forecast_date)
cache_entry = {
**forecast_data,
'cached_at': datetime.now().isoformat(),
'cache_key': key,
'ttl_seconds': ttl
}
self.redis.setex(key, ttl, json.dumps(cache_entry, default=str))
logger.info("Batch forecast cached successfully",
tenant_id=str(tenant_id),
products_count=len(product_ids),
ttl_hours=round(ttl / 3600, 2))
return True
except Exception as e:
logger.error("Error caching batch forecast", error=str(e))
return False
# ================================================================
# CACHE INVALIDATION
# ================================================================
async def invalidate_product_forecasts(
self,
tenant_id: UUID,
product_id: UUID
) -> int:
"""
Invalidate all forecast cache entries for a product
Called when model is retrained for specific product.
Args:
tenant_id: Tenant identifier
product_id: Product identifier
Returns:
Number of cache entries invalidated
"""
if not self.is_available():
return 0
try:
# Find all keys matching this product; SCAN avoids blocking Redis
# the way KEYS can on a large keyspace
pattern = f"forecast:{tenant_id}:{product_id}:*"
keys = list(self.redis.scan_iter(match=pattern))
if keys:
deleted = self.redis.delete(*keys)
logger.info("Invalidated product forecast cache",
tenant_id=str(tenant_id),
product_id=str(product_id),
keys_deleted=deleted)
return deleted
return 0
except Exception as e:
logger.error("Error invalidating product forecasts",
error=str(e),
tenant_id=str(tenant_id))
return 0
async def invalidate_tenant_forecasts(
self,
tenant_id: UUID,
forecast_date: Optional[date] = None
) -> int:
"""
Invalidate forecast cache for tenant
Args:
tenant_id: Tenant identifier
forecast_date: Optional specific date to invalidate
Returns:
Number of cache entries invalidated
"""
if not self.is_available():
return 0
try:
if forecast_date:
pattern = f"forecast:{tenant_id}:*:{forecast_date.isoformat()}"
else:
pattern = f"forecast:{tenant_id}:*"
keys = list(self.redis.scan_iter(match=pattern))
if keys:
deleted = self.redis.delete(*keys)
logger.info("Invalidated tenant forecast cache",
tenant_id=str(tenant_id),
forecast_date=str(forecast_date) if forecast_date else "all",
keys_deleted=deleted)
return deleted
return 0
except Exception as e:
logger.error("Error invalidating tenant forecasts", error=str(e))
return 0
async def invalidate_all_forecasts(self) -> int:
"""
Invalidate all forecast cache entries (use with caution)
Returns:
Number of cache entries invalidated
"""
if not self.is_available():
return 0
try:
pattern = "forecast:*"
keys = list(self.redis.scan_iter(match=pattern))
if keys:
deleted = self.redis.delete(*keys)
logger.warning("Invalidated ALL forecast cache", keys_deleted=deleted)
return deleted
return 0
except Exception as e:
logger.error("Error invalidating all forecasts", error=str(e))
return 0
# ================================================================
# CACHE STATISTICS & MONITORING
# ================================================================
def get_cache_stats(self) -> Dict[str, Any]:
"""
Get cache statistics for monitoring
Returns:
Dictionary with cache metrics
"""
if not self.is_available():
return {"available": False}
try:
info = self.redis.info()
# Get forecast-specific stats (SCAN keeps this non-blocking)
forecast_keys = list(self.redis.scan_iter(match="forecast:*"))
batch_keys = list(self.redis.scan_iter(match="forecast:batch:*"))
return {
"available": True,
"total_forecast_keys": len(forecast_keys),
"batch_forecast_keys": len(batch_keys),
"single_forecast_keys": len(forecast_keys) - len(batch_keys),
"used_memory": info.get("used_memory_human"),
"connected_clients": info.get("connected_clients"),
"keyspace_hits": info.get("keyspace_hits", 0),
"keyspace_misses": info.get("keyspace_misses", 0),
"hit_rate_percent": self._calculate_hit_rate(
info.get("keyspace_hits", 0),
info.get("keyspace_misses", 0)
),
"total_commands_processed": info.get("total_commands_processed", 0)
}
except Exception as e:
logger.error("Error getting cache stats", error=str(e))
return {"available": False, "error": str(e)}
def _calculate_hit_rate(self, hits: int, misses: int) -> float:
"""Calculate cache hit rate percentage"""
total = hits + misses
return round((hits / total * 100), 2) if total > 0 else 0.0
async def get_cached_forecast_info(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date
) -> Optional[Dict[str, Any]]:
"""
Get metadata about cached forecast without retrieving full data
Args:
tenant_id: Tenant identifier
product_id: Product identifier
forecast_date: Date of forecast
Returns:
Cache metadata or None
"""
if not self.is_available():
return None
try:
key = self._get_forecast_key(tenant_id, product_id, forecast_date)
ttl = self.redis.ttl(key)
if ttl > 0:
return {
"cached": True,
"cache_key": key,
"ttl_seconds": ttl,
"ttl_hours": round(ttl / 3600, 2),
"expires_at": (datetime.now() + timedelta(seconds=ttl)).isoformat()
}
return None
except Exception as e:
logger.error("Error getting forecast cache info", error=str(e))
return None
# Global cache service instance
_cache_service = None
def get_forecast_cache_service(redis_url: Optional[str] = None) -> ForecastCacheService:
"""
Get the global forecast cache service instance
Args:
redis_url: Redis connection URL (required for first call)
Returns:
ForecastCacheService instance
"""
global _cache_service
if _cache_service is None:
if redis_url is None:
raise ValueError("redis_url required for first initialization")
_cache_service = ForecastCacheService(redis_url)
return _cache_service
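A minimal wiring sketch, assuming settings.REDIS_URL as used in the router above; the retraining hook is hypothetical. Note that the underlying client is synchronous redis-py, so each call performs blocking I/O even though the methods are declared async:

from app.core.config import settings
from app.services.forecast_cache import get_forecast_cache_service

cache_service = get_forecast_cache_service(settings.REDIS_URL)  # first call initializes the singleton

async def on_model_retrained(tenant_id, product_id):
    # Hypothetical hook: drop cached forecasts so the next request recomputes
    deleted = await cache_service.invalidate_product_forecasts(tenant_id, product_id)
    logger.info("Forecast cache invalidated after retraining", keys_deleted=deleted)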

View File

@@ -309,6 +309,9 @@ class ProcurementService:
elif status == "cancelled":
updates["execution_completed_at"] = datetime.utcnow()
elif status == "rejected":
# Handle plan rejection workflow - trigger notification and potential regeneration
await self._handle_plan_rejection(tenant_id, plan_id, approval_notes, updated_by)
plan = await self.plan_repo.update_plan(plan_id, tenant_id, updates)
if plan:
await self.db.commit()
@@ -1238,6 +1241,168 @@ class ProcurementService:
except Exception as e:
logger.warning("Failed to publish event", error=str(e))
async def _handle_plan_rejection(
self,
tenant_id: uuid.UUID,
plan_id: uuid.UUID,
rejection_notes: Optional[str],
rejected_by: Optional[uuid.UUID]
) -> None:
"""
Handle plan rejection workflow with notifications and optional regeneration
When a plan is rejected:
1. Send notifications to stakeholders
2. Analyze rejection reason
3. Offer regeneration option
4. Publish rejection event
"""
try:
logger.info("Processing plan rejection",
tenant_id=str(tenant_id),
plan_id=str(plan_id),
rejected_by=str(rejected_by) if rejected_by else None)
# Get plan details
plan = await self.plan_repo.get_plan_by_id(plan_id, tenant_id)
if not plan:
logger.error("Plan not found for rejection handling", plan_id=plan_id)
return
# Send notification to stakeholders
await self._send_plan_rejection_notification(
tenant_id, plan_id, plan.plan_number, rejection_notes, rejected_by
)
# Publish rejection event with details
await self._publish_plan_rejection_event(
tenant_id, plan_id, rejection_notes, rejected_by
)
# Check if we should auto-regenerate (e.g., if rejection due to stale data)
should_regenerate = self._should_auto_regenerate_plan(rejection_notes)
if should_regenerate:
logger.info("Auto-regenerating plan after rejection",
plan_id=plan_id, reason="stale data detected")
# Schedule regeneration (async task to not block rejection)
await self._schedule_plan_regeneration(tenant_id, plan.plan_date)
except Exception as e:
logger.error("Error handling plan rejection",
error=str(e),
plan_id=plan_id,
tenant_id=str(tenant_id))
def _should_auto_regenerate_plan(self, rejection_notes: Optional[str]) -> bool:
"""Determine if plan should be auto-regenerated based on rejection reason"""
if not rejection_notes:
return False
# Auto-regenerate if rejection mentions stale data or outdated forecasts
auto_regenerate_keywords = [
"stale", "outdated", "old data", "datos antiguos",
"desactualizado", "obsoleto"
]
rejection_lower = rejection_notes.lower()
return any(keyword in rejection_lower for keyword in auto_regenerate_keywords)
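# Illustrative behavior (hypothetical rejection notes):
#   _should_auto_regenerate_plan("Rechazado: datos antiguos")  -> True  ("datos antiguos" matches)
#   _should_auto_regenerate_plan("Budget exceeded")            -> False (no stale-data keyword)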
async def _send_plan_rejection_notification(
self,
tenant_id: uuid.UUID,
plan_id: uuid.UUID,
plan_number: str,
rejection_notes: Optional[str],
rejected_by: Optional[uuid.UUID]
) -> None:
"""Send notifications about plan rejection"""
try:
notification_data = {
"type": "procurement_plan_rejected",
"severity": "medium",
"title": f"Plan de Aprovisionamiento Rechazado: {plan_number}",
"message": f"El plan {plan_number} ha sido rechazado. {rejection_notes or 'Sin motivo especificado.'}",
"metadata": {
"tenant_id": str(tenant_id),
"plan_id": str(plan_id),
"plan_number": plan_number,
"rejection_notes": rejection_notes,
"rejected_by": str(rejected_by) if rejected_by else None,
"rejected_at": datetime.utcnow().isoformat(),
"action_required": "review_and_regenerate"
}
}
await self.rabbitmq_client.publish_event(
exchange_name="bakery_events",
routing_key="procurement.plan.rejected",
event_data=notification_data
)
logger.info("Plan rejection notification sent",
tenant_id=str(tenant_id),
plan_id=str(plan_id))
except Exception as e:
logger.error("Failed to send plan rejection notification", error=str(e))
async def _publish_plan_rejection_event(
self,
tenant_id: uuid.UUID,
plan_id: uuid.UUID,
rejection_notes: Optional[str],
rejected_by: Optional[uuid.UUID]
) -> None:
"""Publish plan rejection event for downstream systems"""
try:
event_data = {
"tenant_id": str(tenant_id),
"plan_id": str(plan_id),
"rejection_notes": rejection_notes,
"rejected_by": str(rejected_by) if rejected_by else None,
"timestamp": datetime.utcnow().isoformat(),
"event_type": "procurement.plan.rejected"
}
await self.rabbitmq_client.publish_event(
exchange_name="procurement.events",
routing_key="procurement.plan.rejected",
event_data=event_data
)
except Exception as e:
logger.warning("Failed to publish plan rejection event", error=str(e))
async def _schedule_plan_regeneration(
self,
tenant_id: uuid.UUID,
plan_date: date
) -> None:
"""Schedule automatic plan regeneration after rejection"""
try:
logger.info("Scheduling plan regeneration",
tenant_id=str(tenant_id),
plan_date=str(plan_date))
# Publish regeneration request event
event_data = {
"tenant_id": str(tenant_id),
"plan_date": plan_date.isoformat(),
"trigger": "rejection_auto_regenerate",
"timestamp": datetime.utcnow().isoformat(),
"event_type": "procurement.plan.regeneration_requested"
}
await self.rabbitmq_client.publish_event(
exchange_name="procurement.events",
routing_key="procurement.plan.regeneration_requested",
event_data=event_data
)
except Exception as e:
logger.error("Failed to schedule plan regeneration", error=str(e))
async def _publish_plan_status_changed_event(
self,
tenant_id: uuid.UUID,

View File

@@ -12,6 +12,7 @@ from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
from app.services.production_alert_service import ProductionAlertService
from app.services.production_scheduler_service import ProductionSchedulerService
from shared.service_base import StandardFastAPIService
# Import standardized routers
@@ -56,8 +57,9 @@ class ProductionService(StandardFastAPIService):
]
self.alert_service = None
self.scheduler_service = None
# Create custom checks for alert service
# Create custom checks for services
async def check_alert_service():
"""Check production alert service health"""
try:
@@ -66,6 +68,14 @@ class ProductionService(StandardFastAPIService):
self.logger.error("Alert service health check failed", error=str(e))
return False
async def check_scheduler_service():
"""Check production scheduler service health"""
try:
return self.scheduler_service is not None
except Exception as e:
self.logger.error("Scheduler service health check failed", error=str(e))
return False
super().__init__(
service_name=settings.SERVICE_NAME,
app_name=settings.APP_NAME,
@@ -74,7 +84,10 @@ class ProductionService(StandardFastAPIService):
api_prefix="", # Empty because RouteBuilder already includes /api/v1
database_manager=database_manager,
expected_tables=production_expected_tables,
custom_health_checks={"alert_service": check_alert_service}
custom_health_checks={
"alert_service": check_alert_service,
"scheduler_service": check_scheduler_service
}
)
async def on_startup(self, app: FastAPI):
@@ -84,11 +97,22 @@ class ProductionService(StandardFastAPIService):
await self.alert_service.start()
self.logger.info("Production alert service started")
# Store alert service in app state
# Initialize production scheduler service
self.scheduler_service = ProductionSchedulerService(settings)
await self.scheduler_service.start()
self.logger.info("Production scheduler service started")
# Store services in app state
app.state.alert_service = self.alert_service
app.state.scheduler_service = self.scheduler_service
async def on_shutdown(self, app: FastAPI):
"""Custom shutdown logic for production service"""
"""Custom startup logic for production service"""
# Stop scheduler service
if self.scheduler_service:
await self.scheduler_service.stop()
self.logger.info("Scheduler service stopped")
# Stop alert service
if self.alert_service:
await self.alert_service.stop()
@@ -100,6 +124,7 @@ class ProductionService(StandardFastAPIService):
"production_planning",
"batch_management",
"production_scheduling",
"automated_daily_scheduling", # NEW: Automated scheduler
"quality_control",
"equipment_management",
"capacity_planning",
@@ -144,6 +169,21 @@ service.add_router(production_dashboard.router)
service.add_router(analytics.router)
@app.post("/test/production-scheduler")
async def test_production_scheduler():
"""Test endpoint to manually trigger production scheduler"""
try:
if hasattr(app.state, 'scheduler_service'):
scheduler_service = app.state.scheduler_service
await scheduler_service.test_production_schedule_generation()
return {"message": "Production scheduler test triggered successfully"}
else:
return {"error": "Scheduler service not available"}
except Exception as e:
service.logger.error("Error testing production scheduler", error=str(e))
return {"error": f"Failed to trigger scheduler test: {str(e)}"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(

View File

@@ -0,0 +1,493 @@
# services/production/app/services/production_scheduler_service.py
"""
Production Scheduler Service - Daily production planning automation
Automatically generates daily production schedules for all active tenants based on:
- Demand forecasts from Orders Service
- Current inventory levels
- Production capacity
- Recipe requirements
Runs daily at 5:30 AM (before procurement @ 6:00 AM) to ensure production
plans are ready for the day ahead.
"""
import asyncio
from datetime import datetime, timedelta, date
from typing import List, Dict, Any, Optional
from uuid import UUID
from decimal import Decimal
import structlog
from apscheduler.triggers.cron import CronTrigger
from zoneinfo import ZoneInfo
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.database.base import create_database_manager
from app.services.production_service import ProductionService
from app.schemas.production import ProductionScheduleCreate, ProductionBatchCreate
from app.models.production import ProductionStatus, ProductionPriority
logger = structlog.get_logger()
class ProductionSchedulerService(BaseAlertService, AlertServiceMixin):
"""
Production scheduler service for automated daily production planning
Extends BaseAlertService to use proven scheduling infrastructure
"""
def __init__(self, config):
super().__init__(config)
self.production_service = None
async def start(self):
"""Initialize scheduler and production service"""
await super().start()
# Store database manager for session creation
from app.core.database import database_manager
self.db_manager = database_manager
logger.info("Production scheduler service started", service=self.config.SERVICE_NAME)
def setup_scheduled_checks(self):
"""Configure daily production planning jobs"""
# Daily production planning at 5:30 AM (before procurement)
# This ensures production plans are ready before procurement plans
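# Note: CronTrigger fires in the scheduler's configured timezone; if the host
# clock is UTC, an explicit zone may be needed, e.g.
# CronTrigger(hour=5, minute=30, timezone=ZoneInfo("Europe/Madrid")).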
self.scheduler.add_job(
func=self.run_daily_production_planning,
trigger=CronTrigger(hour=5, minute=30),
id="daily_production_planning",
name="Daily Production Planning",
misfire_grace_time=300, # 5 minutes grace period
coalesce=True, # Combine missed runs
max_instances=1 # Only one instance at a time
)
# Stale schedule cleanup at 5:50 AM
self.scheduler.add_job(
func=self.run_stale_schedule_cleanup,
trigger=CronTrigger(hour=5, minute=50),
id="stale_schedule_cleanup",
name="Stale Schedule Cleanup",
misfire_grace_time=300,
coalesce=True,
max_instances=1
)
# Test job for development (every 30 minutes if DEBUG enabled)
if getattr(self.config, 'DEBUG', False) or getattr(self.config, 'PRODUCTION_TEST_MODE', False):
self.scheduler.add_job(
func=self.run_daily_production_planning,
trigger=CronTrigger(minute='*/30'),
id="test_production_planning",
name="Test Production Planning (30min)",
misfire_grace_time=300,
coalesce=True,
max_instances=1
)
logger.info("⚡ Test production planning job added (every 30 minutes)")
logger.info("📅 Production scheduled jobs configured",
jobs_count=len(self.scheduler.get_jobs()))
async def run_daily_production_planning(self):
"""
Execute daily production planning for all active tenants
Processes tenants in parallel with individual timeouts
"""
if not self.is_leader:
logger.debug("Skipping production planning - not leader")
return
try:
self._checks_performed += 1
logger.info("🔄 Starting daily production planning execution",
timestamp=datetime.now().isoformat())
# Get active non-demo tenants
active_tenants = await self.get_active_tenants()
if not active_tenants:
logger.info("No active tenants found for production planning")
return
logger.info(f"Processing {len(active_tenants)} tenants in parallel")
# Create tasks with timeout for each tenant
tasks = [
self._process_tenant_with_timeout(tenant_id, timeout_seconds=180)
for tenant_id in active_tenants
]
# Execute all tasks in parallel
results = await asyncio.gather(*tasks, return_exceptions=True)
# Count successes and failures
processed_tenants = sum(1 for r in results if r is True)
failed_tenants = sum(1 for r in results if isinstance(r, Exception) or r is False)
logger.info("🎯 Daily production planning completed",
total_tenants=len(active_tenants),
processed_tenants=processed_tenants,
failed_tenants=failed_tenants)
except Exception as e:
self._errors_count += 1
logger.error("💥 Daily production planning failed completely", error=str(e))
async def _process_tenant_with_timeout(self, tenant_id: UUID, timeout_seconds: int = 180) -> bool:
"""
Process tenant production planning with timeout
Returns True on success, False or raises exception on failure
"""
try:
await asyncio.wait_for(
self.process_tenant_production(tenant_id),
timeout=timeout_seconds
)
logger.info("✅ Successfully processed tenant", tenant_id=str(tenant_id))
return True
except asyncio.TimeoutError:
logger.error("⏱️ Tenant processing timed out",
tenant_id=str(tenant_id),
timeout=timeout_seconds)
return False
except Exception as e:
logger.error("❌ Error processing tenant production",
tenant_id=str(tenant_id),
error=str(e))
raise
async def process_tenant_production(self, tenant_id: UUID):
"""Process production planning for a specific tenant"""
try:
# Get tenant timezone for accurate date calculation
tenant_tz = await self._get_tenant_timezone(tenant_id)
# Calculate target date in tenant's timezone
target_date = datetime.now(ZoneInfo(tenant_tz)).date()
logger.info("Processing production for tenant",
tenant_id=str(tenant_id),
target_date=str(target_date),
timezone=tenant_tz)
# Check if schedule already exists for this date
async with self.db_manager.get_session() as session:
production_service = ProductionService(self.db_manager, self.config)
# Check for existing schedule
existing_schedule = await self._get_schedule_by_date(
session, tenant_id, target_date
)
if existing_schedule:
logger.info("📋 Production schedule already exists, skipping",
tenant_id=str(tenant_id),
schedule_date=str(target_date),
schedule_id=str(existing_schedule.get('id')))
return
# Calculate daily requirements
requirements = await production_service.calculate_daily_requirements(
tenant_id, target_date
)
if not requirements.production_plan:
logger.info("No production requirements for date",
tenant_id=str(tenant_id),
date=str(target_date))
return
# Create production schedule
schedule_data = ProductionScheduleCreate(
schedule_date=target_date,
schedule_name=f"Daily Production - {target_date.strftime('%Y-%m-%d')}",
status="draft",
notes=f"Auto-generated daily production schedule for {target_date}",
total_batches=len(requirements.production_plan),
auto_generated=True
)
schedule = await production_service.create_production_schedule(
tenant_id, schedule_data
)
# Create production batches from requirements
batches_created = 0
for item in requirements.production_plan:
try:
batch_data = await self._create_batch_from_requirement(
item, schedule.id, target_date
)
batch = await production_service.create_production_batch(
tenant_id, batch_data
)
batches_created += 1
except Exception as e:
logger.error("Error creating batch from requirement",
tenant_id=str(tenant_id),
product=item.get('product_name'),
error=str(e))
# Send notification about new schedule
await self.send_production_schedule_notification(
tenant_id, schedule.id, batches_created
)
logger.info("🎉 Production schedule created successfully",
tenant_id=str(tenant_id),
schedule_id=str(schedule.id),
schedule_date=str(target_date),
batches_created=batches_created)
except Exception as e:
logger.error("💥 Error processing tenant production",
tenant_id=str(tenant_id),
error=str(e))
raise
async def _get_tenant_timezone(self, tenant_id: UUID) -> str:
"""Get tenant's timezone, fallback to UTC if not configured"""
try:
from services.tenant.app.models.tenants import Tenant
from sqlalchemy import select
import os
tenant_db_url = os.getenv("TENANT_DATABASE_URL")
if not tenant_db_url:
logger.warning("TENANT_DATABASE_URL not set, using UTC")
return "UTC"
tenant_db = create_database_manager(tenant_db_url, "tenant-tz-lookup")
async with tenant_db.get_session() as session:
result = await session.execute(
select(Tenant).where(Tenant.id == tenant_id)
)
tenant = result.scalars().first()
if tenant and hasattr(tenant, 'timezone') and tenant.timezone:
return tenant.timezone
# Default to Europe/Madrid for Spanish bakeries
return "Europe/Madrid"
except Exception as e:
logger.warning("Could not fetch tenant timezone, using UTC",
tenant_id=str(tenant_id), error=str(e))
return "UTC"
async def _get_schedule_by_date(self, session, tenant_id: UUID, schedule_date: date) -> Optional[Dict]:
"""Check if production schedule exists for date"""
try:
from sqlalchemy import select, and_
from app.models.production import ProductionSchedule
result = await session.execute(
select(ProductionSchedule).where(
and_(
ProductionSchedule.tenant_id == tenant_id,
ProductionSchedule.schedule_date == schedule_date
)
)
)
schedule = result.scalars().first()
if schedule:
return {"id": schedule.id, "status": schedule.status}
return None
except Exception as e:
logger.error("Error checking existing schedule", error=str(e))
return None
async def _create_batch_from_requirement(
self,
requirement: Dict[str, Any],
schedule_id: UUID,
target_date: date
) -> ProductionBatchCreate:
"""Create batch data from production requirement"""
# Map urgency to priority
urgency_to_priority = {
"high": ProductionPriority.HIGH,
"medium": ProductionPriority.MEDIUM,
"low": ProductionPriority.LOW
}
priority = urgency_to_priority.get(requirement.get('urgency', 'medium'), ProductionPriority.MEDIUM)
# Calculate planned times (start at 6 AM, estimate 2 hours per batch)
planned_start = datetime.combine(target_date, datetime.min.time().replace(hour=6))
planned_duration = 120 # 2 hours default
return ProductionBatchCreate(
schedule_id=schedule_id,
product_id=UUID(requirement['product_id']),
product_name=requirement['product_name'],
planned_quantity=Decimal(str(requirement['recommended_production'])),
unit_of_measure="units",
priority=priority,
status=ProductionStatus.PLANNED,
planned_start_time=planned_start,
planned_duration_minutes=planned_duration,
notes=f"Auto-generated from demand forecast. Urgency: {requirement.get('urgency', 'medium')}",
auto_generated=True
)
async def run_stale_schedule_cleanup(self):
"""
Clean up stale production schedules and send reminders
"""
if not self.is_leader:
logger.debug("Skipping stale schedule cleanup - not leader")
return
try:
logger.info("🧹 Starting stale schedule cleanup")
active_tenants = await self.get_active_tenants()
if not active_tenants:
logger.info("No active tenants found for cleanup")
return
total_archived = 0
total_cancelled = 0
total_escalated = 0
# Process each tenant's stale schedules
for tenant_id in active_tenants:
try:
stats = await self._cleanup_tenant_schedules(tenant_id)
total_archived += stats.get('archived', 0)
total_cancelled += stats.get('cancelled', 0)
total_escalated += stats.get('escalated', 0)
except Exception as e:
logger.error("Error cleaning up tenant schedules",
tenant_id=str(tenant_id),
error=str(e))
logger.info("✅ Stale schedule cleanup completed",
archived=total_archived,
cancelled=total_cancelled,
escalated=total_escalated)
except Exception as e:
self._errors_count += 1
logger.error("💥 Stale schedule cleanup failed", error=str(e))
async def _cleanup_tenant_schedules(self, tenant_id: UUID) -> Dict[str, int]:
"""Cleanup stale schedules for a specific tenant"""
stats = {"archived": 0, "cancelled": 0, "escalated": 0}
try:
async with self.db_manager.get_session() as session:
from sqlalchemy import select, and_
from app.models.production import ProductionSchedule
today = date.today()
# Get all schedules for tenant
result = await session.execute(
select(ProductionSchedule).where(
ProductionSchedule.tenant_id == tenant_id
)
)
schedules = result.scalars().all()
for schedule in schedules:
schedule_age_days = (today - schedule.schedule_date).days
# Archive completed schedules older than 90 days
if schedule.status == "completed" and schedule_age_days > 90:
schedule.archived = True
stats["archived"] += 1
# Cancel draft schedules older than 7 days
elif schedule.status == "draft" and schedule_age_days > 7:
schedule.status = "cancelled"
schedule.notes = (schedule.notes or "") + "\nAuto-cancelled: stale draft schedule"
stats["cancelled"] += 1
# Escalate overdue schedules
elif schedule.schedule_date == today and schedule.status in ['draft', 'pending_approval']:
await self._send_schedule_escalation_alert(tenant_id, schedule.id)
stats["escalated"] += 1
await session.commit()
except Exception as e:
logger.error("Error in tenant schedule cleanup",
tenant_id=str(tenant_id), error=str(e))
return stats
async def send_production_schedule_notification(
self,
tenant_id: UUID,
schedule_id: UUID,
batches_count: int
):
"""Send notification about new production schedule"""
try:
alert_data = {
"type": "production_schedule_created",
"severity": "low",
"title": "Nuevo Plan de Producción Generado",
"message": f"Plan de producción diario creado con {batches_count} lotes programados",
"metadata": {
"tenant_id": str(tenant_id),
"schedule_id": str(schedule_id),
"batches_count": batches_count,
"auto_generated": True
}
}
await self.publish_item(tenant_id, alert_data, item_type='alert')
except Exception as e:
logger.error("Error sending schedule notification",
tenant_id=str(tenant_id),
error=str(e))
async def _send_schedule_escalation_alert(self, tenant_id: UUID, schedule_id: UUID):
"""Send escalation alert for overdue schedule"""
try:
alert_data = {
"type": "schedule_escalation",
"severity": "high",
"title": "Plan de Producción Vencido",
"message": "Plan de producción para hoy no ha sido procesado - Requiere atención urgente",
"metadata": {
"tenant_id": str(tenant_id),
"schedule_id": str(schedule_id),
"escalation_level": "urgent"
}
}
await self.publish_item(tenant_id, alert_data, item_type='alert')
except Exception as e:
logger.error("Error sending escalation alert", error=str(e))
async def test_production_schedule_generation(self):
"""Test method to manually trigger production planning"""
active_tenants = await self.get_active_tenants()
if not active_tenants:
logger.error("No active tenants found for testing production schedule generation")
return
test_tenant_id = active_tenants[0]
logger.info("Testing production schedule generation", tenant_id=str(test_tenant_id))
try:
await self.process_tenant_production(test_tenant_id)
logger.info("Test production schedule generation completed successfully")
except Exception as e:
logger.error("Test production schedule generation failed",
error=str(e), tenant_id=str(test_tenant_id))
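The tenant-timezone lookup matters mainly around midnight: the same instant falls on different calendar days in different zones, which changes which schedule date gets generated. A quick sanity check, assuming zoneinfo data is available:

from datetime import datetime
from zoneinfo import ZoneInfo

# 2025-10-08 23:30 UTC is already 01:30 on 2025-10-09 in Madrid (CEST, UTC+2)
utc_now = datetime(2025, 10, 8, 23, 30, tzinfo=ZoneInfo("UTC"))
assert utc_now.astimezone(ZoneInfo("Europe/Madrid")).date().isoformat() == "2025-10-09"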

View File

@@ -28,7 +28,10 @@ class Tenant(Base):
postal_code = Column(String(10), nullable=False)
latitude = Column(Float)
longitude = Column(Float)
# Timezone configuration for accurate scheduling
timezone = Column(String(50), default="Europe/Madrid", nullable=False)
# Contact info
phone = Column(String(20))
email = Column(String(255))

View File

@@ -0,0 +1,27 @@
"""Add timezone column to tenants
Revision ID: 20251009_timezone
Revises: 964ef5a3ac09
Create Date: 2025-10-09
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '20251009_timezone'
down_revision = '964ef5a3ac09'
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Add timezone column to tenants table for accurate scheduling"""
# Add timezone column with default Europe/Madrid
op.add_column('tenants', sa.Column('timezone', sa.String(50), nullable=False, server_default='Europe/Madrid'))
def downgrade() -> None:
"""Remove timezone column from tenants table"""
op.drop_column('tenants', 'timezone')
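On PostgreSQL this upgrade emits the equivalent of ALTER TABLE tenants ADD COLUMN timezone VARCHAR(50) NOT NULL DEFAULT 'Europe/Madrid'; the server default backfills existing rows, so adding the NOT NULL constraint in a single step is safe.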