REFACTOR production scheduler

This commit is contained in:
Urtzi Alfaro
2025-10-09 18:01:24 +02:00
parent 3c689b4f98
commit b420af32c5
13 changed files with 4046 additions and 6 deletions

View File

@@ -12,6 +12,7 @@ from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
from app.services.production_alert_service import ProductionAlertService
from app.services.production_scheduler_service import ProductionSchedulerService
from shared.service_base import StandardFastAPIService
# Import standardized routers
@@ -56,8 +57,9 @@ class ProductionService(StandardFastAPIService):
]
self.alert_service = None
self.scheduler_service = None
# Create custom checks for alert service
# Create custom checks for services
async def check_alert_service():
"""Check production alert service health"""
try:
@@ -66,6 +68,14 @@ class ProductionService(StandardFastAPIService):
self.logger.error("Alert service health check failed", error=str(e))
return False
async def check_scheduler_service():
    """Health probe for the production scheduler service.

    Reports healthy only once the scheduler service instance has been
    created (assigned during on_startup); False before that or on error.
    """
    try:
        # Original `bool(x) if x else False` reduces to a plain presence
        # test: the attribute is either None or a service instance.
        return self.scheduler_service is not None
    except Exception as e:
        self.logger.error("Scheduler service health check failed", error=str(e))
        return False
super().__init__(
service_name=settings.SERVICE_NAME,
app_name=settings.APP_NAME,
@@ -74,7 +84,10 @@ class ProductionService(StandardFastAPIService):
api_prefix="", # Empty because RouteBuilder already includes /api/v1
database_manager=database_manager,
expected_tables=production_expected_tables,
custom_health_checks={"alert_service": check_alert_service}
custom_health_checks={
"alert_service": check_alert_service,
"scheduler_service": check_scheduler_service
}
)
async def on_startup(self, app: FastAPI):
@@ -84,11 +97,22 @@ class ProductionService(StandardFastAPIService):
await self.alert_service.start()
self.logger.info("Production alert service started")
# Store alert service in app state
# Initialize production scheduler service
self.scheduler_service = ProductionSchedulerService(settings)
await self.scheduler_service.start()
self.logger.info("Production scheduler service started")
# Store services in app state
app.state.alert_service = self.alert_service
app.state.scheduler_service = self.scheduler_service
async def on_shutdown(self, app: FastAPI):
"""Custom shutdown logic for production service"""
# (docstring previously said "startup" — copy-paste slip; this hook stops services)
# Stop scheduler service before the alert service (reverse of startup order)
if self.scheduler_service:
await self.scheduler_service.stop()
self.logger.info("Scheduler service stopped")
# Stop alert service
if self.alert_service:
await self.alert_service.stop()
@@ -100,6 +124,7 @@ class ProductionService(StandardFastAPIService):
"production_planning",
"batch_management",
"production_scheduling",
"automated_daily_scheduling", # NEW: Automated scheduler
"quality_control",
"equipment_management",
"capacity_planning",
@@ -144,6 +169,21 @@ service.add_router(production_dashboard.router)
service.add_router(analytics.router)
@app.post("/test/production-scheduler")
async def test_production_scheduler():
"""Test endpoint to manually trigger production scheduler"""
try:
if hasattr(app.state, 'scheduler_service'):
scheduler_service = app.state.scheduler_service
await scheduler_service.test_production_schedule_generation()
return {"message": "Production scheduler test triggered successfully"}
else:
return {"error": "Scheduler service not available"}
except Exception as e:
service.logger.error("Error testing production scheduler", error=str(e))
return {"error": f"Failed to trigger scheduler test: {str(e)}"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(

View File

@@ -0,0 +1,493 @@
# services/production/app/services/production_scheduler_service.py
"""
Production Scheduler Service - Daily production planning automation
Automatically generates daily production schedules for all active tenants based on:
- Demand forecasts from Orders Service
- Current inventory levels
- Production capacity
- Recipe requirements
Runs daily at 5:30 AM (before procurement @ 6:00 AM) to ensure production
plans are ready for the day ahead.
"""
import asyncio
from datetime import datetime, timedelta, date
from typing import List, Dict, Any, Optional
from uuid import UUID
from decimal import Decimal
import structlog
from apscheduler.triggers.cron import CronTrigger
from zoneinfo import ZoneInfo
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.database.base import create_database_manager
from app.services.production_service import ProductionService
from app.schemas.production import ProductionScheduleCreate, ProductionBatchCreate
from app.models.production import ProductionStatus, ProductionPriority
logger = structlog.get_logger()
class ProductionSchedulerService(BaseAlertService, AlertServiceMixin):
"""
Production scheduler service for automated daily production planning
Extends BaseAlertService to use proven scheduling infrastructure
"""
def __init__(self, config):
"""Store config via the base class; db/scheduler wiring is deferred to start()."""
super().__init__(config)
# NOTE(review): never reassigned — process_tenant_production constructs its
# own local ProductionService, so this attribute stays None; confirm whether
# it can be removed.
self.production_service = None
async def start(self):
"""Start the scheduler via BaseAlertService and cache the database manager.

The base class handles job registration (setup_scheduled_checks) and
scheduler startup; this override only stores the shared database manager
used for session creation by the planning/cleanup jobs.
"""
await super().start()
# Store database manager for session creation
from app.core.database import database_manager
self.db_manager = database_manager
logger.info("Production scheduler service started", service=self.config.SERVICE_NAME)
def setup_scheduled_checks(self):
    """Register the recurring production-planning jobs on the scheduler.

    Jobs: daily planning at 05:30, stale-schedule cleanup at 05:50, and —
    only in DEBUG/PRODUCTION_TEST_MODE — an extra planning run every 30
    minutes. Planning runs before procurement (06:00) so production plans
    exist before procurement planning consumes them.
    """
    # Options shared by every job: tolerate short delays, collapse missed
    # runs into one, and never overlap two runs of the same job.
    shared_opts = {
        "misfire_grace_time": 300,
        "coalesce": True,
        "max_instances": 1,
    }
    self.scheduler.add_job(
        func=self.run_daily_production_planning,
        trigger=CronTrigger(hour=5, minute=30),
        id="daily_production_planning",
        name="Daily Production Planning",
        **shared_opts,
    )
    self.scheduler.add_job(
        func=self.run_stale_schedule_cleanup,
        trigger=CronTrigger(hour=5, minute=50),
        id="stale_schedule_cleanup",
        name="Stale Schedule Cleanup",
        **shared_opts,
    )
    debug_enabled = getattr(self.config, 'DEBUG', False)
    test_mode = getattr(self.config, 'PRODUCTION_TEST_MODE', False)
    if debug_enabled or test_mode:
        self.scheduler.add_job(
            func=self.run_daily_production_planning,
            trigger=CronTrigger(minute='*/30'),
            id="test_production_planning",
            name="Test Production Planning (30min)",
            **shared_opts,
        )
        logger.info("⚡ Test production planning job added (every 30 minutes)")
    logger.info("📅 Production scheduled jobs configured",
                jobs_count=len(self.scheduler.get_jobs()))
async def run_daily_production_planning(self):
    """Run daily production planning across all active tenants.

    Leader-only job. Tenants are planned concurrently, each under its own
    180s timeout, so one slow tenant cannot stall the whole run; per-tenant
    failures are counted rather than aborting the batch.
    """
    if not self.is_leader:
        logger.debug("Skipping production planning - not leader")
        return
    try:
        self._checks_performed += 1
        logger.info("🔄 Starting daily production planning execution",
                    timestamp=datetime.now().isoformat())
        tenants = await self.get_active_tenants()
        if not tenants:
            logger.info("No active tenants found for production planning")
            return
        logger.info(f"Processing {len(tenants)} tenants in parallel")
        # gather(return_exceptions=True) turns raised errors into result
        # entries so every tenant's outcome can be tallied.
        outcomes = await asyncio.gather(
            *(self._process_tenant_with_timeout(tid, timeout_seconds=180)
              for tid in tenants),
            return_exceptions=True,
        )
        succeeded = sum(1 for o in outcomes if o is True)
        failed = sum(1 for o in outcomes if o is False or isinstance(o, Exception))
        logger.info("🎯 Daily production planning completed",
                    total_tenants=len(tenants),
                    processed_tenants=succeeded,
                    failed_tenants=failed)
    except Exception as e:
        self._errors_count += 1
        logger.error("💥 Daily production planning failed completely", error=str(e))
async def _process_tenant_with_timeout(self, tenant_id: UUID, timeout_seconds: int = 180) -> bool:
    """Plan one tenant's production, bounded by ``timeout_seconds``.

    Returns True on success and False on timeout; any other exception is
    logged and re-raised (captured upstream by gather(return_exceptions=True)).
    """
    try:
        await asyncio.wait_for(
            self.process_tenant_production(tenant_id),
            timeout=timeout_seconds,
        )
    except asyncio.TimeoutError:
        logger.error("⏱️ Tenant processing timed out",
                     tenant_id=str(tenant_id),
                     timeout=timeout_seconds)
        return False
    except Exception as e:
        logger.error("❌ Error processing tenant production",
                     tenant_id=str(tenant_id),
                     error=str(e))
        raise
    logger.info("✅ Successfully processed tenant", tenant_id=str(tenant_id))
    return True
async def process_tenant_production(self, tenant_id: UUID):
"""Create today's production schedule for one tenant.

Flow: resolve tenant timezone -> compute "today" in that zone ->
skip if a schedule for that date already exists -> calculate daily
requirements -> create the schedule plus one batch per plan item ->
publish a notification alert.

Raises:
    Any exception encountered (after logging), so the caller
    (_process_tenant_with_timeout) can record the failure.
"""
try:
# Get tenant timezone for accurate date calculation
tenant_tz = await self._get_tenant_timezone(tenant_id)
# Calculate target date in tenant's timezone
target_date = datetime.now(ZoneInfo(tenant_tz)).date()
logger.info("Processing production for tenant",
tenant_id=str(tenant_id),
target_date=str(target_date),
timezone=tenant_tz)
# Check if schedule already exists for this date
async with self.db_manager.get_session() as session:
production_service = ProductionService(self.db_manager, self.config)
# Check for existing schedule
existing_schedule = await self._get_schedule_by_date(
session, tenant_id, target_date
)
if existing_schedule:
# Idempotency guard: this job can fire more than once per day
# (e.g. the DEBUG 30-minute trigger); never create duplicates.
logger.info("📋 Production schedule already exists, skipping",
tenant_id=str(tenant_id),
schedule_date=str(target_date),
schedule_id=str(existing_schedule.get('id')))
return
# Calculate daily requirements
requirements = await production_service.calculate_daily_requirements(
tenant_id, target_date
)
if not requirements.production_plan:
logger.info("No production requirements for date",
tenant_id=str(tenant_id),
date=str(target_date))
return
# Create production schedule
schedule_data = ProductionScheduleCreate(
schedule_date=target_date,
schedule_name=f"Daily Production - {target_date.strftime('%Y-%m-%d')}",
status="draft",
notes=f"Auto-generated daily production schedule for {target_date}",
total_batches=len(requirements.production_plan),
auto_generated=True
)
schedule = await production_service.create_production_schedule(
tenant_id, schedule_data
)
# Create production batches from requirements
batches_created = 0
for item in requirements.production_plan:
try:
batch_data = await self._create_batch_from_requirement(
item, schedule.id, target_date
)
# NOTE(review): `batch` is unused afterwards — the call is made
# for its side effect of persisting the batch.
batch = await production_service.create_production_batch(
tenant_id, batch_data
)
batches_created += 1
except Exception as e:
# Per-item failures are swallowed deliberately: the schedule is
# still created with however many batches succeeded.
logger.error("Error creating batch from requirement",
tenant_id=str(tenant_id),
product=item.get('product_name'),
error=str(e))
# Send notification about new schedule
await self.send_production_schedule_notification(
tenant_id, schedule.id, batches_created
)
logger.info("🎉 Production schedule created successfully",
tenant_id=str(tenant_id),
schedule_id=str(schedule.id),
schedule_date=str(target_date),
batches_created=batches_created)
except Exception as e:
logger.error("💥 Error processing tenant production",
tenant_id=str(tenant_id),
error=str(e))
raise
async def _get_tenant_timezone(self, tenant_id: UUID) -> str:
"""Resolve the tenant's IANA timezone string.

Actual behavior (note: not a pure "UTC fallback"): returns the tenant's
configured timezone when present; "Europe/Madrid" when the tenant row is
found but has no timezone; "UTC" when TENANT_DATABASE_URL is unset or
the lookup fails for any reason.
"""
try:
# Cross-service model import; only resolvable when the tenant service
# package is importable from here — TODO confirm deployment layout.
from services.tenant.app.models.tenants import Tenant
from sqlalchemy import select
import os
tenant_db_url = os.getenv("TENANT_DATABASE_URL")
if not tenant_db_url:
logger.warning("TENANT_DATABASE_URL not set, using UTC")
return "UTC"
# NOTE(review): a fresh database manager is created on every lookup and
# never explicitly disposed — consider caching one or closing it here.
tenant_db = create_database_manager(tenant_db_url, "tenant-tz-lookup")
async with tenant_db.get_session() as session:
result = await session.execute(
select(Tenant).where(Tenant.id == tenant_id)
)
tenant = result.scalars().first()
if tenant and hasattr(tenant, 'timezone') and tenant.timezone:
return tenant.timezone
# Default to Europe/Madrid for Spanish bakeries
return "Europe/Madrid"
except Exception as e:
logger.warning("Could not fetch tenant timezone, using UTC",
tenant_id=str(tenant_id), error=str(e))
return "UTC"
async def _get_schedule_by_date(self, session, tenant_id: UUID, schedule_date: date) -> Optional[Dict]:
    """Look up an existing production schedule for (tenant, date).

    Returns {"id", "status"} of the first match, or None when absent.
    Lookup errors are logged and reported as None (treated as "no schedule").
    """
    try:
        from sqlalchemy import select, and_
        from app.models.production import ProductionSchedule
        query = select(ProductionSchedule).where(
            and_(
                ProductionSchedule.tenant_id == tenant_id,
                ProductionSchedule.schedule_date == schedule_date,
            )
        )
        match = (await session.execute(query)).scalars().first()
    except Exception as e:
        logger.error("Error checking existing schedule", error=str(e))
        return None
    if match is None:
        return None
    return {"id": match.id, "status": match.status}
async def _create_batch_from_requirement(
    self,
    requirement: Dict[str, Any],
    schedule_id: UUID,
    target_date: date
) -> ProductionBatchCreate:
    """Translate one demand-plan item into a ProductionBatchCreate payload.

    Urgency maps 1:1 onto batch priority (unknown values default to
    MEDIUM); the batch is planned to start at 06:00 on the target date
    with a default duration of two hours.
    """
    urgency = requirement.get('urgency', 'medium')
    priority_map = {
        "high": ProductionPriority.HIGH,
        "medium": ProductionPriority.MEDIUM,
        "low": ProductionPriority.LOW,
    }
    six_am = datetime.min.time().replace(hour=6)
    return ProductionBatchCreate(
        schedule_id=schedule_id,
        product_id=UUID(requirement['product_id']),
        product_name=requirement['product_name'],
        planned_quantity=Decimal(str(requirement['recommended_production'])),
        unit_of_measure="units",
        priority=priority_map.get(urgency, ProductionPriority.MEDIUM),
        status=ProductionStatus.PLANNED,
        planned_start_time=datetime.combine(target_date, six_am),
        planned_duration_minutes=120,
        notes=f"Auto-generated from demand forecast. Urgency: {urgency}",
        auto_generated=True
    )
async def run_stale_schedule_cleanup(self):
    """Archive/cancel stale schedules and escalate today's unprocessed ones.

    Leader-only daily housekeeping job. Tenants are processed sequentially;
    a failure in one tenant is logged and does not stop the others.
    """
    if not self.is_leader:
        logger.debug("Skipping stale schedule cleanup - not leader")
        return
    try:
        # Count this run like run_daily_production_planning does, so the
        # base-class metrics reflect both scheduled jobs consistently.
        self._checks_performed += 1
        logger.info("🧹 Starting stale schedule cleanup")
        active_tenants = await self.get_active_tenants()
        if not active_tenants:
            logger.info("No active tenants found for cleanup")
            return
        totals = {"archived": 0, "cancelled": 0, "escalated": 0}
        for tenant_id in active_tenants:
            try:
                stats = await self._cleanup_tenant_schedules(tenant_id)
                for key in totals:
                    totals[key] += stats.get(key, 0)
            except Exception as e:
                logger.error("Error cleaning up tenant schedules",
                             tenant_id=str(tenant_id),
                             error=str(e))
        logger.info("✅ Stale schedule cleanup completed",
                    archived=totals["archived"],
                    cancelled=totals["cancelled"],
                    escalated=totals["escalated"])
    except Exception as e:
        self._errors_count += 1
        logger.error("💥 Stale schedule cleanup failed", error=str(e))
async def _cleanup_tenant_schedules(self, tenant_id: UUID) -> Dict[str, int]:
"""Apply retention/escalation rules to one tenant's schedules.

Rules (evaluated per schedule, first match wins):
- completed and older than 90 days            -> archived
- draft and older than 7 days                 -> cancelled (note appended)
- dated today and still draft/pending_approval -> escalation alert sent

Returns a {"archived", "cancelled", "escalated"} counter dict; on error
the counts gathered before the failure are returned.
"""
stats = {"archived": 0, "cancelled": 0, "escalated": 0}
try:
async with self.db_manager.get_session() as session:
from sqlalchemy import select, and_
from app.models.production import ProductionSchedule
# NOTE(review): uses server-local date, not the tenant timezone
# used during planning — verify intent.
today = date.today()
# Get all schedules for tenant
# NOTE(review): loads every schedule into memory; the age/status
# filters below could be pushed into the SQL query.
result = await session.execute(
select(ProductionSchedule).where(
ProductionSchedule.tenant_id == tenant_id
)
)
schedules = result.scalars().all()
for schedule in schedules:
schedule_age_days = (today - schedule.schedule_date).days
# Archive completed schedules older than 90 days
# (assumes the model exposes an `archived` flag — TODO confirm)
if schedule.status == "completed" and schedule_age_days > 90:
schedule.archived = True
stats["archived"] += 1
# Cancel draft schedules older than 7 days
elif schedule.status == "draft" and schedule_age_days > 7:
schedule.status = "cancelled"
schedule.notes = (schedule.notes or "") + "\nAuto-cancelled: stale draft schedule"
stats["cancelled"] += 1
# Escalate overdue schedules
# (only matches schedules dated exactly today; older unprocessed
# drafts are handled by the 7-day cancellation rule instead)
elif schedule.schedule_date == today and schedule.status in ['draft', 'pending_approval']:
await self._send_schedule_escalation_alert(tenant_id, schedule.id)
stats["escalated"] += 1
# Single commit persists all archive/cancel mutations at once.
await session.commit()
except Exception as e:
logger.error("Error in tenant schedule cleanup",
tenant_id=str(tenant_id), error=str(e))
return stats
async def send_production_schedule_notification(
    self,
    tenant_id: UUID,
    schedule_id: UUID,
    batches_count: int
):
    """Publish a low-severity alert announcing a freshly created schedule.

    Best-effort: failures are logged and swallowed so that notification
    problems never fail the planning run itself.
    """
    payload = {
        "type": "production_schedule_created",
        "severity": "low",
        "title": "Nuevo Plan de Producción Generado",
        "message": f"Plan de producción diario creado con {batches_count} lotes programados",
        "metadata": {
            "tenant_id": str(tenant_id),
            "schedule_id": str(schedule_id),
            "batches_count": batches_count,
            "auto_generated": True
        }
    }
    try:
        await self.publish_item(tenant_id, payload, item_type='alert')
    except Exception as e:
        logger.error("Error sending schedule notification",
                     tenant_id=str(tenant_id),
                     error=str(e))
async def _send_schedule_escalation_alert(self, tenant_id: UUID, schedule_id: UUID):
    """Publish a high-severity alert for a schedule still unprocessed today.

    Best-effort: failures are logged and swallowed so the cleanup job keeps
    going.
    """
    payload = {
        "type": "schedule_escalation",
        "severity": "high",
        "title": "Plan de Producción Vencido",
        "message": "Plan de producción para hoy no ha sido procesado - Requiere atención urgente",
        "metadata": {
            "tenant_id": str(tenant_id),
            "schedule_id": str(schedule_id),
            "escalation_level": "urgent"
        }
    }
    try:
        await self.publish_item(tenant_id, payload, item_type='alert')
    except Exception as e:
        logger.error("Error sending escalation alert", error=str(e))
async def test_production_schedule_generation(self):
    """Debug helper: run production planning once for the first active tenant.

    Logs an error and returns when no active tenants exist; planning
    failures are logged rather than propagated.
    """
    tenants = await self.get_active_tenants()
    if not tenants:
        logger.error("No active tenants found for testing production schedule generation")
        return
    target = tenants[0]
    logger.info("Testing production schedule generation", tenant_id=str(target))
    try:
        await self.process_tenant_production(target)
    except Exception as e:
        logger.error("Test production schedule generation failed",
                     error=str(e), tenant_id=str(target))
    else:
        logger.info("Test production schedule generation completed successfully")