REFACTOR production scheduler

This commit is contained in:
Urtzi Alfaro
2025-10-09 18:01:24 +02:00
parent 3c689b4f98
commit b420af32c5
13 changed files with 4046 additions and 6 deletions

View File

@@ -12,6 +12,7 @@ from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
from app.services.production_alert_service import ProductionAlertService
from app.services.production_scheduler_service import ProductionSchedulerService
from shared.service_base import StandardFastAPIService
# Import standardized routers
@@ -56,8 +57,9 @@ class ProductionService(StandardFastAPIService):
]
self.alert_service = None
self.scheduler_service = None
# Create custom checks for alert service
# Create custom checks for services
async def check_alert_service():
"""Check production alert service health"""
try:
@@ -66,6 +68,14 @@ class ProductionService(StandardFastAPIService):
self.logger.error("Alert service health check failed", error=str(e))
return False
async def check_scheduler_service():
    """Health probe for the production scheduler service.

    Reports healthy only once the scheduler service instance has been
    created (assigned during on_startup); False before that or on error.
    """
    try:
        # Original `bool(x) if x else False` reduces to a plain presence
        # test: the attribute is either None or a service instance.
        return self.scheduler_service is not None
    except Exception as e:
        self.logger.error("Scheduler service health check failed", error=str(e))
        return False
super().__init__(
service_name=settings.SERVICE_NAME,
app_name=settings.APP_NAME,
@@ -74,7 +84,10 @@ class ProductionService(StandardFastAPIService):
api_prefix="", # Empty because RouteBuilder already includes /api/v1
database_manager=database_manager,
expected_tables=production_expected_tables,
custom_health_checks={"alert_service": check_alert_service}
custom_health_checks={
"alert_service": check_alert_service,
"scheduler_service": check_scheduler_service
}
)
async def on_startup(self, app: FastAPI):
@@ -84,11 +97,22 @@ class ProductionService(StandardFastAPIService):
await self.alert_service.start()
self.logger.info("Production alert service started")
# Store alert service in app state
# Initialize production scheduler service
self.scheduler_service = ProductionSchedulerService(settings)
await self.scheduler_service.start()
self.logger.info("Production scheduler service started")
# Store services in app state
app.state.alert_service = self.alert_service
app.state.scheduler_service = self.scheduler_service
async def on_shutdown(self, app: FastAPI):
"""Custom shutdown logic for production service"""
# (docstring previously said "startup" — copy-paste slip; this hook stops services)
# Stop scheduler service before the alert service (reverse of startup order)
if self.scheduler_service:
await self.scheduler_service.stop()
self.logger.info("Scheduler service stopped")
# Stop alert service
if self.alert_service:
await self.alert_service.stop()
@@ -100,6 +124,7 @@ class ProductionService(StandardFastAPIService):
"production_planning",
"batch_management",
"production_scheduling",
"automated_daily_scheduling", # NEW: Automated scheduler
"quality_control",
"equipment_management",
"capacity_planning",
@@ -144,6 +169,21 @@ service.add_router(production_dashboard.router)
service.add_router(analytics.router)
@app.post("/test/production-scheduler")
async def test_production_scheduler():
"""Test endpoint to manually trigger production scheduler"""
try:
if hasattr(app.state, 'scheduler_service'):
scheduler_service = app.state.scheduler_service
await scheduler_service.test_production_schedule_generation()
return {"message": "Production scheduler test triggered successfully"}
else:
return {"error": "Scheduler service not available"}
except Exception as e:
service.logger.error("Error testing production scheduler", error=str(e))
return {"error": f"Failed to trigger scheduler test: {str(e)}"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(

View File

@@ -0,0 +1,493 @@
# services/production/app/services/production_scheduler_service.py
"""
Production Scheduler Service - Daily production planning automation
Automatically generates daily production schedules for all active tenants based on:
- Demand forecasts from Orders Service
- Current inventory levels
- Production capacity
- Recipe requirements
Runs daily at 5:30 AM (before procurement @ 6:00 AM) to ensure production
plans are ready for the day ahead.
"""
import asyncio
from datetime import datetime, timedelta, date
from typing import List, Dict, Any, Optional
from uuid import UUID
from decimal import Decimal
import structlog
from apscheduler.triggers.cron import CronTrigger
from zoneinfo import ZoneInfo
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.database.base import create_database_manager
from app.services.production_service import ProductionService
from app.schemas.production import ProductionScheduleCreate, ProductionBatchCreate
from app.models.production import ProductionStatus, ProductionPriority
logger = structlog.get_logger()
class ProductionSchedulerService(BaseAlertService, AlertServiceMixin):
"""
Production scheduler service for automated daily production planning
Extends BaseAlertService to use proven scheduling infrastructure
"""
def __init__(self, config):
"""Store config via the base class; db/scheduler wiring is deferred to start()."""
super().__init__(config)
# NOTE(review): never reassigned — process_tenant_production constructs its
# own local ProductionService, so this attribute stays None; confirm whether
# it can be removed.
self.production_service = None
async def start(self):
"""Start the scheduler via BaseAlertService and cache the database manager.

The base class handles job registration (setup_scheduled_checks) and
scheduler startup; this override only stores the shared database manager
used for session creation by the planning/cleanup jobs.
"""
await super().start()
# Store database manager for session creation
from app.core.database import database_manager
self.db_manager = database_manager
logger.info("Production scheduler service started", service=self.config.SERVICE_NAME)
def setup_scheduled_checks(self):
    """Register the recurring production-planning jobs on the scheduler.

    Jobs: daily planning at 05:30, stale-schedule cleanup at 05:50, and —
    only in DEBUG/PRODUCTION_TEST_MODE — an extra planning run every 30
    minutes. Planning runs before procurement (06:00) so production plans
    exist before procurement planning consumes them.
    """
    # Options shared by every job: tolerate short delays, collapse missed
    # runs into one, and never overlap two runs of the same job.
    shared_opts = {
        "misfire_grace_time": 300,
        "coalesce": True,
        "max_instances": 1,
    }
    self.scheduler.add_job(
        func=self.run_daily_production_planning,
        trigger=CronTrigger(hour=5, minute=30),
        id="daily_production_planning",
        name="Daily Production Planning",
        **shared_opts,
    )
    self.scheduler.add_job(
        func=self.run_stale_schedule_cleanup,
        trigger=CronTrigger(hour=5, minute=50),
        id="stale_schedule_cleanup",
        name="Stale Schedule Cleanup",
        **shared_opts,
    )
    debug_enabled = getattr(self.config, 'DEBUG', False)
    test_mode = getattr(self.config, 'PRODUCTION_TEST_MODE', False)
    if debug_enabled or test_mode:
        self.scheduler.add_job(
            func=self.run_daily_production_planning,
            trigger=CronTrigger(minute='*/30'),
            id="test_production_planning",
            name="Test Production Planning (30min)",
            **shared_opts,
        )
        logger.info("⚡ Test production planning job added (every 30 minutes)")
    logger.info("📅 Production scheduled jobs configured",
                jobs_count=len(self.scheduler.get_jobs()))
async def run_daily_production_planning(self):
    """Run daily production planning across all active tenants.

    Leader-only job. Tenants are planned concurrently, each under its own
    180s timeout, so one slow tenant cannot stall the whole run; per-tenant
    failures are counted rather than aborting the batch.
    """
    if not self.is_leader:
        logger.debug("Skipping production planning - not leader")
        return
    try:
        self._checks_performed += 1
        logger.info("🔄 Starting daily production planning execution",
                    timestamp=datetime.now().isoformat())
        tenants = await self.get_active_tenants()
        if not tenants:
            logger.info("No active tenants found for production planning")
            return
        logger.info(f"Processing {len(tenants)} tenants in parallel")
        # gather(return_exceptions=True) turns raised errors into result
        # entries so every tenant's outcome can be tallied.
        outcomes = await asyncio.gather(
            *(self._process_tenant_with_timeout(tid, timeout_seconds=180)
              for tid in tenants),
            return_exceptions=True,
        )
        succeeded = sum(1 for o in outcomes if o is True)
        failed = sum(1 for o in outcomes if o is False or isinstance(o, Exception))
        logger.info("🎯 Daily production planning completed",
                    total_tenants=len(tenants),
                    processed_tenants=succeeded,
                    failed_tenants=failed)
    except Exception as e:
        self._errors_count += 1
        logger.error("💥 Daily production planning failed completely", error=str(e))
async def _process_tenant_with_timeout(self, tenant_id: UUID, timeout_seconds: int = 180) -> bool:
    """Plan one tenant's production, bounded by ``timeout_seconds``.

    Returns True on success and False on timeout; any other exception is
    logged and re-raised (captured upstream by gather(return_exceptions=True)).
    """
    try:
        await asyncio.wait_for(
            self.process_tenant_production(tenant_id),
            timeout=timeout_seconds,
        )
    except asyncio.TimeoutError:
        logger.error("⏱️ Tenant processing timed out",
                     tenant_id=str(tenant_id),
                     timeout=timeout_seconds)
        return False
    except Exception as e:
        logger.error("❌ Error processing tenant production",
                     tenant_id=str(tenant_id),
                     error=str(e))
        raise
    logger.info("✅ Successfully processed tenant", tenant_id=str(tenant_id))
    return True
async def process_tenant_production(self, tenant_id: UUID):
"""Create today's production schedule for one tenant.

Flow: resolve tenant timezone -> compute "today" in that zone ->
skip if a schedule for that date already exists -> calculate daily
requirements -> create the schedule plus one batch per plan item ->
publish a notification alert.

Raises:
    Any exception encountered (after logging), so the caller
    (_process_tenant_with_timeout) can record the failure.
"""
try:
# Get tenant timezone for accurate date calculation
tenant_tz = await self._get_tenant_timezone(tenant_id)
# Calculate target date in tenant's timezone
target_date = datetime.now(ZoneInfo(tenant_tz)).date()
logger.info("Processing production for tenant",
tenant_id=str(tenant_id),
target_date=str(target_date),
timezone=tenant_tz)
# Check if schedule already exists for this date
async with self.db_manager.get_session() as session:
production_service = ProductionService(self.db_manager, self.config)
# Check for existing schedule
existing_schedule = await self._get_schedule_by_date(
session, tenant_id, target_date
)
if existing_schedule:
# Idempotency guard: this job can fire more than once per day
# (e.g. the DEBUG 30-minute trigger); never create duplicates.
logger.info("📋 Production schedule already exists, skipping",
tenant_id=str(tenant_id),
schedule_date=str(target_date),
schedule_id=str(existing_schedule.get('id')))
return
# Calculate daily requirements
requirements = await production_service.calculate_daily_requirements(
tenant_id, target_date
)
if not requirements.production_plan:
logger.info("No production requirements for date",
tenant_id=str(tenant_id),
date=str(target_date))
return
# Create production schedule
schedule_data = ProductionScheduleCreate(
schedule_date=target_date,
schedule_name=f"Daily Production - {target_date.strftime('%Y-%m-%d')}",
status="draft",
notes=f"Auto-generated daily production schedule for {target_date}",
total_batches=len(requirements.production_plan),
auto_generated=True
)
schedule = await production_service.create_production_schedule(
tenant_id, schedule_data
)
# Create production batches from requirements
batches_created = 0
for item in requirements.production_plan:
try:
batch_data = await self._create_batch_from_requirement(
item, schedule.id, target_date
)
# NOTE(review): `batch` is unused afterwards — the call is made
# for its side effect of persisting the batch.
batch = await production_service.create_production_batch(
tenant_id, batch_data
)
batches_created += 1
except Exception as e:
# Per-item failures are swallowed deliberately: the schedule is
# still created with however many batches succeeded.
logger.error("Error creating batch from requirement",
tenant_id=str(tenant_id),
product=item.get('product_name'),
error=str(e))
# Send notification about new schedule
await self.send_production_schedule_notification(
tenant_id, schedule.id, batches_created
)
logger.info("🎉 Production schedule created successfully",
tenant_id=str(tenant_id),
schedule_id=str(schedule.id),
schedule_date=str(target_date),
batches_created=batches_created)
except Exception as e:
logger.error("💥 Error processing tenant production",
tenant_id=str(tenant_id),
error=str(e))
raise
async def _get_tenant_timezone(self, tenant_id: UUID) -> str:
"""Resolve the tenant's IANA timezone string.

Actual behavior (note: not a pure "UTC fallback"): returns the tenant's
configured timezone when present; "Europe/Madrid" when the tenant row is
found but has no timezone; "UTC" when TENANT_DATABASE_URL is unset or
the lookup fails for any reason.
"""
try:
# Cross-service model import; only resolvable when the tenant service
# package is importable from here — TODO confirm deployment layout.
from services.tenant.app.models.tenants import Tenant
from sqlalchemy import select
import os
tenant_db_url = os.getenv("TENANT_DATABASE_URL")
if not tenant_db_url:
logger.warning("TENANT_DATABASE_URL not set, using UTC")
return "UTC"
# NOTE(review): a fresh database manager is created on every lookup and
# never explicitly disposed — consider caching one or closing it here.
tenant_db = create_database_manager(tenant_db_url, "tenant-tz-lookup")
async with tenant_db.get_session() as session:
result = await session.execute(
select(Tenant).where(Tenant.id == tenant_id)
)
tenant = result.scalars().first()
if tenant and hasattr(tenant, 'timezone') and tenant.timezone:
return tenant.timezone
# Default to Europe/Madrid for Spanish bakeries
return "Europe/Madrid"
except Exception as e:
logger.warning("Could not fetch tenant timezone, using UTC",
tenant_id=str(tenant_id), error=str(e))
return "UTC"
async def _get_schedule_by_date(self, session, tenant_id: UUID, schedule_date: date) -> Optional[Dict]:
    """Look up an existing production schedule for (tenant, date).

    Returns {"id", "status"} of the first match, or None when absent.
    Lookup errors are logged and reported as None (treated as "no schedule").
    """
    try:
        from sqlalchemy import select, and_
        from app.models.production import ProductionSchedule
        query = select(ProductionSchedule).where(
            and_(
                ProductionSchedule.tenant_id == tenant_id,
                ProductionSchedule.schedule_date == schedule_date,
            )
        )
        match = (await session.execute(query)).scalars().first()
    except Exception as e:
        logger.error("Error checking existing schedule", error=str(e))
        return None
    if match is None:
        return None
    return {"id": match.id, "status": match.status}
async def _create_batch_from_requirement(
    self,
    requirement: Dict[str, Any],
    schedule_id: UUID,
    target_date: date
) -> ProductionBatchCreate:
    """Translate one demand-plan item into a ProductionBatchCreate payload.

    Urgency maps 1:1 onto batch priority (unknown values default to
    MEDIUM); the batch is planned to start at 06:00 on the target date
    with a default duration of two hours.
    """
    urgency = requirement.get('urgency', 'medium')
    priority_map = {
        "high": ProductionPriority.HIGH,
        "medium": ProductionPriority.MEDIUM,
        "low": ProductionPriority.LOW,
    }
    six_am = datetime.min.time().replace(hour=6)
    return ProductionBatchCreate(
        schedule_id=schedule_id,
        product_id=UUID(requirement['product_id']),
        product_name=requirement['product_name'],
        planned_quantity=Decimal(str(requirement['recommended_production'])),
        unit_of_measure="units",
        priority=priority_map.get(urgency, ProductionPriority.MEDIUM),
        status=ProductionStatus.PLANNED,
        planned_start_time=datetime.combine(target_date, six_am),
        planned_duration_minutes=120,
        notes=f"Auto-generated from demand forecast. Urgency: {urgency}",
        auto_generated=True
    )
async def run_stale_schedule_cleanup(self):
    """Archive/cancel stale schedules and escalate today's unprocessed ones.

    Leader-only daily housekeeping job. Tenants are processed sequentially;
    a failure in one tenant is logged and does not stop the others.
    """
    if not self.is_leader:
        logger.debug("Skipping stale schedule cleanup - not leader")
        return
    try:
        # Count this run like run_daily_production_planning does, so the
        # base-class metrics reflect both scheduled jobs consistently.
        self._checks_performed += 1
        logger.info("🧹 Starting stale schedule cleanup")
        active_tenants = await self.get_active_tenants()
        if not active_tenants:
            logger.info("No active tenants found for cleanup")
            return
        totals = {"archived": 0, "cancelled": 0, "escalated": 0}
        for tenant_id in active_tenants:
            try:
                stats = await self._cleanup_tenant_schedules(tenant_id)
                for key in totals:
                    totals[key] += stats.get(key, 0)
            except Exception as e:
                logger.error("Error cleaning up tenant schedules",
                             tenant_id=str(tenant_id),
                             error=str(e))
        logger.info("✅ Stale schedule cleanup completed",
                    archived=totals["archived"],
                    cancelled=totals["cancelled"],
                    escalated=totals["escalated"])
    except Exception as e:
        self._errors_count += 1
        logger.error("💥 Stale schedule cleanup failed", error=str(e))
async def _cleanup_tenant_schedules(self, tenant_id: UUID) -> Dict[str, int]:
"""Apply retention/escalation rules to one tenant's schedules.

Rules (evaluated per schedule, first match wins):
- completed and older than 90 days            -> archived
- draft and older than 7 days                 -> cancelled (note appended)
- dated today and still draft/pending_approval -> escalation alert sent

Returns a {"archived", "cancelled", "escalated"} counter dict; on error
the counts gathered before the failure are returned.
"""
stats = {"archived": 0, "cancelled": 0, "escalated": 0}
try:
async with self.db_manager.get_session() as session:
from sqlalchemy import select, and_
from app.models.production import ProductionSchedule
# NOTE(review): uses server-local date, not the tenant timezone
# used during planning — verify intent.
today = date.today()
# Get all schedules for tenant
# NOTE(review): loads every schedule into memory; the age/status
# filters below could be pushed into the SQL query.
result = await session.execute(
select(ProductionSchedule).where(
ProductionSchedule.tenant_id == tenant_id
)
)
schedules = result.scalars().all()
for schedule in schedules:
schedule_age_days = (today - schedule.schedule_date).days
# Archive completed schedules older than 90 days
# (assumes the model exposes an `archived` flag — TODO confirm)
if schedule.status == "completed" and schedule_age_days > 90:
schedule.archived = True
stats["archived"] += 1
# Cancel draft schedules older than 7 days
elif schedule.status == "draft" and schedule_age_days > 7:
schedule.status = "cancelled"
schedule.notes = (schedule.notes or "") + "\nAuto-cancelled: stale draft schedule"
stats["cancelled"] += 1
# Escalate overdue schedules
# (only matches schedules dated exactly today; older unprocessed
# drafts are handled by the 7-day cancellation rule instead)
elif schedule.schedule_date == today and schedule.status in ['draft', 'pending_approval']:
await self._send_schedule_escalation_alert(tenant_id, schedule.id)
stats["escalated"] += 1
# Single commit persists all archive/cancel mutations at once.
await session.commit()
except Exception as e:
logger.error("Error in tenant schedule cleanup",
tenant_id=str(tenant_id), error=str(e))
return stats
async def send_production_schedule_notification(
    self,
    tenant_id: UUID,
    schedule_id: UUID,
    batches_count: int
):
    """Publish a low-severity alert announcing a freshly created schedule.

    Best-effort: failures are logged and swallowed so that notification
    problems never fail the planning run itself.
    """
    payload = {
        "type": "production_schedule_created",
        "severity": "low",
        "title": "Nuevo Plan de Producción Generado",
        "message": f"Plan de producción diario creado con {batches_count} lotes programados",
        "metadata": {
            "tenant_id": str(tenant_id),
            "schedule_id": str(schedule_id),
            "batches_count": batches_count,
            "auto_generated": True
        }
    }
    try:
        await self.publish_item(tenant_id, payload, item_type='alert')
    except Exception as e:
        logger.error("Error sending schedule notification",
                     tenant_id=str(tenant_id),
                     error=str(e))
async def _send_schedule_escalation_alert(self, tenant_id: UUID, schedule_id: UUID):
    """Publish a high-severity alert for a schedule still unprocessed today.

    Best-effort: failures are logged and swallowed so the cleanup job keeps
    going.
    """
    payload = {
        "type": "schedule_escalation",
        "severity": "high",
        "title": "Plan de Producción Vencido",
        "message": "Plan de producción para hoy no ha sido procesado - Requiere atención urgente",
        "metadata": {
            "tenant_id": str(tenant_id),
            "schedule_id": str(schedule_id),
            "escalation_level": "urgent"
        }
    }
    try:
        await self.publish_item(tenant_id, payload, item_type='alert')
    except Exception as e:
        logger.error("Error sending escalation alert", error=str(e))
async def test_production_schedule_generation(self):
    """Debug helper: run production planning once for the first active tenant.

    Logs an error and returns when no active tenants exist; planning
    failures are logged rather than propagated.
    """
    tenants = await self.get_active_tenants()
    if not tenants:
        logger.error("No active tenants found for testing production schedule generation")
        return
    target = tenants[0]
    logger.info("Testing production schedule generation", tenant_id=str(target))
    try:
        await self.process_tenant_production(target)
    except Exception as e:
        logger.error("Test production schedule generation failed",
                     error=str(e), tenant_id=str(target))
    else:
        logger.info("Test production schedule generation completed successfully")