610 lines
23 KiB
Python
610 lines
23 KiB
Python
"""
|
|
Production Scheduler Service
|
|
Background task that periodically checks for production alert conditions
|
|
and triggers appropriate alerts.
|
|
"""
|
|
|
|
import asyncio
|
|
from typing import Dict, Any, List, Optional
|
|
from uuid import UUID
|
|
from datetime import datetime, timedelta
|
|
import structlog
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import text
|
|
|
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
|
from apscheduler.triggers.interval import IntervalTrigger
|
|
|
|
from app.repositories.production_batch_repository import ProductionBatchRepository
|
|
from app.repositories.equipment_repository import EquipmentRepository
|
|
from app.services.production_alert_service import ProductionAlertService
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
class ProductionScheduler:
|
|
"""Production scheduler service that checks for alert conditions"""
|
|
|
|
def __init__(self, alert_service: ProductionAlertService, database_manager: Any):
|
|
self.alert_service = alert_service
|
|
self.database_manager = database_manager
|
|
self.scheduler = AsyncIOScheduler()
|
|
self.check_interval = 300 # 5 minutes
|
|
self.job_id = 'production_scheduler'
|
|
|
|
# Cache de alertas emitidas para evitar duplicados
|
|
self._emitted_alerts: set = set()
|
|
self._alert_cache_ttl = 3600 # 1 hora
|
|
self._last_cache_clear = datetime.utcnow()
|
|
|
|
async def start(self):
|
|
"""Start the production scheduler with APScheduler"""
|
|
if self.scheduler.running:
|
|
logger.warning("Production scheduler is already running")
|
|
return
|
|
|
|
# Add the periodic job
|
|
trigger = IntervalTrigger(seconds=self.check_interval)
|
|
self.scheduler.add_job(
|
|
self._run_scheduler_task,
|
|
trigger=trigger,
|
|
id=self.job_id,
|
|
name="Production Alert Checks",
|
|
max_instances=1 # Prevent overlapping executions
|
|
)
|
|
|
|
# Start the scheduler
|
|
self.scheduler.start()
|
|
logger.info("Production scheduler started", interval_seconds=self.check_interval)
|
|
|
|
async def stop(self):
|
|
"""Stop the production scheduler"""
|
|
if self.scheduler.running:
|
|
self.scheduler.shutdown(wait=True)
|
|
logger.info("Production scheduler stopped")
|
|
else:
|
|
logger.info("Production scheduler already stopped")
|
|
|
|
async def _run_scheduler_task(self):
|
|
"""Run scheduled production alert checks with leader election"""
|
|
# Try to acquire leader lock for this scheduler
|
|
lock_name = f"production_scheduler:{self.database_manager.database_url if hasattr(self.database_manager, 'database_url') else 'default'}"
|
|
lock_id = abs(hash(lock_name)) % (2**31) # Generate a unique integer ID for the lock
|
|
acquired = False
|
|
|
|
try:
|
|
# Try to acquire PostgreSQL advisory lock for leader election
|
|
async with self.database_manager.get_session() as session:
|
|
result = await session.execute(text("SELECT pg_try_advisory_lock(:lock_id)"), {"lock_id": lock_id})
|
|
acquired = True # If no exception, lock was acquired
|
|
|
|
start_time = datetime.now()
|
|
logger.info("Running scheduled production alert checks (as leader)")
|
|
|
|
# Run all alert checks
|
|
alerts_generated = await self.check_all_conditions()
|
|
|
|
duration = (datetime.now() - start_time).total_seconds()
|
|
logger.info(
|
|
"Completed scheduled production alert checks",
|
|
alerts_generated=alerts_generated,
|
|
duration_seconds=round(duration, 2)
|
|
)
|
|
|
|
except Exception as e:
|
|
# If it's a lock acquisition error, log and skip execution (another instance is running)
|
|
error_str = str(e).lower()
|
|
if "lock" in error_str or "timeout" in error_str or "could not acquire" in error_str:
|
|
logger.debug(
|
|
"Skipping production scheduler execution (not leader)",
|
|
lock_name=lock_name
|
|
)
|
|
return # Not an error, just not the leader
|
|
else:
|
|
logger.error(
|
|
"Error in production scheduler task",
|
|
error=str(e),
|
|
exc_info=True
|
|
)
|
|
|
|
finally:
|
|
if acquired:
|
|
# Release the lock
|
|
try:
|
|
async with self.database_manager.get_session() as session:
|
|
await session.execute(text("SELECT pg_advisory_unlock(:lock_id)"), {"lock_id": lock_id})
|
|
await session.commit()
|
|
except Exception as unlock_error:
|
|
logger.warning(
|
|
"Error releasing leader lock (may have been automatically released)",
|
|
error=str(unlock_error)
|
|
)
|
|
|
|
async def check_all_conditions(self) -> int:
|
|
"""
|
|
Check all production alert conditions and trigger alerts.
|
|
|
|
Returns:
|
|
int: Total number of alerts generated
|
|
"""
|
|
if not self.database_manager:
|
|
logger.warning("Database manager not available for production checks")
|
|
return 0
|
|
|
|
total_alerts = 0
|
|
|
|
try:
|
|
async with self.database_manager.get_session() as session:
|
|
# Get repositories
|
|
batch_repo = ProductionBatchRepository(session)
|
|
equipment_repo = EquipmentRepository(session)
|
|
|
|
# Check production delays
|
|
delay_alerts = await self._check_production_delays(batch_repo)
|
|
total_alerts += delay_alerts
|
|
|
|
# Check equipment maintenance
|
|
maintenance_alerts = await self._check_equipment_maintenance(equipment_repo)
|
|
total_alerts += maintenance_alerts
|
|
|
|
# Check batch start delays (batches that should have started but haven't)
|
|
start_delay_alerts = await self._check_batch_start_delays(batch_repo)
|
|
total_alerts += start_delay_alerts
|
|
|
|
logger.info(
|
|
"Production alert checks completed",
|
|
total_alerts=total_alerts,
|
|
production_delays=delay_alerts,
|
|
equipment_maintenance=maintenance_alerts,
|
|
batch_start_delays=start_delay_alerts
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error during production alert checks",
|
|
error=str(e),
|
|
exc_info=True
|
|
)
|
|
|
|
return total_alerts
|
|
|
|
async def _check_production_delays(self, batch_repo: ProductionBatchRepository) -> int:
|
|
"""
|
|
Check for production delays and trigger alerts.
|
|
|
|
Args:
|
|
batch_repo: Production batch repository
|
|
|
|
Returns:
|
|
int: Number of delay alerts generated
|
|
"""
|
|
try:
|
|
# Get delayed batches from repository
|
|
delayed_batches = await batch_repo.get_production_delays()
|
|
|
|
logger.info("Found delayed production batches", count=len(delayed_batches))
|
|
|
|
# Limpiar cache si expiró
|
|
if (datetime.utcnow() - self._last_cache_clear).total_seconds() > self._alert_cache_ttl:
|
|
self._emitted_alerts.clear()
|
|
self._last_cache_clear = datetime.utcnow()
|
|
logger.info("Cleared alert cache due to TTL expiration")
|
|
|
|
alerts_generated = 0
|
|
|
|
for batch in delayed_batches:
|
|
try:
|
|
batch_id = UUID(str(batch["id"]))
|
|
|
|
# Verificar si ya emitimos alerta para este batch
|
|
alert_key = f"delay:{batch_id}"
|
|
if alert_key in self._emitted_alerts:
|
|
logger.debug("Skipping duplicate delay alert", batch_id=str(batch_id))
|
|
continue
|
|
|
|
tenant_id = UUID(str(batch["tenant_id"]))
|
|
delay_minutes = int(batch["delay_minutes"]) if batch.get("delay_minutes") else 0
|
|
affected_orders = int(batch.get("affected_orders", 0))
|
|
|
|
# Emit production delay alert
|
|
await self.alert_service.emit_production_delay(
|
|
tenant_id=tenant_id,
|
|
batch_id=batch_id,
|
|
product_name=batch.get("product_name", "Unknown Product"),
|
|
batch_number=batch.get("batch_number", "Unknown Batch"),
|
|
delay_minutes=delay_minutes,
|
|
affected_orders=affected_orders
|
|
)
|
|
|
|
# Registrar en cache
|
|
self._emitted_alerts.add(alert_key)
|
|
alerts_generated += 1
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error emitting production delay alert",
|
|
batch_id=batch.get("id", "unknown"),
|
|
error=str(e)
|
|
)
|
|
continue
|
|
|
|
return alerts_generated
|
|
|
|
except Exception as e:
|
|
logger.error("Error checking production delays", error=str(e))
|
|
return 0
|
|
|
|
async def _check_equipment_maintenance(self, equipment_repo: EquipmentRepository) -> int:
|
|
"""
|
|
Check for equipment needing maintenance and trigger alerts.
|
|
|
|
Args:
|
|
equipment_repo: Equipment repository
|
|
|
|
Returns:
|
|
int: Number of maintenance alerts generated
|
|
"""
|
|
try:
|
|
# Get equipment that needs maintenance using repository method
|
|
equipment_needing_maintenance = await equipment_repo.get_equipment_needing_maintenance()
|
|
|
|
logger.info(
|
|
"Found equipment needing maintenance",
|
|
count=len(equipment_needing_maintenance)
|
|
)
|
|
|
|
alerts_generated = 0
|
|
|
|
for equipment in equipment_needing_maintenance:
|
|
try:
|
|
equipment_id = UUID(equipment["id"])
|
|
tenant_id = UUID(equipment["tenant_id"])
|
|
days_overdue = int(equipment.get("days_overdue", 0))
|
|
|
|
# Emit equipment maintenance alert
|
|
await self.alert_service.emit_equipment_maintenance_due(
|
|
tenant_id=tenant_id,
|
|
equipment_id=equipment_id,
|
|
equipment_name=equipment.get("name", "Unknown Equipment"),
|
|
equipment_type=equipment.get("type", "unknown"),
|
|
last_maintenance_date=equipment.get("last_maintenance_date"),
|
|
days_overdue=days_overdue
|
|
)
|
|
|
|
alerts_generated += 1
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error emitting equipment maintenance alert",
|
|
equipment_id=equipment.get("id", "unknown"),
|
|
error=str(e)
|
|
)
|
|
continue
|
|
|
|
return alerts_generated
|
|
|
|
except Exception as e:
|
|
logger.error("Error checking equipment maintenance", error=str(e))
|
|
return 0
|
|
|
|
async def _check_batch_start_delays(self, batch_repo: ProductionBatchRepository) -> int:
|
|
"""
|
|
Check for batches that should have started but haven't.
|
|
|
|
Args:
|
|
batch_repo: Production batch repository
|
|
|
|
Returns:
|
|
int: Number of start delay alerts generated
|
|
"""
|
|
try:
|
|
# Get batches that should have started using repository method
|
|
delayed_start_batches = await batch_repo.get_batches_with_delayed_start()
|
|
|
|
logger.info(
|
|
"Found batches with delayed start",
|
|
count=len(delayed_start_batches)
|
|
)
|
|
|
|
alerts_generated = 0
|
|
|
|
for batch in delayed_start_batches:
|
|
try:
|
|
batch_id = UUID(batch["id"])
|
|
|
|
# Verificar si ya emitimos alerta para este batch
|
|
alert_key = f"start_delay:{batch_id}"
|
|
if alert_key in self._emitted_alerts:
|
|
logger.debug("Skipping duplicate start delay alert", batch_id=str(batch_id))
|
|
continue
|
|
|
|
tenant_id = UUID(batch["tenant_id"])
|
|
scheduled_start = batch.get("scheduled_start_time")
|
|
|
|
# Emit batch start delayed alert
|
|
await self.alert_service.emit_batch_start_delayed(
|
|
tenant_id=tenant_id,
|
|
batch_id=batch_id,
|
|
product_name=batch.get("product_name", "Unknown Product"),
|
|
batch_number=batch.get("batch_number", "Unknown Batch"),
|
|
scheduled_start=scheduled_start,
|
|
delay_reason="Batch has not started on time"
|
|
)
|
|
|
|
# Registrar en cache
|
|
self._emitted_alerts.add(alert_key)
|
|
alerts_generated += 1
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error emitting batch start delay alert",
|
|
batch_id=batch.get("id", "unknown"),
|
|
error=str(e)
|
|
)
|
|
continue
|
|
|
|
return alerts_generated
|
|
|
|
except Exception as e:
|
|
logger.error("Error checking batch start delays", error=str(e))
|
|
return 0
|
|
|
|
async def trigger_manual_check(self, tenant_id: Optional[UUID] = None) -> Dict[str, Any]:
|
|
"""
|
|
Manually trigger production alert checks for a specific tenant or all tenants.
|
|
|
|
Args:
|
|
tenant_id: Optional tenant ID to check. If None, checks all tenants.
|
|
|
|
Returns:
|
|
Dict with alert generation results
|
|
"""
|
|
logger.info(
|
|
"Manually triggering production alert checks",
|
|
tenant_id=str(tenant_id) if tenant_id else "all_tenants"
|
|
)
|
|
|
|
try:
|
|
if tenant_id:
|
|
# Run tenant-specific alert checks
|
|
alerts_generated = await self.check_all_conditions_for_tenant(tenant_id)
|
|
else:
|
|
# Run all alert checks across all tenants
|
|
alerts_generated = await self.check_all_conditions()
|
|
|
|
return {
|
|
"success": True,
|
|
"tenant_id": str(tenant_id) if tenant_id else None,
|
|
"alerts_generated": alerts_generated,
|
|
"timestamp": datetime.now().isoformat(),
|
|
"message": "Production alert checks completed successfully"
|
|
}
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error during manual production alert check",
|
|
error=str(e),
|
|
exc_info=True
|
|
)
|
|
return {
|
|
"success": False,
|
|
"tenant_id": str(tenant_id) if tenant_id else None,
|
|
"alerts_generated": 0,
|
|
"timestamp": datetime.now().isoformat(),
|
|
"error": str(e)
|
|
}
|
|
|
|
async def check_all_conditions_for_tenant(self, tenant_id: UUID) -> int:
|
|
"""
|
|
Check all production alert conditions for a specific tenant and trigger alerts.
|
|
|
|
Args:
|
|
tenant_id: Tenant ID to check conditions for
|
|
|
|
Returns:
|
|
int: Total number of alerts generated
|
|
"""
|
|
if not self.database_manager:
|
|
logger.warning("Database manager not available for production checks")
|
|
return 0
|
|
|
|
total_alerts = 0
|
|
|
|
try:
|
|
async with self.database_manager.get_session() as session:
|
|
# Get repositories
|
|
batch_repo = ProductionBatchRepository(session)
|
|
equipment_repo = EquipmentRepository(session)
|
|
|
|
# Check production delays for specific tenant
|
|
delay_alerts = await self._check_production_delays_for_tenant(batch_repo, tenant_id)
|
|
total_alerts += delay_alerts
|
|
|
|
# Check equipment maintenance for specific tenant
|
|
maintenance_alerts = await self._check_equipment_maintenance_for_tenant(equipment_repo, tenant_id)
|
|
total_alerts += maintenance_alerts
|
|
|
|
# Check batch start delays for specific tenant
|
|
start_delay_alerts = await self._check_batch_start_delays_for_tenant(batch_repo, tenant_id)
|
|
total_alerts += start_delay_alerts
|
|
|
|
logger.info(
|
|
"Tenant-specific production alert checks completed",
|
|
tenant_id=str(tenant_id),
|
|
total_alerts=total_alerts,
|
|
production_delays=delay_alerts,
|
|
equipment_maintenance=maintenance_alerts,
|
|
batch_start_delays=start_delay_alerts
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error during tenant-specific production alert checks",
|
|
tenant_id=str(tenant_id),
|
|
error=str(e),
|
|
exc_info=True
|
|
)
|
|
|
|
return total_alerts
|
|
|
|
async def _check_production_delays_for_tenant(self, batch_repo: ProductionBatchRepository, tenant_id: UUID) -> int:
|
|
"""
|
|
Check for production delays for a specific tenant and trigger alerts.
|
|
|
|
Args:
|
|
batch_repo: Production batch repository
|
|
tenant_id: Tenant ID to check for
|
|
|
|
Returns:
|
|
int: Number of delay alerts generated
|
|
"""
|
|
try:
|
|
# Get delayed batches for the specific tenant using repository method
|
|
delayed_batches = await batch_repo.get_production_delays(tenant_id)
|
|
|
|
logger.info("Found delayed production batches for tenant", count=len(delayed_batches), tenant_id=str(tenant_id))
|
|
|
|
alerts_generated = 0
|
|
|
|
for batch in delayed_batches:
|
|
try:
|
|
batch_id = UUID(str(batch["id"]))
|
|
delay_minutes = int(batch["delay_minutes"]) if batch.get("delay_minutes") else 0
|
|
affected_orders = int(batch.get("affected_orders", 0))
|
|
|
|
# Emit production delay alert
|
|
await self.alert_service.emit_production_delay(
|
|
tenant_id=tenant_id,
|
|
batch_id=batch_id,
|
|
product_name=batch.get("product_name", "Unknown Product"),
|
|
batch_number=batch.get("batch_number", "Unknown Batch"),
|
|
delay_minutes=delay_minutes,
|
|
affected_orders=affected_orders
|
|
)
|
|
|
|
alerts_generated += 1
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error emitting production delay alert",
|
|
tenant_id=str(tenant_id),
|
|
batch_id=batch.get("id", "unknown"),
|
|
error=str(e)
|
|
)
|
|
continue
|
|
|
|
return alerts_generated
|
|
|
|
except Exception as e:
|
|
logger.error("Error checking production delays for tenant", tenant_id=str(tenant_id), error=str(e))
|
|
return 0
|
|
|
|
async def _check_equipment_maintenance_for_tenant(self, equipment_repo: EquipmentRepository, tenant_id: UUID) -> int:
|
|
"""
|
|
Check for equipment needing maintenance for a specific tenant and trigger alerts.
|
|
|
|
Args:
|
|
equipment_repo: Equipment repository
|
|
tenant_id: Tenant ID to check for
|
|
|
|
Returns:
|
|
int: Number of maintenance alerts generated
|
|
"""
|
|
try:
|
|
# Get equipment that needs maintenance for specific tenant using repository method
|
|
equipment_needing_maintenance = await equipment_repo.get_equipment_needing_maintenance(tenant_id)
|
|
|
|
logger.info(
|
|
"Found equipment needing maintenance for tenant",
|
|
count=len(equipment_needing_maintenance),
|
|
tenant_id=str(tenant_id)
|
|
)
|
|
|
|
alerts_generated = 0
|
|
|
|
for equipment in equipment_needing_maintenance:
|
|
try:
|
|
equipment_id = UUID(equipment["id"])
|
|
days_overdue = int(equipment.get("days_overdue", 0))
|
|
|
|
# Emit equipment maintenance alert
|
|
await self.alert_service.emit_equipment_maintenance_due(
|
|
tenant_id=tenant_id,
|
|
equipment_id=equipment_id,
|
|
equipment_name=equipment.get("name", "Unknown Equipment"),
|
|
equipment_type=equipment.get("type", "unknown"),
|
|
last_maintenance_date=equipment.get("last_maintenance_date"),
|
|
days_overdue=days_overdue
|
|
)
|
|
|
|
alerts_generated += 1
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error emitting equipment maintenance alert",
|
|
tenant_id=str(tenant_id),
|
|
equipment_id=equipment.get("id", "unknown"),
|
|
error=str(e)
|
|
)
|
|
continue
|
|
|
|
return alerts_generated
|
|
|
|
except Exception as e:
|
|
logger.error("Error checking equipment maintenance for tenant", tenant_id=str(tenant_id), error=str(e))
|
|
return 0
|
|
|
|
async def _check_batch_start_delays_for_tenant(self, batch_repo: ProductionBatchRepository, tenant_id: UUID) -> int:
|
|
"""
|
|
Check for batches that should have started but haven't for a specific tenant.
|
|
|
|
Args:
|
|
batch_repo: Production batch repository
|
|
tenant_id: Tenant ID to check for
|
|
|
|
Returns:
|
|
int: Number of start delay alerts generated
|
|
"""
|
|
try:
|
|
# Get batches that should have started for specific tenant using repository method
|
|
delayed_start_batches = await batch_repo.get_batches_with_delayed_start(tenant_id)
|
|
|
|
logger.info(
|
|
"Found batches with delayed start for tenant",
|
|
count=len(delayed_start_batches),
|
|
tenant_id=str(tenant_id)
|
|
)
|
|
|
|
alerts_generated = 0
|
|
|
|
for batch in delayed_start_batches:
|
|
try:
|
|
batch_id = UUID(batch["id"])
|
|
scheduled_start = batch.get("scheduled_start_time")
|
|
|
|
# Emit batch start delayed alert
|
|
await self.alert_service.emit_batch_start_delayed(
|
|
tenant_id=tenant_id,
|
|
batch_id=batch_id,
|
|
product_name=batch.get("product_name", "Unknown Product"),
|
|
batch_number=batch.get("batch_number", "Unknown Batch"),
|
|
scheduled_start=scheduled_start,
|
|
delay_reason="Batch has not started on time"
|
|
)
|
|
|
|
alerts_generated += 1
|
|
|
|
except Exception as e:
|
|
logger.error(
|
|
"Error emitting batch start delay alert",
|
|
tenant_id=str(tenant_id),
|
|
batch_id=batch.get("id", "unknown"),
|
|
error=str(e)
|
|
)
|
|
continue
|
|
|
|
return alerts_generated
|
|
|
|
except Exception as e:
|
|
logger.error("Error checking batch start delays for tenant", tenant_id=str(tenant_id), error=str(e))
|
|
return 0
|