REFACTOR production scheduler

This commit is contained in:
Urtzi Alfaro
2025-10-09 18:01:24 +02:00
parent 3c689b4f98
commit b420af32c5
13 changed files with 4046 additions and 6 deletions

View File

@@ -11,6 +11,7 @@ import uuid
from app.services.forecasting_service import EnhancedForecastingService
from app.services.prediction_service import PredictionService
from app.services.forecast_cache import get_forecast_cache_service
from app.schemas.forecasts import (
ForecastRequest, ForecastResponse, BatchForecastRequest,
BatchForecastResponse, MultiDayForecastResponse
@@ -53,7 +54,7 @@ async def generate_single_forecast(
current_user: dict = Depends(get_current_user_dep),
enhanced_forecasting_service: EnhancedForecastingService = Depends(get_enhanced_forecasting_service)
):
"""Generate a single product forecast"""
"""Generate a single product forecast with caching support"""
metrics = get_metrics_collector(request_obj)
try:
@@ -65,11 +66,41 @@ async def generate_single_forecast(
if metrics:
metrics.increment_counter("single_forecasts_total")
# Initialize cache service
cache_service = get_forecast_cache_service(settings.REDIS_URL)
# Check cache first
cached_forecast = await cache_service.get_cached_forecast(
tenant_id=uuid.UUID(tenant_id),
product_id=uuid.UUID(request.inventory_product_id),
forecast_date=request.forecast_date
)
if cached_forecast:
if metrics:
metrics.increment_counter("forecast_cache_hits_total")
logger.info("Returning cached forecast",
tenant_id=tenant_id,
forecast_id=cached_forecast.get('id'))
return ForecastResponse(**cached_forecast)
# Cache miss - generate forecast
if metrics:
metrics.increment_counter("forecast_cache_misses_total")
forecast = await enhanced_forecasting_service.generate_forecast(
tenant_id=tenant_id,
request=request
)
# Cache the result
await cache_service.cache_forecast(
tenant_id=uuid.UUID(tenant_id),
product_id=uuid.UUID(request.inventory_product_id),
forecast_date=request.forecast_date,
forecast_data=forecast.dict()
)
if metrics:
metrics.increment_counter("single_forecasts_success_total")

View File

@@ -0,0 +1,518 @@
# services/forecasting/app/services/forecast_cache.py
"""
Forecast Cache Service - Redis-based caching for forecast results
Provides service-level caching for forecast predictions to eliminate redundant
computations when multiple services (Orders, Production) request the same
forecast data within a short time window.
Cache Strategy:
- Key: forecast:{tenant_id}:{product_id}:{forecast_date}
- TTL: Until midnight of day after forecast_date
- Invalidation: On model retraining for specific products
- Metadata: Includes 'cached' flag for observability
"""
import hashlib
import json
import redis
from datetime import datetime, date, timedelta
from typing import Optional, Dict, Any, List
from uuid import UUID
import structlog
logger = structlog.get_logger()
class ForecastCacheService:
"""Service-level caching for forecast predictions"""
def __init__(self, redis_url: str):
"""
Initialize Redis connection for forecast caching
Args:
redis_url: Redis connection URL
"""
self.redis_url = redis_url
self._redis_client = None
self._connect()
def _connect(self):
"""Establish Redis connection with retry logic"""
try:
self._redis_client = redis.from_url(
self.redis_url,
decode_responses=True,
socket_keepalive=True,
# Per-option TCP keepalive tuning (socket_keepalive_options) is omitted
# here because the raw TCP_KEEP* option numbers vary across platforms.
retry_on_timeout=True,
max_connections=100, # Higher limit for forecast service
health_check_interval=30
)
# Test connection
self._redis_client.ping()
logger.info("Forecast cache Redis connection established")
except Exception as e:
logger.error("Failed to connect to forecast cache Redis", error=str(e))
self._redis_client = None
@property
def redis(self):
"""Get Redis client with connection check"""
if self._redis_client is None:
self._connect()
return self._redis_client
def is_available(self) -> bool:
"""Check if Redis cache is available"""
try:
return self.redis is not None and self.redis.ping()
except Exception:
return False
# ================================================================
# FORECAST CACHING
# ================================================================
def _get_forecast_key(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date
) -> str:
"""Generate cache key for forecast"""
return f"forecast:{tenant_id}:{product_id}:{forecast_date.isoformat()}"
def _get_batch_forecast_key(
self,
tenant_id: UUID,
product_ids: List[UUID],
forecast_date: date
) -> str:
"""Generate cache key for batch forecast"""
# Sort product IDs so the same set always yields the same key, and use a
# stable digest: built-in hash() is randomized per process (PYTHONHASHSEED),
# which would make keys unreachable across restarts and workers.
sorted_ids = sorted(str(pid) for pid in product_ids)
products_hash = hashlib.sha256(",".join(sorted_ids).encode()).hexdigest()[:16]
return f"forecast:batch:{tenant_id}:{products_hash}:{forecast_date.isoformat()}"
def _calculate_ttl(self, forecast_date: date) -> int:
"""
Calculate TTL for forecast cache entry
Forecasts expire at midnight of the day after forecast_date.
This ensures forecasts remain cached throughout the forecasted day
but don't become stale.
Args:
forecast_date: Date of the forecast
Returns:
TTL in seconds
"""
# Expire at midnight after forecast_date
expiry_datetime = datetime.combine(
forecast_date + timedelta(days=1),
datetime.min.time()
)
now = datetime.now()
ttl_seconds = int((expiry_datetime - now).total_seconds())
# Minimum TTL of 1 hour, maximum of 48 hours
return max(3600, min(ttl_seconds, 172800))
async def get_cached_forecast(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date
) -> Optional[Dict[str, Any]]:
"""
Retrieve cached forecast if available
Args:
tenant_id: Tenant identifier
product_id: Product identifier
forecast_date: Date of forecast
Returns:
Cached forecast data or None if not found
"""
if not self.is_available():
return None
try:
key = self._get_forecast_key(tenant_id, product_id, forecast_date)
cached_data = self.redis.get(key)
if cached_data:
forecast_data = json.loads(cached_data)
# Add cache hit metadata
forecast_data['cached'] = True
forecast_data['cache_hit_at'] = datetime.now().isoformat()
logger.info("Forecast cache HIT",
tenant_id=str(tenant_id),
product_id=str(product_id),
forecast_date=str(forecast_date))
return forecast_data
logger.debug("Forecast cache MISS",
tenant_id=str(tenant_id),
product_id=str(product_id),
forecast_date=str(forecast_date))
return None
except Exception as e:
logger.error("Error retrieving cached forecast",
error=str(e),
tenant_id=str(tenant_id))
return None
async def cache_forecast(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date,
forecast_data: Dict[str, Any]
) -> bool:
"""
Cache forecast prediction result
Args:
tenant_id: Tenant identifier
product_id: Product identifier
forecast_date: Date of forecast
forecast_data: Forecast prediction data to cache
Returns:
True if cached successfully, False otherwise
"""
if not self.is_available():
logger.warning("Redis not available, skipping forecast cache")
return False
try:
key = self._get_forecast_key(tenant_id, product_id, forecast_date)
ttl = self._calculate_ttl(forecast_date)
# Add caching metadata
cache_entry = {
**forecast_data,
'cached_at': datetime.now().isoformat(),
'cache_key': key,
'ttl_seconds': ttl
}
# Serialize and cache
self.redis.setex(
key,
ttl,
json.dumps(cache_entry, default=str)
)
logger.info("Forecast cached successfully",
tenant_id=str(tenant_id),
product_id=str(product_id),
forecast_date=str(forecast_date),
ttl_hours=round(ttl / 3600, 2))
return True
except Exception as e:
logger.error("Error caching forecast",
error=str(e),
tenant_id=str(tenant_id))
return False
async def get_cached_batch_forecast(
self,
tenant_id: UUID,
product_ids: List[UUID],
forecast_date: date
) -> Optional[Dict[str, Any]]:
"""
Retrieve cached batch forecast
Args:
tenant_id: Tenant identifier
product_ids: List of product identifiers
forecast_date: Date of forecast
Returns:
Cached batch forecast data or None
"""
if not self.is_available():
return None
try:
key = self._get_batch_forecast_key(tenant_id, product_ids, forecast_date)
cached_data = self.redis.get(key)
if cached_data:
forecast_data = json.loads(cached_data)
forecast_data['cached'] = True
forecast_data['cache_hit_at'] = datetime.now().isoformat()
logger.info("Batch forecast cache HIT",
tenant_id=str(tenant_id),
products_count=len(product_ids),
forecast_date=str(forecast_date))
return forecast_data
return None
except Exception as e:
logger.error("Error retrieving cached batch forecast", error=str(e))
return None
async def cache_batch_forecast(
self,
tenant_id: UUID,
product_ids: List[UUID],
forecast_date: date,
forecast_data: Dict[str, Any]
) -> bool:
"""Cache batch forecast result"""
if not self.is_available():
return False
try:
key = self._get_batch_forecast_key(tenant_id, product_ids, forecast_date)
ttl = self._calculate_ttl(forecast_date)
cache_entry = {
**forecast_data,
'cached_at': datetime.now().isoformat(),
'cache_key': key,
'ttl_seconds': ttl
}
self.redis.setex(key, ttl, json.dumps(cache_entry, default=str))
logger.info("Batch forecast cached successfully",
tenant_id=str(tenant_id),
products_count=len(product_ids),
ttl_hours=round(ttl / 3600, 2))
return True
except Exception as e:
logger.error("Error caching batch forecast", error=str(e))
return False
# ================================================================
# CACHE INVALIDATION
# ================================================================
async def invalidate_product_forecasts(
self,
tenant_id: UUID,
product_id: UUID
) -> int:
"""
Invalidate all forecast cache entries for a product
Called when model is retrained for specific product.
Args:
tenant_id: Tenant identifier
product_id: Product identifier
Returns:
Number of cache entries invalidated
"""
if not self.is_available():
return 0
try:
# Find all keys matching this product; SCAN avoids blocking Redis
# the way KEYS can on a large keyspace
pattern = f"forecast:{tenant_id}:{product_id}:*"
keys = list(self.redis.scan_iter(match=pattern))
if keys:
deleted = self.redis.delete(*keys)
logger.info("Invalidated product forecast cache",
tenant_id=str(tenant_id),
product_id=str(product_id),
keys_deleted=deleted)
return deleted
return 0
except Exception as e:
logger.error("Error invalidating product forecasts",
error=str(e),
tenant_id=str(tenant_id))
return 0
async def invalidate_tenant_forecasts(
self,
tenant_id: UUID,
forecast_date: Optional[date] = None
) -> int:
"""
Invalidate forecast cache for tenant
Args:
tenant_id: Tenant identifier
forecast_date: Optional specific date to invalidate
Returns:
Number of cache entries invalidated
"""
if not self.is_available():
return 0
try:
if forecast_date:
pattern = f"forecast:{tenant_id}:*:{forecast_date.isoformat()}"
else:
pattern = f"forecast:{tenant_id}:*"
keys = list(self.redis.scan_iter(match=pattern))
if keys:
deleted = self.redis.delete(*keys)
logger.info("Invalidated tenant forecast cache",
tenant_id=str(tenant_id),
forecast_date=str(forecast_date) if forecast_date else "all",
keys_deleted=deleted)
return deleted
return 0
except Exception as e:
logger.error("Error invalidating tenant forecasts", error=str(e))
return 0
async def invalidate_all_forecasts(self) -> int:
"""
Invalidate all forecast cache entries (use with caution)
Returns:
Number of cache entries invalidated
"""
if not self.is_available():
return 0
try:
pattern = "forecast:*"
keys = list(self.redis.scan_iter(match=pattern))
if keys:
deleted = self.redis.delete(*keys)
logger.warning("Invalidated ALL forecast cache", keys_deleted=deleted)
return deleted
return 0
except Exception as e:
logger.error("Error invalidating all forecasts", error=str(e))
return 0
# ================================================================
# CACHE STATISTICS & MONITORING
# ================================================================
def get_cache_stats(self) -> Dict[str, Any]:
"""
Get cache statistics for monitoring
Returns:
Dictionary with cache metrics
"""
if not self.is_available():
return {"available": False}
try:
info = self.redis.info()
# Get forecast-specific stats (SCAN keeps this non-blocking)
forecast_keys = list(self.redis.scan_iter(match="forecast:*"))
batch_keys = list(self.redis.scan_iter(match="forecast:batch:*"))
return {
"available": True,
"total_forecast_keys": len(forecast_keys),
"batch_forecast_keys": len(batch_keys),
"single_forecast_keys": len(forecast_keys) - len(batch_keys),
"used_memory": info.get("used_memory_human"),
"connected_clients": info.get("connected_clients"),
"keyspace_hits": info.get("keyspace_hits", 0),
"keyspace_misses": info.get("keyspace_misses", 0),
"hit_rate_percent": self._calculate_hit_rate(
info.get("keyspace_hits", 0),
info.get("keyspace_misses", 0)
),
"total_commands_processed": info.get("total_commands_processed", 0)
}
except Exception as e:
logger.error("Error getting cache stats", error=str(e))
return {"available": False, "error": str(e)}
def _calculate_hit_rate(self, hits: int, misses: int) -> float:
"""Calculate cache hit rate percentage"""
total = hits + misses
return round((hits / total * 100), 2) if total > 0 else 0.0
async def get_cached_forecast_info(
self,
tenant_id: UUID,
product_id: UUID,
forecast_date: date
) -> Optional[Dict[str, Any]]:
"""
Get metadata about cached forecast without retrieving full data
Args:
tenant_id: Tenant identifier
product_id: Product identifier
forecast_date: Date of forecast
Returns:
Cache metadata or None
"""
if not self.is_available():
return None
try:
key = self._get_forecast_key(tenant_id, product_id, forecast_date)
ttl = self.redis.ttl(key)
if ttl > 0:
return {
"cached": True,
"cache_key": key,
"ttl_seconds": ttl,
"ttl_hours": round(ttl / 3600, 2),
"expires_at": (datetime.now() + timedelta(seconds=ttl)).isoformat()
}
return None
except Exception as e:
logger.error("Error getting forecast cache info", error=str(e))
return None
# Global cache service instance
_cache_service = None
def get_forecast_cache_service(redis_url: Optional[str] = None) -> ForecastCacheService:
"""
Get the global forecast cache service instance
Args:
redis_url: Redis connection URL (required for first call)
Returns:
ForecastCacheService instance
"""
global _cache_service
if _cache_service is None:
if redis_url is None:
raise ValueError("redis_url required for first initialization")
_cache_service = ForecastCacheService(redis_url)
return _cache_service
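A minimal wiring sketch, assuming settings.REDIS_URL as used in the router above; the retraining hook is hypothetical. Note that the underlying client is synchronous redis-py, so each call performs blocking I/O even though the methods are declared async:

from app.core.config import settings
from app.services.forecast_cache import get_forecast_cache_service

cache_service = get_forecast_cache_service(settings.REDIS_URL)  # first call initializes the singleton

async def on_model_retrained(tenant_id, product_id):
    # Hypothetical hook: drop cached forecasts so the next request recomputes
    deleted = await cache_service.invalidate_product_forecasts(tenant_id, product_id)
    logger.info("Forecast cache invalidated after retraining", keys_deleted=deleted)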

View File

@@ -309,6 +309,9 @@ class ProcurementService:
elif status == "cancelled":
updates["execution_completed_at"] = datetime.utcnow()
elif status == "rejected":
# Handle plan rejection workflow - trigger notification and potential regeneration
await self._handle_plan_rejection(tenant_id, plan_id, approval_notes, updated_by)
plan = await self.plan_repo.update_plan(plan_id, tenant_id, updates)
if plan:
await self.db.commit()
@@ -1238,6 +1241,168 @@ class ProcurementService:
except Exception as e:
logger.warning("Failed to publish event", error=str(e))
async def _handle_plan_rejection(
self,
tenant_id: uuid.UUID,
plan_id: uuid.UUID,
rejection_notes: Optional[str],
rejected_by: Optional[uuid.UUID]
) -> None:
"""
Handle plan rejection workflow with notifications and optional regeneration
When a plan is rejected:
1. Send notifications to stakeholders
2. Analyze rejection reason
3. Offer regeneration option
4. Publish rejection event
"""
try:
logger.info("Processing plan rejection",
tenant_id=str(tenant_id),
plan_id=str(plan_id),
rejected_by=str(rejected_by) if rejected_by else None)
# Get plan details
plan = await self.plan_repo.get_plan_by_id(plan_id, tenant_id)
if not plan:
logger.error("Plan not found for rejection handling", plan_id=plan_id)
return
# Send notification to stakeholders
await self._send_plan_rejection_notification(
tenant_id, plan_id, plan.plan_number, rejection_notes, rejected_by
)
# Publish rejection event with details
await self._publish_plan_rejection_event(
tenant_id, plan_id, rejection_notes, rejected_by
)
# Check if we should auto-regenerate (e.g., if rejection due to stale data)
should_regenerate = self._should_auto_regenerate_plan(rejection_notes)
if should_regenerate:
logger.info("Auto-regenerating plan after rejection",
plan_id=plan_id, reason="stale data detected")
# Schedule regeneration (async task to not block rejection)
await self._schedule_plan_regeneration(tenant_id, plan.plan_date)
except Exception as e:
logger.error("Error handling plan rejection",
error=str(e),
plan_id=plan_id,
tenant_id=str(tenant_id))
def _should_auto_regenerate_plan(self, rejection_notes: Optional[str]) -> bool:
"""Determine if plan should be auto-regenerated based on rejection reason"""
if not rejection_notes:
return False
# Auto-regenerate if rejection mentions stale data or outdated forecasts
auto_regenerate_keywords = [
"stale", "outdated", "old data", "datos antiguos",
"desactualizado", "obsoleto"
]
rejection_lower = rejection_notes.lower()
return any(keyword in rejection_lower for keyword in auto_regenerate_keywords)
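# Illustrative behavior (hypothetical rejection notes):
#   _should_auto_regenerate_plan("Rechazado: datos antiguos")  -> True  ("datos antiguos" matches)
#   _should_auto_regenerate_plan("Budget exceeded")            -> False (no stale-data keyword)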
async def _send_plan_rejection_notification(
self,
tenant_id: uuid.UUID,
plan_id: uuid.UUID,
plan_number: str,
rejection_notes: Optional[str],
rejected_by: Optional[uuid.UUID]
) -> None:
"""Send notifications about plan rejection"""
try:
notification_data = {
"type": "procurement_plan_rejected",
"severity": "medium",
"title": f"Plan de Aprovisionamiento Rechazado: {plan_number}",
"message": f"El plan {plan_number} ha sido rechazado. {rejection_notes or 'Sin motivo especificado.'}",
"metadata": {
"tenant_id": str(tenant_id),
"plan_id": str(plan_id),
"plan_number": plan_number,
"rejection_notes": rejection_notes,
"rejected_by": str(rejected_by) if rejected_by else None,
"rejected_at": datetime.utcnow().isoformat(),
"action_required": "review_and_regenerate"
}
}
await self.rabbitmq_client.publish_event(
exchange_name="bakery_events",
routing_key="procurement.plan.rejected",
event_data=notification_data
)
logger.info("Plan rejection notification sent",
tenant_id=str(tenant_id),
plan_id=str(plan_id))
except Exception as e:
logger.error("Failed to send plan rejection notification", error=str(e))
async def _publish_plan_rejection_event(
self,
tenant_id: uuid.UUID,
plan_id: uuid.UUID,
rejection_notes: Optional[str],
rejected_by: Optional[uuid.UUID]
) -> None:
"""Publish plan rejection event for downstream systems"""
try:
event_data = {
"tenant_id": str(tenant_id),
"plan_id": str(plan_id),
"rejection_notes": rejection_notes,
"rejected_by": str(rejected_by) if rejected_by else None,
"timestamp": datetime.utcnow().isoformat(),
"event_type": "procurement.plan.rejected"
}
await self.rabbitmq_client.publish_event(
exchange_name="procurement.events",
routing_key="procurement.plan.rejected",
event_data=event_data
)
except Exception as e:
logger.warning("Failed to publish plan rejection event", error=str(e))
async def _schedule_plan_regeneration(
self,
tenant_id: uuid.UUID,
plan_date: date
) -> None:
"""Schedule automatic plan regeneration after rejection"""
try:
logger.info("Scheduling plan regeneration",
tenant_id=str(tenant_id),
plan_date=str(plan_date))
# Publish regeneration request event
event_data = {
"tenant_id": str(tenant_id),
"plan_date": plan_date.isoformat(),
"trigger": "rejection_auto_regenerate",
"timestamp": datetime.utcnow().isoformat(),
"event_type": "procurement.plan.regeneration_requested"
}
await self.rabbitmq_client.publish_event(
exchange_name="procurement.events",
routing_key="procurement.plan.regeneration_requested",
event_data=event_data
)
except Exception as e:
logger.error("Failed to schedule plan regeneration", error=str(e))
async def _publish_plan_status_changed_event(
self,
tenant_id: uuid.UUID,

View File

@@ -12,6 +12,7 @@ from sqlalchemy import text
from app.core.config import settings
from app.core.database import database_manager
from app.services.production_alert_service import ProductionAlertService
from app.services.production_scheduler_service import ProductionSchedulerService
from shared.service_base import StandardFastAPIService
# Import standardized routers
@@ -56,8 +57,9 @@ class ProductionService(StandardFastAPIService):
]
self.alert_service = None
self.scheduler_service = None
# Create custom checks for alert service
# Create custom checks for services
async def check_alert_service():
"""Check production alert service health"""
try:
@@ -66,6 +68,14 @@ class ProductionService(StandardFastAPIService):
self.logger.error("Alert service health check failed", error=str(e))
return False
async def check_scheduler_service():
"""Check production scheduler service health"""
try:
return self.scheduler_service is not None
except Exception as e:
self.logger.error("Scheduler service health check failed", error=str(e))
return False
super().__init__(
service_name=settings.SERVICE_NAME,
app_name=settings.APP_NAME,
@@ -74,7 +84,10 @@ class ProductionService(StandardFastAPIService):
api_prefix="", # Empty because RouteBuilder already includes /api/v1
database_manager=database_manager,
expected_tables=production_expected_tables,
custom_health_checks={"alert_service": check_alert_service}
custom_health_checks={
"alert_service": check_alert_service,
"scheduler_service": check_scheduler_service
}
)
async def on_startup(self, app: FastAPI):
@@ -84,11 +97,22 @@ class ProductionService(StandardFastAPIService):
await self.alert_service.start()
self.logger.info("Production alert service started")
# Store alert service in app state
# Initialize production scheduler service
self.scheduler_service = ProductionSchedulerService(settings)
await self.scheduler_service.start()
self.logger.info("Production scheduler service started")
# Store services in app state
app.state.alert_service = self.alert_service
app.state.scheduler_service = self.scheduler_service
async def on_shutdown(self, app: FastAPI):
"""Custom shutdown logic for production service"""
"""Custom startup logic for production service"""
# Stop scheduler service
if self.scheduler_service:
await self.scheduler_service.stop()
self.logger.info("Scheduler service stopped")
# Stop alert service
if self.alert_service:
await self.alert_service.stop()
@@ -100,6 +124,7 @@ class ProductionService(StandardFastAPIService):
"production_planning",
"batch_management",
"production_scheduling",
"automated_daily_scheduling", # NEW: Automated scheduler
"quality_control",
"equipment_management",
"capacity_planning",
@@ -144,6 +169,21 @@ service.add_router(production_dashboard.router)
service.add_router(analytics.router)
@app.post("/test/production-scheduler")
async def test_production_scheduler():
"""Test endpoint to manually trigger production scheduler"""
try:
if hasattr(app.state, 'scheduler_service'):
scheduler_service = app.state.scheduler_service
await scheduler_service.test_production_schedule_generation()
return {"message": "Production scheduler test triggered successfully"}
else:
return {"error": "Scheduler service not available"}
except Exception as e:
service.logger.error("Error testing production scheduler", error=str(e))
return {"error": f"Failed to trigger scheduler test: {str(e)}"}
if __name__ == "__main__":
import uvicorn
uvicorn.run(

View File

@@ -0,0 +1,493 @@
# services/production/app/services/production_scheduler_service.py
"""
Production Scheduler Service - Daily production planning automation
Automatically generates daily production schedules for all active tenants based on:
- Demand forecasts from Orders Service
- Current inventory levels
- Production capacity
- Recipe requirements
Runs daily at 5:30 AM (before procurement @ 6:00 AM) to ensure production
plans are ready for the day ahead.
"""
import asyncio
from datetime import datetime, timedelta, date
from typing import List, Dict, Any, Optional
from uuid import UUID
from decimal import Decimal
import structlog
from apscheduler.triggers.cron import CronTrigger
from zoneinfo import ZoneInfo
from shared.alerts.base_service import BaseAlertService, AlertServiceMixin
from shared.database.base import create_database_manager
from app.services.production_service import ProductionService
from app.schemas.production import ProductionScheduleCreate, ProductionBatchCreate
from app.models.production import ProductionStatus, ProductionPriority
logger = structlog.get_logger()
class ProductionSchedulerService(BaseAlertService, AlertServiceMixin):
"""
Production scheduler service for automated daily production planning
Extends BaseAlertService to use proven scheduling infrastructure
"""
def __init__(self, config):
super().__init__(config)
self.production_service = None
async def start(self):
"""Initialize scheduler and production service"""
await super().start()
# Store database manager for session creation
from app.core.database import database_manager
self.db_manager = database_manager
logger.info("Production scheduler service started", service=self.config.SERVICE_NAME)
def setup_scheduled_checks(self):
"""Configure daily production planning jobs"""
# Daily production planning at 5:30 AM (before procurement)
# This ensures production plans are ready before procurement plans
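# Note: CronTrigger fires in the scheduler's configured timezone; if the host
# clock is UTC, an explicit zone may be needed, e.g.
# CronTrigger(hour=5, minute=30, timezone=ZoneInfo("Europe/Madrid")).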
self.scheduler.add_job(
func=self.run_daily_production_planning,
trigger=CronTrigger(hour=5, minute=30),
id="daily_production_planning",
name="Daily Production Planning",
misfire_grace_time=300, # 5 minutes grace period
coalesce=True, # Combine missed runs
max_instances=1 # Only one instance at a time
)
# Stale schedule cleanup at 5:50 AM
self.scheduler.add_job(
func=self.run_stale_schedule_cleanup,
trigger=CronTrigger(hour=5, minute=50),
id="stale_schedule_cleanup",
name="Stale Schedule Cleanup",
misfire_grace_time=300,
coalesce=True,
max_instances=1
)
# Test job for development (every 30 minutes if DEBUG enabled)
if getattr(self.config, 'DEBUG', False) or getattr(self.config, 'PRODUCTION_TEST_MODE', False):
self.scheduler.add_job(
func=self.run_daily_production_planning,
trigger=CronTrigger(minute='*/30'),
id="test_production_planning",
name="Test Production Planning (30min)",
misfire_grace_time=300,
coalesce=True,
max_instances=1
)
logger.info("⚡ Test production planning job added (every 30 minutes)")
logger.info("📅 Production scheduled jobs configured",
jobs_count=len(self.scheduler.get_jobs()))
async def run_daily_production_planning(self):
"""
Execute daily production planning for all active tenants
Processes tenants in parallel with individual timeouts
"""
if not self.is_leader:
logger.debug("Skipping production planning - not leader")
return
try:
self._checks_performed += 1
logger.info("🔄 Starting daily production planning execution",
timestamp=datetime.now().isoformat())
# Get active non-demo tenants
active_tenants = await self.get_active_tenants()
if not active_tenants:
logger.info("No active tenants found for production planning")
return
logger.info(f"Processing {len(active_tenants)} tenants in parallel")
# Create tasks with timeout for each tenant
tasks = [
self._process_tenant_with_timeout(tenant_id, timeout_seconds=180)
for tenant_id in active_tenants
]
# Execute all tasks in parallel
results = await asyncio.gather(*tasks, return_exceptions=True)
# Count successes and failures
processed_tenants = sum(1 for r in results if r is True)
failed_tenants = sum(1 for r in results if isinstance(r, Exception) or r is False)
logger.info("🎯 Daily production planning completed",
total_tenants=len(active_tenants),
processed_tenants=processed_tenants,
failed_tenants=failed_tenants)
except Exception as e:
self._errors_count += 1
logger.error("💥 Daily production planning failed completely", error=str(e))
async def _process_tenant_with_timeout(self, tenant_id: UUID, timeout_seconds: int = 180) -> bool:
"""
Process tenant production planning with timeout
Returns True on success, False or raises exception on failure
"""
try:
await asyncio.wait_for(
self.process_tenant_production(tenant_id),
timeout=timeout_seconds
)
logger.info("✅ Successfully processed tenant", tenant_id=str(tenant_id))
return True
except asyncio.TimeoutError:
logger.error("⏱️ Tenant processing timed out",
tenant_id=str(tenant_id),
timeout=timeout_seconds)
return False
except Exception as e:
logger.error("❌ Error processing tenant production",
tenant_id=str(tenant_id),
error=str(e))
raise
async def process_tenant_production(self, tenant_id: UUID):
"""Process production planning for a specific tenant"""
try:
# Get tenant timezone for accurate date calculation
tenant_tz = await self._get_tenant_timezone(tenant_id)
# Calculate target date in tenant's timezone
target_date = datetime.now(ZoneInfo(tenant_tz)).date()
logger.info("Processing production for tenant",
tenant_id=str(tenant_id),
target_date=str(target_date),
timezone=tenant_tz)
# Check if schedule already exists for this date
async with self.db_manager.get_session() as session:
production_service = ProductionService(self.db_manager, self.config)
# Check for existing schedule
existing_schedule = await self._get_schedule_by_date(
session, tenant_id, target_date
)
if existing_schedule:
logger.info("📋 Production schedule already exists, skipping",
tenant_id=str(tenant_id),
schedule_date=str(target_date),
schedule_id=str(existing_schedule.get('id')))
return
# Calculate daily requirements
requirements = await production_service.calculate_daily_requirements(
tenant_id, target_date
)
if not requirements.production_plan:
logger.info("No production requirements for date",
tenant_id=str(tenant_id),
date=str(target_date))
return
# Create production schedule
schedule_data = ProductionScheduleCreate(
schedule_date=target_date,
schedule_name=f"Daily Production - {target_date.strftime('%Y-%m-%d')}",
status="draft",
notes=f"Auto-generated daily production schedule for {target_date}",
total_batches=len(requirements.production_plan),
auto_generated=True
)
schedule = await production_service.create_production_schedule(
tenant_id, schedule_data
)
# Create production batches from requirements
batches_created = 0
for item in requirements.production_plan:
try:
batch_data = await self._create_batch_from_requirement(
item, schedule.id, target_date
)
batch = await production_service.create_production_batch(
tenant_id, batch_data
)
batches_created += 1
except Exception as e:
logger.error("Error creating batch from requirement",
tenant_id=str(tenant_id),
product=item.get('product_name'),
error=str(e))
# Send notification about new schedule
await self.send_production_schedule_notification(
tenant_id, schedule.id, batches_created
)
logger.info("🎉 Production schedule created successfully",
tenant_id=str(tenant_id),
schedule_id=str(schedule.id),
schedule_date=str(target_date),
batches_created=batches_created)
except Exception as e:
logger.error("💥 Error processing tenant production",
tenant_id=str(tenant_id),
error=str(e))
raise
async def _get_tenant_timezone(self, tenant_id: UUID) -> str:
"""Get tenant's timezone, fallback to UTC if not configured"""
try:
from services.tenant.app.models.tenants import Tenant
from sqlalchemy import select
import os
tenant_db_url = os.getenv("TENANT_DATABASE_URL")
if not tenant_db_url:
logger.warning("TENANT_DATABASE_URL not set, using UTC")
return "UTC"
tenant_db = create_database_manager(tenant_db_url, "tenant-tz-lookup")
async with tenant_db.get_session() as session:
result = await session.execute(
select(Tenant).where(Tenant.id == tenant_id)
)
tenant = result.scalars().first()
if tenant and hasattr(tenant, 'timezone') and tenant.timezone:
return tenant.timezone
# Default to Europe/Madrid for Spanish bakeries
return "Europe/Madrid"
except Exception as e:
logger.warning("Could not fetch tenant timezone, using UTC",
tenant_id=str(tenant_id), error=str(e))
return "UTC"
async def _get_schedule_by_date(self, session, tenant_id: UUID, schedule_date: date) -> Optional[Dict]:
"""Check if production schedule exists for date"""
try:
from sqlalchemy import select, and_
from app.models.production import ProductionSchedule
result = await session.execute(
select(ProductionSchedule).where(
and_(
ProductionSchedule.tenant_id == tenant_id,
ProductionSchedule.schedule_date == schedule_date
)
)
)
schedule = result.scalars().first()
if schedule:
return {"id": schedule.id, "status": schedule.status}
return None
except Exception as e:
logger.error("Error checking existing schedule", error=str(e))
return None
async def _create_batch_from_requirement(
self,
requirement: Dict[str, Any],
schedule_id: UUID,
target_date: date
) -> ProductionBatchCreate:
"""Create batch data from production requirement"""
# Map urgency to priority
urgency_to_priority = {
"high": ProductionPriority.HIGH,
"medium": ProductionPriority.MEDIUM,
"low": ProductionPriority.LOW
}
priority = urgency_to_priority.get(requirement.get('urgency', 'medium'), ProductionPriority.MEDIUM)
# Calculate planned times (start at 6 AM, estimate 2 hours per batch)
planned_start = datetime.combine(target_date, datetime.min.time().replace(hour=6))
planned_duration = 120 # 2 hours default
return ProductionBatchCreate(
schedule_id=schedule_id,
product_id=UUID(requirement['product_id']),
product_name=requirement['product_name'],
planned_quantity=Decimal(str(requirement['recommended_production'])),
unit_of_measure="units",
priority=priority,
status=ProductionStatus.PLANNED,
planned_start_time=planned_start,
planned_duration_minutes=planned_duration,
notes=f"Auto-generated from demand forecast. Urgency: {requirement.get('urgency', 'medium')}",
auto_generated=True
)
async def run_stale_schedule_cleanup(self):
"""
Clean up stale production schedules and send reminders
"""
if not self.is_leader:
logger.debug("Skipping stale schedule cleanup - not leader")
return
try:
logger.info("🧹 Starting stale schedule cleanup")
active_tenants = await self.get_active_tenants()
if not active_tenants:
logger.info("No active tenants found for cleanup")
return
total_archived = 0
total_cancelled = 0
total_escalated = 0
# Process each tenant's stale schedules
for tenant_id in active_tenants:
try:
stats = await self._cleanup_tenant_schedules(tenant_id)
total_archived += stats.get('archived', 0)
total_cancelled += stats.get('cancelled', 0)
total_escalated += stats.get('escalated', 0)
except Exception as e:
logger.error("Error cleaning up tenant schedules",
tenant_id=str(tenant_id),
error=str(e))
logger.info("✅ Stale schedule cleanup completed",
archived=total_archived,
cancelled=total_cancelled,
escalated=total_escalated)
except Exception as e:
self._errors_count += 1
logger.error("💥 Stale schedule cleanup failed", error=str(e))
async def _cleanup_tenant_schedules(self, tenant_id: UUID) -> Dict[str, int]:
"""Cleanup stale schedules for a specific tenant"""
stats = {"archived": 0, "cancelled": 0, "escalated": 0}
try:
async with self.db_manager.get_session() as session:
from sqlalchemy import select, and_
from app.models.production import ProductionSchedule
today = date.today()
# Get all schedules for tenant
result = await session.execute(
select(ProductionSchedule).where(
ProductionSchedule.tenant_id == tenant_id
)
)
schedules = result.scalars().all()
for schedule in schedules:
schedule_age_days = (today - schedule.schedule_date).days
# Archive completed schedules older than 90 days
if schedule.status == "completed" and schedule_age_days > 90:
schedule.archived = True
stats["archived"] += 1
# Cancel draft schedules older than 7 days
elif schedule.status == "draft" and schedule_age_days > 7:
schedule.status = "cancelled"
schedule.notes = (schedule.notes or "") + "\nAuto-cancelled: stale draft schedule"
stats["cancelled"] += 1
# Escalate overdue schedules
elif schedule.schedule_date == today and schedule.status in ['draft', 'pending_approval']:
await self._send_schedule_escalation_alert(tenant_id, schedule.id)
stats["escalated"] += 1
await session.commit()
except Exception as e:
logger.error("Error in tenant schedule cleanup",
tenant_id=str(tenant_id), error=str(e))
return stats
async def send_production_schedule_notification(
self,
tenant_id: UUID,
schedule_id: UUID,
batches_count: int
):
"""Send notification about new production schedule"""
try:
alert_data = {
"type": "production_schedule_created",
"severity": "low",
"title": "Nuevo Plan de Producción Generado",
"message": f"Plan de producción diario creado con {batches_count} lotes programados",
"metadata": {
"tenant_id": str(tenant_id),
"schedule_id": str(schedule_id),
"batches_count": batches_count,
"auto_generated": True
}
}
await self.publish_item(tenant_id, alert_data, item_type='alert')
except Exception as e:
logger.error("Error sending schedule notification",
tenant_id=str(tenant_id),
error=str(e))
async def _send_schedule_escalation_alert(self, tenant_id: UUID, schedule_id: UUID):
"""Send escalation alert for overdue schedule"""
try:
alert_data = {
"type": "schedule_escalation",
"severity": "high",
"title": "Plan de Producción Vencido",
"message": "Plan de producción para hoy no ha sido procesado - Requiere atención urgente",
"metadata": {
"tenant_id": str(tenant_id),
"schedule_id": str(schedule_id),
"escalation_level": "urgent"
}
}
await self.publish_item(tenant_id, alert_data, item_type='alert')
except Exception as e:
logger.error("Error sending escalation alert", error=str(e))
async def test_production_schedule_generation(self):
"""Test method to manually trigger production planning"""
active_tenants = await self.get_active_tenants()
if not active_tenants:
logger.error("No active tenants found for testing production schedule generation")
return
test_tenant_id = active_tenants[0]
logger.info("Testing production schedule generation", tenant_id=str(test_tenant_id))
try:
await self.process_tenant_production(test_tenant_id)
logger.info("Test production schedule generation completed successfully")
except Exception as e:
logger.error("Test production schedule generation failed",
error=str(e), tenant_id=str(test_tenant_id))
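The tenant-timezone lookup matters mainly around midnight: the same instant falls on different calendar days in different zones, which changes which schedule date gets generated. A quick sanity check, assuming zoneinfo data is available:

from datetime import datetime
from zoneinfo import ZoneInfo

# 2025-10-08 23:30 UTC is already 01:30 on 2025-10-09 in Madrid (CEST, UTC+2)
utc_now = datetime(2025, 10, 8, 23, 30, tzinfo=ZoneInfo("UTC"))
assert utc_now.astimezone(ZoneInfo("Europe/Madrid")).date().isoformat() == "2025-10-09"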

View File

@@ -28,7 +28,10 @@ class Tenant(Base):
postal_code = Column(String(10), nullable=False)
latitude = Column(Float)
longitude = Column(Float)
# Timezone configuration for accurate scheduling
timezone = Column(String(50), default="Europe/Madrid", nullable=False)
# Contact info
phone = Column(String(20))
email = Column(String(255))

View File

@@ -0,0 +1,27 @@
"""Add timezone column to tenants
Revision ID: 20251009_timezone
Revises: 964ef5a3ac09
Create Date: 2025-10-09
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = '20251009_timezone'
down_revision = '964ef5a3ac09'
branch_labels = None
depends_on = None
def upgrade() -> None:
"""Add timezone column to tenants table for accurate scheduling"""
# Add timezone column with default Europe/Madrid
op.add_column('tenants', sa.Column('timezone', sa.String(50), nullable=False, server_default='Europe/Madrid'))
def downgrade() -> None:
"""Remove timezone column from tenants table"""
op.drop_column('tenants', 'timezone')
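On PostgreSQL this upgrade emits the equivalent of ALTER TABLE tenants ADD COLUMN timezone VARCHAR(50) NOT NULL DEFAULT 'Europe/Madrid'; the server default backfills existing rows, so adding the NOT NULL constraint in a single step is safe.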