REFACTOR production scheduler
This commit is contained in:
258
shared/monitoring/scheduler_metrics.py
Normal file
258
shared/monitoring/scheduler_metrics.py
Normal file
@@ -0,0 +1,258 @@
|
||||
# shared/monitoring/scheduler_metrics.py
|
||||
"""
|
||||
Scheduler Metrics - Prometheus metrics for production and procurement schedulers
|
||||
|
||||
Provides comprehensive metrics for monitoring automated daily planning:
|
||||
- Scheduler execution success/failure rates
|
||||
- Tenant processing times
|
||||
- Cache hit rates for forecasts
|
||||
- Plan generation statistics
|
||||
"""
|
||||
|
||||
from prometheus_client import Counter, Histogram, Gauge, Info
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
# ================================================================
|
||||
# PRODUCTION SCHEDULER METRICS
|
||||
# ================================================================
|
||||
|
||||
# Schedule generations per tenant; `status` is one of: success, failure.
production_schedules_generated_total = Counter(
    name='production_schedules_generated_total',
    documentation='Total number of production schedules generated',
    labelnames=['tenant_id', 'status'],
)

# Per-tenant generation latency; bucket upper bounds are expressed in seconds.
production_schedule_generation_duration_seconds = Histogram(
    name='production_schedule_generation_duration_seconds',
    documentation='Time taken to generate production schedule per tenant',
    labelnames=['tenant_id'],
    buckets=[1, 5, 10, 30, 60, 120, 180, 300],
)

# Tenants handled by the scheduler; `status` is one of: success, failure, timeout.
production_tenants_processed_total = Counter(
    name='production_tenants_processed_total',
    documentation='Total number of tenants processed by production scheduler',
    labelnames=['status'],
)

# Production batches created, broken down by tenant.
production_batches_created_total = Counter(
    name='production_batches_created_total',
    documentation='Total number of production batches created',
    labelnames=['tenant_id'],
)

# Scheduler executions; `trigger` is one of: scheduled, manual, test.
production_scheduler_runs_total = Counter(
    name='production_scheduler_runs_total',
    documentation='Total number of production scheduler executions',
    labelnames=['trigger'],
)

# Scheduler errors, broken down by a caller-supplied `error_type`.
production_scheduler_errors_total = Counter(
    name='production_scheduler_errors_total',
    documentation='Total number of production scheduler errors',
    labelnames=['error_type'],
)
|
||||
|
||||
# ================================================================
|
||||
# PROCUREMENT SCHEDULER METRICS
|
||||
# ================================================================
|
||||
|
||||
# Plan generations per tenant; `status` is one of: success, failure.
procurement_plans_generated_total = Counter(
    name='procurement_plans_generated_total',
    documentation='Total number of procurement plans generated',
    labelnames=['tenant_id', 'status'],
)

# Per-tenant plan generation latency; bucket upper bounds are in seconds.
procurement_plan_generation_duration_seconds = Histogram(
    name='procurement_plan_generation_duration_seconds',
    documentation='Time taken to generate procurement plan per tenant',
    labelnames=['tenant_id'],
    buckets=[1, 5, 10, 30, 60, 120, 180, 300],
)

# Tenants handled by the scheduler; `status` is one of: success, failure, timeout.
procurement_tenants_processed_total = Counter(
    name='procurement_tenants_processed_total',
    documentation='Total number of tenants processed by procurement scheduler',
    labelnames=['status'],
)

# Requirements created; `priority` is one of: critical, high, medium, low.
procurement_requirements_created_total = Counter(
    name='procurement_requirements_created_total',
    documentation='Total number of procurement requirements created',
    labelnames=['tenant_id', 'priority'],
)

# Scheduler executions; `trigger` is one of: scheduled, manual, test.
procurement_scheduler_runs_total = Counter(
    name='procurement_scheduler_runs_total',
    documentation='Total number of procurement scheduler executions',
    labelnames=['trigger'],
)

# Rejected plans; `auto_regenerated` carries the string 'true' or 'false'.
procurement_plan_rejections_total = Counter(
    name='procurement_plan_rejections_total',
    documentation='Total number of procurement plans rejected',
    labelnames=['tenant_id', 'auto_regenerated'],
)

# Current number of plans for each (tenant, status) pair.
procurement_plans_by_status = Gauge(
    name='procurement_plans_by_status',
    documentation='Number of procurement plans by status',
    labelnames=['tenant_id', 'status'],
)
|
||||
|
||||
# ================================================================
|
||||
# FORECAST CACHING METRICS
|
||||
# ================================================================
|
||||
|
||||
# Cache lookups that found an entry, per tenant.
forecast_cache_hits_total = Counter(
    name='forecast_cache_hits_total',
    documentation='Total number of forecast cache hits',
    labelnames=['tenant_id'],
)

# Cache lookups that found nothing, per tenant.
forecast_cache_misses_total = Counter(
    name='forecast_cache_misses_total',
    documentation='Total number of forecast cache misses',
    labelnames=['tenant_id'],
)

# Hit rate per tenant, reported as a percentage in the range 0-100.
forecast_cache_hit_rate = Gauge(
    name='forecast_cache_hit_rate',
    documentation='Forecast cache hit rate percentage (0-100)',
    labelnames=['tenant_id'],
)

# Current cache size; `cache_type` is one of: single, batch.
# NOTE(review): this is a Gauge even though the name ends in `_total`.
forecast_cache_entries_total = Gauge(
    name='forecast_cache_entries_total',
    documentation='Total number of entries in forecast cache',
    labelnames=['cache_type'],
)

# Invalidations; `reason` is one of: model_retrain, manual, expiry.
forecast_cache_invalidations_total = Counter(
    name='forecast_cache_invalidations_total',
    documentation='Total number of forecast cache invalidations',
    labelnames=['tenant_id', 'reason'],
)
|
||||
|
||||
# ================================================================
|
||||
# GENERAL SCHEDULER HEALTH METRICS
|
||||
# ================================================================
|
||||
|
||||
# Health flag per scheduler: 1 means healthy, 0 means unhealthy.
# `service` is one of: production, orders.
# `scheduler_type` is one of: daily, weekly, cleanup.
scheduler_health_status = Gauge(
    name='scheduler_health_status',
    documentation='Scheduler health status (1=healthy, 0=unhealthy)',
    labelnames=['service', 'scheduler_type'],
)

# When each scheduler last ran, as a Unix timestamp.
scheduler_last_run_timestamp = Gauge(
    name='scheduler_last_run_timestamp',
    documentation='Unix timestamp of last scheduler run',
    labelnames=['service', 'scheduler_type'],
)

# When each scheduler is next due to run, as a Unix timestamp.
scheduler_next_run_timestamp = Gauge(
    name='scheduler_next_run_timestamp',
    documentation='Unix timestamp of next scheduled run',
    labelnames=['service', 'scheduler_type'],
)

# Per-tenant processing timeouts; `service` is one of: production, procurement.
tenant_processing_timeout_total = Counter(
    name='tenant_processing_timeout_total',
    documentation='Total number of tenant processing timeouts',
    labelnames=['service', 'tenant_id'],
)
|
||||
|
||||
# ================================================================
|
||||
# HELPER FUNCTIONS FOR METRICS
|
||||
# ================================================================
|
||||
|
||||
|
||||
class SchedulerMetricsCollector:
    """Helper class for collecting scheduler metrics.

    Every method is a ``@staticmethod`` that updates one or more of the
    module-level Prometheus metrics; the class itself holds no state.
    """

    @staticmethod
    def record_production_schedule_generated(
        tenant_id: str,
        success: bool,
        duration_seconds: float,
        batches_created: int,
    ) -> None:
        """Record one production schedule generation attempt.

        Args:
            tenant_id: Tenant the schedule was generated for.
            success: Whether generation succeeded; selects the ``status``
                label (``'success'`` or ``'failure'``).
            duration_seconds: Time the generation took; observed for both
                successful and failed attempts.
            batches_created: Number of batches created; only counted when
                ``success`` is true.
        """
        status = 'success' if success else 'failure'
        production_schedules_generated_total.labels(tenant_id=tenant_id, status=status).inc()
        production_schedule_generation_duration_seconds.labels(tenant_id=tenant_id).observe(duration_seconds)

        if success:
            production_batches_created_total.labels(tenant_id=tenant_id).inc(batches_created)

    @staticmethod
    def record_procurement_plan_generated(
        tenant_id: str,
        success: bool,
        duration_seconds: float,
        requirements_count: int,
        priority: str = 'medium',
    ) -> None:
        """Record one procurement plan generation attempt.

        Args:
            tenant_id: Tenant the plan was generated for.
            success: Whether generation succeeded; selects the ``status``
                label (``'success'`` or ``'failure'``).
            duration_seconds: Time the generation took; observed for both
                successful and failed attempts.
            requirements_count: Number of requirements created; only counted
                when ``success`` is true.
            priority: Value for the ``priority`` label of the requirements
                counter (the metric documents critical, high, medium, low).
                Defaults to ``'medium'``, which is what earlier callers that
                do not pass a priority have always recorded.
        """
        status = 'success' if success else 'failure'
        procurement_plans_generated_total.labels(tenant_id=tenant_id, status=status).inc()
        procurement_plan_generation_duration_seconds.labels(tenant_id=tenant_id).observe(duration_seconds)

        if success:
            procurement_requirements_created_total.labels(
                tenant_id=tenant_id,
                priority=priority,
            ).inc(requirements_count)

    @staticmethod
    def record_scheduler_run(service: str, trigger: str = 'scheduled') -> None:
        """Record one scheduler execution.

        Args:
            service: ``'production'`` or ``'procurement'``; any other value
                is not counted and produces a warning log entry.
            trigger: Value for the ``trigger`` label (the metrics document
                scheduled, manual, test).
        """
        if service == 'production':
            production_scheduler_runs_total.labels(trigger=trigger).inc()
        elif service == 'procurement':
            procurement_scheduler_runs_total.labels(trigger=trigger).inc()
        else:
            # Surface typos / unsupported services instead of silently
            # dropping the sample.
            logger.warning(
                "Unknown scheduler service, run not recorded",
                service=service,
                trigger=trigger,
            )

    @staticmethod
    def record_tenant_processing(service: str, status: str) -> None:
        """Record the outcome of processing a single tenant.

        Args:
            service: ``'production'`` or ``'procurement'``; any other value
                is not counted and produces a warning log entry.
            status: Value for the ``status`` label (the metrics document
                success, failure, timeout).
        """
        if service == 'production':
            production_tenants_processed_total.labels(status=status).inc()
        elif service == 'procurement':
            procurement_tenants_processed_total.labels(status=status).inc()
        else:
            # Surface typos / unsupported services instead of silently
            # dropping the sample.
            logger.warning(
                "Unknown scheduler service, tenant processing not recorded",
                service=service,
                status=status,
            )

    @staticmethod
    def record_forecast_cache_lookup(tenant_id: str, hit: bool) -> None:
        """Count a forecast cache lookup as a hit or a miss for the tenant."""
        if hit:
            forecast_cache_hits_total.labels(tenant_id=tenant_id).inc()
        else:
            forecast_cache_misses_total.labels(tenant_id=tenant_id).inc()

    @staticmethod
    def update_forecast_cache_hit_rate(tenant_id: str, hit_rate_percent: float) -> None:
        """Set the tenant's forecast cache hit rate gauge.

        Args:
            tenant_id: Tenant the rate applies to.
            hit_rate_percent: Hit rate; the gauge is documented as a
                percentage in 0-100. The value is stored as given.
        """
        forecast_cache_hit_rate.labels(tenant_id=tenant_id).set(hit_rate_percent)

    @staticmethod
    def record_plan_rejection(tenant_id: str, auto_regenerated: bool) -> None:
        """Count a rejected procurement plan.

        Args:
            tenant_id: Tenant whose plan was rejected.
            auto_regenerated: Recorded in the ``auto_regenerated`` label as
                the string ``'true'`` or ``'false'``.
        """
        procurement_plan_rejections_total.labels(
            tenant_id=tenant_id,
            auto_regenerated='true' if auto_regenerated else 'false',
        ).inc()

    @staticmethod
    def update_scheduler_health(service: str, scheduler_type: str, is_healthy: bool) -> None:
        """Set the health gauge for a scheduler to 1 (healthy) or 0 (unhealthy)."""
        scheduler_health_status.labels(
            service=service,
            scheduler_type=scheduler_type,
        ).set(1 if is_healthy else 0)

    @staticmethod
    def record_timeout(service: str, tenant_id: str) -> None:
        """Count a tenant processing timeout for the given service and tenant."""
        tenant_processing_timeout_total.labels(
            service=service,
            tenant_id=tenant_id,
        ).inc()
|
||||
|
||||
|
||||
# Shared module-level collector handed out by get_scheduler_metrics_collector().
metrics_collector: SchedulerMetricsCollector = SchedulerMetricsCollector()


def get_scheduler_metrics_collector() -> SchedulerMetricsCollector:
    """Return the shared module-level ``metrics_collector`` instance."""
    return metrics_collector
|
||||
276
shared/utils/timezone_helper.py
Normal file
276
shared/utils/timezone_helper.py
Normal file
@@ -0,0 +1,276 @@
|
||||
# shared/utils/timezone_helper.py
|
||||
"""
|
||||
Timezone Utility Helper for Bakery Management System
|
||||
|
||||
Provides timezone-aware date/time utilities for accurate scheduling across
|
||||
different geographic locations. All schedulers should use these utilities
|
||||
to ensure consistent behavior.
|
||||
"""
|
||||
|
||||
from datetime import datetime, date, time
|
||||
from typing import Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
import structlog
|
||||
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
class TimezoneHelper:
    """Helper class for timezone-aware operations.

    All methods are classmethods; the class carries no instance state.
    Methods that accept an IANA timezone name fall back to
    ``DEFAULT_TIMEZONE`` and log a warning when the name cannot be loaded,
    except ``validate_timezone`` (returns ``False``) and
    ``get_timezone_offset_hours`` (returns ``0.0``).
    """

    DEFAULT_TIMEZONE = "Europe/Madrid"

    # NOTE(review): nothing in this class consults this set;
    # validate_timezone() accepts any name that ZoneInfo can load.
    VALID_TIMEZONES = {
        "Europe/Madrid", "Europe/London", "Europe/Paris", "Europe/Berlin",
        "America/New_York", "America/Chicago", "America/Los_Angeles",
        "Asia/Tokyo", "Asia/Shanghai", "Australia/Sydney",
        "UTC",
    }

    @classmethod
    def _resolve_timezone(cls, timezone_str: str) -> ZoneInfo:
        """Load ``timezone_str``, falling back to ``DEFAULT_TIMEZONE``.

        A warning is logged whenever the fallback is used.
        """
        try:
            return ZoneInfo(timezone_str)
        except Exception as e:
            # Deliberately broad: a bad name must never break a scheduler run.
            logger.warning(f"Invalid timezone {timezone_str}, using default",
                           error=str(e))
            return ZoneInfo(cls.DEFAULT_TIMEZONE)

    @classmethod
    def get_current_date_in_timezone(cls, timezone_str: str) -> date:
        """
        Get current date in specified timezone

        Args:
            timezone_str: IANA timezone string (e.g., "Europe/Madrid")

        Returns:
            Current date in the specified timezone (or in the default
            timezone when ``timezone_str`` cannot be loaded)
        """
        return datetime.now(cls._resolve_timezone(timezone_str)).date()

    @classmethod
    def get_current_datetime_in_timezone(cls, timezone_str: str) -> datetime:
        """
        Get current datetime in specified timezone

        Args:
            timezone_str: IANA timezone string

        Returns:
            Timezone-aware current datetime in the specified timezone (or in
            the default timezone when ``timezone_str`` cannot be loaded)
        """
        return datetime.now(cls._resolve_timezone(timezone_str))

    @classmethod
    def combine_date_time_in_timezone(
        cls,
        target_date: date,
        target_time: time,
        timezone_str: str
    ) -> datetime:
        """
        Combine date and time in specified timezone

        Args:
            target_date: Date component
            target_time: Time component
            timezone_str: IANA timezone string

        Returns:
            Timezone-aware datetime whose wall-clock fields come from
            ``target_date`` and ``target_time`` and whose tzinfo is the
            resolved timezone
        """
        tz = cls._resolve_timezone(timezone_str)
        return datetime.combine(target_date, target_time, tzinfo=tz)

    @classmethod
    def convert_to_utc(cls, dt: datetime) -> datetime:
        """
        Convert datetime to UTC

        Args:
            dt: Datetime to convert (should be timezone-aware; a naive value
                is assumed to already be in UTC and a warning is logged)

        Returns:
            Datetime in UTC timezone
        """
        utc = ZoneInfo("UTC")
        if dt.tzinfo is None:
            logger.warning("Converting naive datetime to UTC, assuming UTC")
            return dt.replace(tzinfo=utc)

        return dt.astimezone(utc)

    @classmethod
    def convert_from_utc(cls, dt: datetime, target_timezone: str) -> datetime:
        """
        Convert UTC datetime to target timezone

        Args:
            dt: Datetime to convert; a naive value is treated as UTC, an
                aware value is converted from whatever zone it carries
            target_timezone: Target IANA timezone string

        Returns:
            Datetime in target timezone (or in the default timezone when
            ``target_timezone`` cannot be loaded)
        """
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=ZoneInfo("UTC"))

        return dt.astimezone(cls._resolve_timezone(target_timezone))

    @classmethod
    def validate_timezone(cls, timezone_str: str) -> bool:
        """
        Validate if timezone string is valid

        Args:
            timezone_str: IANA timezone string to validate

        Returns:
            True if ``ZoneInfo`` can load the name, False otherwise
        """
        try:
            ZoneInfo(timezone_str)
        except Exception:
            return False
        return True

    @classmethod
    def get_timezone_offset_hours(cls, timezone_str: str) -> float:
        """
        Get current UTC offset for timezone in hours

        Args:
            timezone_str: IANA timezone string

        Returns:
            UTC offset in hours (e.g., +2.0 for CEST); 0.0 when the offset
            cannot be determined (a warning is logged)
        """
        try:
            now = datetime.now(ZoneInfo(timezone_str))
            return now.utcoffset().total_seconds() / 3600
        except Exception as e:
            logger.warning(f"Could not get offset for {timezone_str}",
                           error=str(e))
            return 0.0

    @classmethod
    def is_business_hours(
        cls,
        dt: Optional[datetime] = None,
        timezone_str: str = DEFAULT_TIMEZONE,
        start_hour: int = 8,
        end_hour: int = 20
    ) -> bool:
        """
        Check if datetime is within business hours

        Args:
            dt: Datetime to check (defaults to now). A naive value is assumed
                to already be wall-clock time in ``timezone_str``; an aware
                value is converted to ``timezone_str``.
            timezone_str: IANA timezone string
            start_hour: Business hours start (24h format, inclusive)
            end_hour: Business hours end (24h format, exclusive)

        Returns:
            True if the moment falls on Monday-Friday and its hour is in
            ``[start_hour, end_hour)``, False otherwise
        """
        if dt is None:
            dt = cls.get_current_datetime_in_timezone(timezone_str)
        elif dt.tzinfo is None:
            # Use the shared fallback so an invalid timezone name degrades to
            # the default zone instead of raising, like every other method.
            dt = dt.replace(tzinfo=cls._resolve_timezone(timezone_str))
        else:
            dt = cls.convert_from_utc(dt, timezone_str)

        # weekday(): Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
        if dt.weekday() >= 5:
            return False

        return start_hour <= dt.hour < end_hour

    @classmethod
    def get_next_business_day_at_time(
        cls,
        target_time: time,
        timezone_str: str = DEFAULT_TIMEZONE,
        from_datetime: Optional[datetime] = None
    ) -> datetime:
        """
        Get next business day at specific time in timezone

        Args:
            target_time: Time to schedule (e.g., time(6, 0) for 6 AM)
            timezone_str: IANA timezone string
            from_datetime: Starting datetime (defaults to now); a naive value
                is treated as UTC

        Returns:
            The earliest Monday-Friday occurrence of ``target_time`` in the
            resolved timezone that is strictly after the starting moment
        """
        # Local import: the module header only imports datetime, date, time.
        from datetime import timedelta

        if from_datetime is None:
            current = cls.get_current_datetime_in_timezone(timezone_str)
        else:
            current = cls.convert_from_utc(from_datetime, timezone_str)

        # Reuse the zone that was already resolved for `current` so an invalid
        # name is only warned about once.
        tz = current.tzinfo
        one_day = timedelta(days=1)

        candidate_day = current.date()
        # Today only qualifies while target_time is still ahead of us;
        # otherwise the result would lie in the past.
        if datetime.combine(candidate_day, target_time, tzinfo=tz) <= current:
            candidate_day += one_day

        # Skip Saturday (5) and Sunday (6).
        while candidate_day.weekday() >= 5:
            candidate_day += one_day

        return datetime.combine(candidate_day, target_time, tzinfo=tz)
|
||||
|
||||
|
||||
# Convenience functions for common operations
|
||||
|
||||
def get_tenant_current_date(tenant_timezone: str = "Europe/Madrid") -> date:
    """Return today's date as seen in the tenant's timezone.

    Thin wrapper around ``TimezoneHelper.get_current_date_in_timezone``.
    """
    today = TimezoneHelper.get_current_date_in_timezone(tenant_timezone)
    return today
|
||||
|
||||
|
||||
def get_tenant_current_datetime(tenant_timezone: str = "Europe/Madrid") -> datetime:
    """Return the current datetime as seen in the tenant's timezone.

    Thin wrapper around ``TimezoneHelper.get_current_datetime_in_timezone``.
    """
    now = TimezoneHelper.get_current_datetime_in_timezone(tenant_timezone)
    return now
|
||||
|
||||
|
||||
def is_tenant_business_hours(tenant_timezone: str = "Europe/Madrid") -> bool:
    """Tell whether the present moment is within business hours for the tenant.

    Thin wrapper around ``TimezoneHelper.is_business_hours``; only the
    timezone is passed, so that method's other defaults apply.
    """
    within_hours = TimezoneHelper.is_business_hours(timezone_str=tenant_timezone)
    return within_hours
|
||||
|
||||
|
||||
def validate_timezone(timezone_str: str) -> bool:
    """Report whether ``timezone_str`` is a usable timezone name.

    Thin wrapper around ``TimezoneHelper.validate_timezone``.
    """
    is_valid = TimezoneHelper.validate_timezone(timezone_str)
    return is_valid
|
||||
Reference in New Issue
Block a user