New alert service

Urtzi Alfaro
2025-12-05 20:07:01 +01:00
parent 1fe3a73549
commit 667e6e0404
393 changed files with 26002 additions and 61033 deletions

View File

@@ -0,0 +1,165 @@
# services/production/app/api/batch.py
"""
Production Batch API - Batch operations for enterprise dashboards
Phase 2 optimization: Eliminate N+1 query patterns by fetching production data
for multiple tenants in a single request.
"""
from fastapi import APIRouter, Depends, HTTPException, Body
from typing import List, Dict, Any
from uuid import UUID
from pydantic import BaseModel, Field
import structlog
import asyncio
from app.services.production_service import ProductionService
from app.core.config import settings
from shared.auth.decorators import get_current_user_dep
router = APIRouter(tags=["production-batch"])
logger = structlog.get_logger()
def get_production_service() -> ProductionService:
"""Dependency injection for production service"""
from app.core.database import database_manager
return ProductionService(database_manager, settings)
class ProductionSummaryBatchRequest(BaseModel):
"""Request model for batch production summary"""
tenant_ids: List[str] = Field(..., description="List of tenant IDs", max_length=100)
class ProductionSummary(BaseModel):
"""Production summary for a single tenant"""
tenant_id: str
total_batches: int
pending_batches: int
in_progress_batches: int
completed_batches: int
on_hold_batches: int
cancelled_batches: int
total_planned_quantity: float
total_actual_quantity: float
efficiency_rate: float
@router.post("/batch/production-summary", response_model=Dict[str, ProductionSummary])
async def get_production_summary_batch(
request: ProductionSummaryBatchRequest = Body(...),
current_user: Dict[str, Any] = Depends(get_current_user_dep),
production_service: ProductionService = Depends(get_production_service)
):
"""
Get production summary for multiple tenants in a single request.
Optimized for enterprise dashboards to eliminate N+1 query patterns.
Fetches production data for all tenants in parallel.
Args:
request: Batch request with tenant IDs
Returns:
Dictionary mapping tenant_id -> production summary
Example:
POST /api/v1/production/batch/production-summary
{
"tenant_ids": ["tenant-1", "tenant-2", "tenant-3"]
}
Response:
{
"tenant-1": {"tenant_id": "tenant-1", "total_batches": 25, ...},
"tenant-2": {"tenant_id": "tenant-2", "total_batches": 18, ...},
"tenant-3": {"tenant_id": "tenant-3", "total_batches": 32, ...}
}
"""
try:
if len(request.tenant_ids) > 100:
raise HTTPException(
status_code=400,
detail="Maximum 100 tenant IDs allowed per batch request"
)
if not request.tenant_ids:
return {}
logger.info(
"Batch fetching production summaries",
tenant_count=len(request.tenant_ids)
)
async def fetch_tenant_production(tenant_id: str) -> tuple[str, ProductionSummary]:
"""Fetch production summary for a single tenant"""
try:
tenant_uuid = UUID(tenant_id)
summary = await production_service.get_dashboard_summary(tenant_uuid)
# Calculate efficiency rate
efficiency_rate = 0.0
if summary.total_planned_quantity and summary.total_actual_quantity is not None:
efficiency_rate = (summary.total_actual_quantity / summary.total_planned_quantity) * 100
return tenant_id, ProductionSummary(
tenant_id=tenant_id,
total_batches=int(summary.total_batches or 0),
pending_batches=int(summary.pending_batches or 0),
in_progress_batches=int(summary.in_progress_batches or 0),
completed_batches=int(summary.completed_batches or 0),
on_hold_batches=int(summary.on_hold_batches or 0),
cancelled_batches=int(summary.cancelled_batches or 0),
total_planned_quantity=float(summary.total_planned_quantity or 0),
total_actual_quantity=float(summary.total_actual_quantity or 0),
efficiency_rate=efficiency_rate
)
except Exception as e:
logger.warning(
"Failed to fetch production for tenant in batch",
tenant_id=tenant_id,
error=str(e)
)
return tenant_id, ProductionSummary(
tenant_id=tenant_id,
total_batches=0,
pending_batches=0,
in_progress_batches=0,
completed_batches=0,
on_hold_batches=0,
cancelled_batches=0,
total_planned_quantity=0.0,
total_actual_quantity=0.0,
efficiency_rate=0.0
)
# Fetch all tenant production data in parallel
tasks = [fetch_tenant_production(tid) for tid in request.tenant_ids]
results = await asyncio.gather(*tasks, return_exceptions=True)
# Build result dictionary
result_dict = {}
for result in results:
if isinstance(result, Exception):
logger.error("Exception in batch production fetch", error=str(result))
continue
tenant_id, summary = result
result_dict[tenant_id] = summary
logger.info(
"Batch production summaries retrieved",
requested_count=len(request.tenant_ids),
successful_count=len(result_dict)
)
return result_dict
except HTTPException:
raise
except Exception as e:
logger.error("Error in batch production summary", error=str(e), exc_info=True)
raise HTTPException(
status_code=500,
detail=f"Failed to fetch batch production summaries: {str(e)}"
)
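For illustration, a dashboard client replaces N per-tenant summary calls with one batched request. A minimal, hedged sketch using httpx; the service URL and bearer token are placeholders, and the path follows the docstring above:

import asyncio
import httpx

async def fetch_summaries(tenant_ids: list[str]) -> dict:
    # One POST replaces len(tenant_ids) individual dashboard-summary requests
    async with httpx.AsyncClient(base_url="http://production-service:8000") as client:
        resp = await client.post(
            "/api/v1/production/batch/production-summary",
            json={"tenant_ids": tenant_ids},
            headers={"Authorization": "Bearer <token>"},  # placeholder credentials
            timeout=30.0,
        )
        resp.raise_for_status()
        return resp.json()  # {tenant_id: {...summary...}, ...}

summaries = asyncio.run(fetch_summaries(["tenant-1", "tenant-2", "tenant-3"]))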

View File

@@ -0,0 +1,85 @@
# services/production/app/api/internal_alert_trigger.py
"""
Internal API for triggering production alerts.
Used by demo session cloning to generate realistic production delay alerts.
"""
from fastapi import APIRouter, HTTPException, Request, Path
from uuid import UUID
import structlog
logger = structlog.get_logger()
router = APIRouter()
@router.post("/api/internal/production-alerts/trigger/{tenant_id}")
async def trigger_production_alerts(
request: Request,
tenant_id: UUID = Path(..., description="Tenant ID to check production for")
) -> dict:
"""
Trigger production alert checks for a specific tenant (internal use only).
This endpoint is called by the demo session cloning process after production
batches are seeded to generate realistic production delay alerts.
Security: Protected by X-Internal-Service header check.
"""
try:
# Verify internal service header
if request.headers.get("X-Internal-Service") not in ("demo-session", "internal"):
logger.warning("Unauthorized internal API call", tenant_id=str(tenant_id))
raise HTTPException(
status_code=403,
detail="This endpoint is for internal service use only"
)
# Get production alert service from app state
production_alert_service = getattr(request.app.state, 'production_alert_service', None)
if not production_alert_service:
logger.error("Production alert service not initialized")
raise HTTPException(
status_code=500,
detail="Production alert service not available"
)
# Trigger production alert checks (checks all tenants, including this one)
logger.info("Triggering production alert checks", tenant_id=str(tenant_id))
await production_alert_service.check_production_delays()
# Return success (service checks all tenants, we can't get specific count)
result = {"total_alerts": 0, "message": "Production alert checks triggered"}
logger.info(
"Production alert checks completed",
tenant_id=str(tenant_id),
alerts_generated=result.get("total_alerts", 0)
)
return {
"success": True,
"tenant_id": str(tenant_id),
"alerts_generated": result.get("total_alerts", 0),
"breakdown": {
"critical": result.get("critical", 0),
"high": result.get("high", 0),
"medium": result.get("medium", 0),
"low": result.get("low", 0)
}
}
except HTTPException:
raise
except Exception as e:
logger.error(
"Error triggering production alerts",
tenant_id=str(tenant_id),
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Failed to trigger production alerts: {str(e)}"
)
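For reference, the demo-session caller only needs the internal header to pass the check above; a hedged sketch (hostname and port are assumptions):

import httpx

async def trigger_demo_alerts(tenant_id: str) -> dict:
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"http://production-service:8000/api/internal/production-alerts/trigger/{tenant_id}",
            headers={"X-Internal-Service": "demo-session"},  # must match the allowed values
        )
        resp.raise_for_status()
        return resp.json()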

View File

@@ -25,6 +25,7 @@ from app.schemas.production import (
ProductionStatusEnum
)
from app.core.config import settings
from app.utils.cache import get_cached, set_cached, make_cache_key
logger = structlog.get_logger()
route_builder = RouteBuilder('production')
@@ -56,8 +57,23 @@ async def list_production_batches(
current_user: dict = Depends(get_current_user_dep),
production_service: ProductionService = Depends(get_production_service)
):
"""List batches with filters: date, status, product, order_id"""
"""List batches with filters: date, status, product, order_id (with Redis caching - 20s TTL)"""
try:
# PERFORMANCE OPTIMIZATION: Cache frequently accessed queries (status filter, first page)
cache_key = None
if page == 1 and product_id is None and order_id is None and start_date is None and end_date is None:
# Cache simple status-filtered queries (common for dashboards)
cache_key = make_cache_key(
"production_batches",
str(tenant_id),
status=status.value if status else None,
page_size=page_size
)
cached_result = await get_cached(cache_key)
if cached_result is not None:
logger.debug("Cache hit for production batches", cache_key=cache_key, tenant_id=str(tenant_id), status=status)
return ProductionBatchListResponse(**cached_result)
filters = {
"status": status,
"product_id": str(product_id) if product_id else None,
@@ -68,6 +84,11 @@ async def list_production_batches(
batch_list = await production_service.get_production_batches_list(tenant_id, filters, page, page_size)
# Cache the result if applicable (20s TTL for production batches)
if cache_key:
await set_cached(cache_key, batch_list.model_dump(), ttl=20)
logger.debug("Cached production batches", cache_key=cache_key, ttl=20, tenant_id=str(tenant_id), status=status)
logger.info("Retrieved production batches list",
tenant_id=str(tenant_id), filters=filters)
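For illustration (assuming ProductionStatusEnum values such as "IN_PROGRESS"), the make_cache_key helper added later in this commit produces keys like:

# Parameters are sorted alphabetically; None-valued parameters are dropped
make_cache_key("production_batches", "tenant-1", status="IN_PROGRESS", page_size=25)
# -> "production_batches:tenant-1:page_size:25:status:IN_PROGRESS"
make_cache_key("production_batches", "tenant-1", status=None, page_size=25)
# -> "production_batches:tenant-1:page_size:25"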

View File

@@ -14,6 +14,7 @@ from shared.routing import RouteBuilder
from app.services.production_service import ProductionService
from app.schemas.production import ProductionDashboardSummary
from app.core.config import settings
from app.utils.cache import get_cached, set_cached, make_cache_key
logger = structlog.get_logger()
route_builder = RouteBuilder('production')
@@ -35,10 +36,22 @@ async def get_dashboard_summary(
current_user: dict = Depends(get_current_user_dep),
production_service: ProductionService = Depends(get_production_service)
):
"""Get production dashboard summary"""
"""Get production dashboard summary with caching (60s TTL)"""
try:
# PHASE 2: Check cache first
cache_key = make_cache_key("production_dashboard", str(tenant_id))
cached_result = await get_cached(cache_key)
if cached_result is not None:
logger.debug("Cache hit for production dashboard", cache_key=cache_key, tenant_id=str(tenant_id))
return ProductionDashboardSummary(**cached_result)
# Cache miss - fetch from database
summary = await production_service.get_dashboard_summary(tenant_id)
# PHASE 2: Cache the result (60s TTL for production batches)
await set_cached(cache_key, summary.model_dump(), ttl=60)
logger.debug("Cached production dashboard", cache_key=cache_key, ttl=60, tenant_id=str(tenant_id))
logger.info("Retrieved production dashboard summary",
tenant_id=str(tenant_id))

View File

@@ -27,14 +27,16 @@ from app.api import (
orchestrator, # NEW: Orchestrator integration endpoint
production_orders_operations, # Tenant deletion endpoints
audit,
ml_insights, # ML insights endpoint
batch
)
from app.api.internal_alert_trigger import router as internal_alert_trigger_router
class ProductionService(StandardFastAPIService):
"""Production Service with standardized setup"""
expected_migration_version = "001_initial_schema"
async def on_startup(self, app):
"""Custom startup logic including migration verification"""
@@ -63,6 +65,8 @@ class ProductionService(StandardFastAPIService):
]
self.alert_service = None
self.rabbitmq_client = None
self.event_publisher = None
# REMOVED: scheduler_service (replaced by Orchestrator Service)
# Create custom checks for services
@@ -84,22 +88,53 @@ class ProductionService(StandardFastAPIService):
expected_tables=production_expected_tables,
custom_health_checks={
"alert_service": check_alert_service
},
enable_messaging=True # Enable messaging support
)
async def _setup_messaging(self):
"""Setup messaging for production service using unified messaging"""
from shared.messaging import UnifiedEventPublisher, RabbitMQClient
try:
self.rabbitmq_client = RabbitMQClient(settings.RABBITMQ_URL, service_name="production-service")
await self.rabbitmq_client.connect()
# Create unified event publisher
self.event_publisher = UnifiedEventPublisher(self.rabbitmq_client, "production-service")
self.logger.info("Production service unified messaging setup completed")
except Exception as e:
self.logger.error("Failed to setup production unified messaging", error=str(e))
raise
async def _cleanup_messaging(self):
"""Cleanup messaging for production service"""
try:
if self.rabbitmq_client:
await self.rabbitmq_client.disconnect()
self.logger.info("Production service messaging cleanup completed")
except Exception as e:
self.logger.error("Error during production messaging cleanup", error=str(e))
async def on_startup(self, app: FastAPI):
"""Custom startup logic for production service"""
# Initialize messaging
await self._setup_messaging()
# Initialize alert service with EventPublisher and database manager
self.alert_service = ProductionAlertService(self.event_publisher, self.database_manager)
await self.alert_service.start()
self.logger.info("Production alert service started")
# Store services in app state
app.state.alert_service = self.alert_service
app.state.production_alert_service = self.alert_service # Also store with this name for internal trigger
# REMOVED: Production scheduler service initialization
# Scheduling is now handled by the Orchestrator Service
# which calls our /generate-schedule endpoint
async def on_shutdown(self, app: FastAPI):
"""Custom shutdown logic for production service"""
@@ -108,6 +143,9 @@ class ProductionService(StandardFastAPIService):
await self.alert_service.stop()
self.logger.info("Alert service stopped")
# Cleanup messaging
await self._cleanup_messaging()
def get_service_features(self):
"""Return production-specific features"""
return [
@@ -155,6 +193,7 @@ service.setup_custom_middleware()
# NOTE: Register more specific routes before generic parameterized routes
# IMPORTANT: Register audit router FIRST to avoid route matching conflicts
service.add_router(audit.router)
service.add_router(batch.router)
service.add_router(orchestrator.router) # NEW: Orchestrator integration endpoint
service.add_router(production_orders_operations.router) # Tenant deletion endpoints
service.add_router(quality_templates.router) # Register first to avoid route conflicts
@@ -166,6 +205,7 @@ service.add_router(production_dashboard.router)
service.add_router(analytics.router)
service.add_router(internal_demo.router)
service.add_router(ml_insights.router) # ML insights endpoint
service.add_router(internal_alert_trigger_router) # Internal alert trigger for demo cloning
# REMOVED: test_production_scheduler endpoint
# Production scheduling is now triggered by the Orchestrator Service

File diff suppressed because it is too large

View File

@@ -1,38 +1,33 @@
"""
Production Notification Service - Simplified
Emits minimal events using EventPublisher.
All enrichment handled by alert_processor.
These are NOTIFICATIONS (not alerts) - informational state changes that don't require user action.
"""
from datetime import datetime, timezone
from typing import Optional, Dict, Any
from sqlalchemy.orm import Session
from uuid import UUID
import structlog
from shared.messaging import UnifiedEventPublisher
# structlog is retained (rather than stdlib logging) because the emitters below
# log with keyword arguments, which logging.Logger.info() does not accept
logger = structlog.get_logger()
class ProductionNotificationService:
"""
Service for emitting production notifications using EventPublisher.
"""
def __init__(self, event_publisher: UnifiedEventPublisher):
self.publisher = event_publisher
async def emit_batch_state_changed_notification(
self,
db: Session,
tenant_id: UUID,
batch_id: str,
product_sku: str,
product_name: str,
@@ -44,76 +39,50 @@ class ProductionNotificationService(BaseAlertService):
) -> None:
"""
Emit notification when a production batch changes state.
"""
# Build message based on state transition
transition_messages = {
("PENDING", "IN_PROGRESS"): f"Production started for {product_name}",
("IN_PROGRESS", "COMPLETED"): f"Production completed for {product_name}",
("IN_PROGRESS", "PAUSED"): f"Production paused for {product_name}",
("PAUSED", "IN_PROGRESS"): f"Production resumed for {product_name}",
("IN_PROGRESS", "FAILED"): f"Production failed for {product_name}",
}
message = transition_messages.get(
(old_status, new_status),
f"{product_name} status changed from {old_status} to {new_status}"
)
metadata = {
"batch_id": batch_id,
"product_sku": product_sku,
"product_name": product_name,
"old_status": old_status,
"new_status": new_status,
"quantity": float(quantity),
"unit": unit,
"assigned_to": assigned_to,
"state_changed_at": datetime.now(timezone.utc).isoformat(),
}
await self.publisher.publish_notification(
event_type="production.batch_state_changed",
tenant_id=tenant_id,
data=metadata
)
logger.info(
"batch_state_changed_notification_emitted",
tenant_id=str(tenant_id),
batch_id=batch_id,
old_status=old_status,
new_status=new_status
)
async def emit_batch_completed_notification(
self,
db: Session,
tenant_id: UUID,
batch_id: str,
product_sku: str,
product_name: str,
@@ -124,64 +93,42 @@ class ProductionNotificationService(BaseAlertService):
) -> None:
"""
Emit notification when a production batch is completed.
"""
message_parts = [f"Produced {quantity_produced} {unit} of {product_name}"]
if production_duration_minutes:
message_parts.append(f"in {production_duration_minutes} minutes")
if quality_score:
message_parts.append(f"(Quality: {quality_score:.1f}%)")
message = " ".join(message_parts)
metadata = {
"batch_id": batch_id,
"product_sku": product_sku,
"product_name": product_name,
"quantity_produced": float(quantity_produced),
"unit": unit,
"production_duration_minutes": production_duration_minutes,
"quality_score": quality_score,
"completed_at": datetime.now(timezone.utc).isoformat(),
}
await self.publisher.publish_notification(
event_type="production.batch_completed",
tenant_id=tenant_id,
data=metadata
)
logger.info(
"batch_completed_notification_emitted",
tenant_id=str(tenant_id),
batch_id=batch_id,
quantity_produced=quantity_produced
)
async def emit_batch_started_notification(
self,
db: Session,
tenant_id: UUID,
batch_id: str,
product_sku: str,
product_name: str,
@@ -192,64 +139,41 @@ class ProductionNotificationService(BaseAlertService):
) -> None:
"""
Emit notification when a production batch is started.
"""
message_parts = [f"Started production of {quantity_planned} {unit} of {product_name}"]
if estimated_duration_minutes:
message_parts.append(f"(Est. {estimated_duration_minutes} min)")
if assigned_to:
message_parts.append(f"- Assigned to {assigned_to}")
message = " ".join(message_parts)
metadata = {
"batch_id": batch_id,
"product_sku": product_sku,
"product_name": product_name,
"quantity_planned": float(quantity_planned),
"unit": unit,
"estimated_duration_minutes": estimated_duration_minutes,
"assigned_to": assigned_to,
"started_at": datetime.now(timezone.utc).isoformat(),
}
await self.publisher.publish_notification(
event_type="production.batch_started",
tenant_id=tenant_id,
data=metadata
)
logger.info(
"batch_started_notification_emitted",
tenant_id=str(tenant_id),
batch_id=batch_id
)
async def emit_equipment_status_notification(
self,
db: Session,
tenant_id: UUID,
equipment_id: str,
equipment_name: str,
old_status: str,
@@ -258,50 +182,29 @@ class ProductionNotificationService(BaseAlertService):
) -> None:
"""
Emit notification when equipment status changes.
"""
message = f"{equipment_name} status: {old_status} → {new_status}"
if reason:
message += f" - {reason}"
metadata = {
"equipment_id": equipment_id,
"equipment_name": equipment_name,
"old_status": old_status,
"new_status": new_status,
"reason": reason,
"status_changed_at": datetime.now(timezone.utc).isoformat(),
}
await self.publisher.publish_notification(
event_type="production.equipment_status_changed",
tenant_id=tenant_id,
data=metadata
)
logger.info(
"equipment_status_notification_emitted",
tenant_id=str(tenant_id),
equipment_id=equipment_id,
new_status=new_status
)

View File

@@ -24,6 +24,7 @@ from app.schemas.production import (
ProductionScheduleCreate, ProductionScheduleUpdate, ProductionScheduleResponse,
DailyProductionRequirements, ProductionDashboardSummary, ProductionMetrics
)
from app.utils.cache import delete_cached, make_cache_key
logger = structlog.get_logger()
@@ -324,12 +325,17 @@ class ProductionService:
await self._update_inventory_on_completion(
tenant_id, batch, status_update.actual_quantity
)
logger.info("Updated batch status",
batch_id=str(batch_id),
# PHASE 2: Invalidate production dashboard cache
cache_key = make_cache_key("production_dashboard", str(tenant_id))
await delete_cached(cache_key)
logger.debug("Invalidated production dashboard cache", cache_key=cache_key, tenant_id=str(tenant_id))
logger.info("Updated batch status",
batch_id=str(batch_id),
new_status=status_update.status.value,
tenant_id=str(tenant_id))
return batch
except Exception as e:
@@ -658,7 +664,26 @@ class ProductionService:
logger.info("Started production batch",
batch_id=str(batch_id), tenant_id=str(tenant_id))
# Acknowledge production delay alerts (non-blocking)
try:
from shared.clients.alert_processor_client import get_alert_processor_client
alert_client = get_alert_processor_client(self.config, "production")
await alert_client.acknowledge_alerts_by_metadata(
tenant_id=tenant_id,
alert_type="production_delay",
metadata_filter={"batch_id": str(batch_id)}
)
await alert_client.acknowledge_alerts_by_metadata(
tenant_id=tenant_id,
alert_type="batch_at_risk",
metadata_filter={"batch_id": str(batch_id)}
)
logger.debug("Acknowledged production delay alerts", batch_id=str(batch_id))
except Exception as e:
# Log but don't fail the batch start
logger.warning("Failed to acknowledge production alerts", batch_id=str(batch_id), error=str(e))
return batch
except Exception as e:
logger.error("Error starting production batch",

View File

@@ -0,0 +1,26 @@
# services/alert_processor/app/utils/__init__.py
"""
Utility modules for alert processor service
"""
from .cache import (
get_redis_client,
close_redis,
get_cached,
set_cached,
delete_cached,
delete_pattern,
cache_response,
make_cache_key,
)
__all__ = [
'get_redis_client',
'close_redis',
'get_cached',
'set_cached',
'delete_cached',
'delete_pattern',
'cache_response',
'make_cache_key',
]

View File

@@ -0,0 +1,265 @@
# services/orchestrator/app/utils/cache.py
"""
Redis caching utilities for dashboard endpoints
"""
import json
import redis.asyncio as redis
from typing import Optional, Any, Callable
from functools import wraps
import structlog
from app.core.config import settings
from pydantic import BaseModel
logger = structlog.get_logger()
# Redis client instance
_redis_client: Optional[redis.Redis] = None
async def get_redis_client() -> Optional[redis.Redis]:
"""Get or create Redis client; returns None when Redis is unavailable"""
global _redis_client
if _redis_client is None:
try:
# Check if TLS is enabled - convert string to boolean properly
redis_tls_str = str(getattr(settings, 'REDIS_TLS_ENABLED', 'false')).lower()
redis_tls_enabled = redis_tls_str in ('true', '1', 'yes', 'on')
connection_kwargs = {
'host': str(getattr(settings, 'REDIS_HOST', 'localhost')),
'port': int(getattr(settings, 'REDIS_PORT', 6379)),
'db': int(getattr(settings, 'REDIS_DB', 0)),
'decode_responses': True,
'socket_connect_timeout': 5,
'socket_timeout': 5
}
# Add password if configured
redis_password = getattr(settings, 'REDIS_PASSWORD', None)
if redis_password:
connection_kwargs['password'] = redis_password
# Add SSL/TLS support if enabled
if redis_tls_enabled:
import ssl
connection_kwargs['ssl'] = True
connection_kwargs['ssl_cert_reqs'] = ssl.CERT_NONE
logger.debug(f"Redis TLS enabled - connecting with SSL to {connection_kwargs['host']}:{connection_kwargs['port']}")
_redis_client = redis.Redis(**connection_kwargs)
# Test connection
await _redis_client.ping()
logger.info(f"Redis client connected successfully (TLS: {redis_tls_enabled})")
except Exception as e:
logger.warning(f"Failed to connect to Redis: {e}. Caching will be disabled.")
_redis_client = None
return _redis_client
async def close_redis():
"""Close Redis connection"""
global _redis_client
if _redis_client:
await _redis_client.close()
_redis_client = None
logger.info("Redis connection closed")
async def get_cached(key: str) -> Optional[Any]:
"""
Get cached value by key
Args:
key: Cache key
Returns:
Cached value (deserialized from JSON) or None if not found or error
"""
try:
client = await get_redis_client()
if not client:
return None
cached = await client.get(key)
if cached:
logger.debug(f"Cache hit: {key}")
return json.loads(cached)
else:
logger.debug(f"Cache miss: {key}")
return None
except Exception as e:
logger.warning(f"Cache get error for key {key}: {e}")
return None
def _serialize_value(value: Any) -> Any:
"""
Recursively serialize values for JSON storage, handling Pydantic models properly.
Args:
value: Value to serialize
Returns:
JSON-serializable value
"""
if isinstance(value, BaseModel):
# Convert Pydantic model to dictionary
return value.model_dump()
elif isinstance(value, (list, tuple)):
# Recursively serialize list/tuple elements
return [_serialize_value(item) for item in value]
elif isinstance(value, dict):
# Recursively serialize dictionary values
return {key: _serialize_value(val) for key, val in value.items()}
else:
# For other types, use default serialization
return value
async def set_cached(key: str, value: Any, ttl: int = 60) -> bool:
"""
Set cached value with TTL
Args:
key: Cache key
value: Value to cache (will be JSON serialized)
ttl: Time to live in seconds
Returns:
True if successful, False otherwise
"""
try:
client = await get_redis_client()
if not client:
return False
# Serialize value properly before JSON encoding
serialized_value = _serialize_value(value)
serialized = json.dumps(serialized_value)
await client.setex(key, ttl, serialized)
logger.debug(f"Cache set: {key} (TTL: {ttl}s)")
return True
except Exception as e:
logger.warning(f"Cache set error for key {key}: {e}")
return False
async def delete_cached(key: str) -> bool:
"""
Delete cached value
Args:
key: Cache key
Returns:
True if successful, False otherwise
"""
try:
client = await get_redis_client()
if not client:
return False
await client.delete(key)
logger.debug(f"Cache deleted: {key}")
return True
except Exception as e:
logger.warning(f"Cache delete error for key {key}: {e}")
return False
async def delete_pattern(pattern: str) -> int:
"""
Delete all keys matching pattern
Args:
pattern: Redis key pattern (e.g., "dashboard:*")
Returns:
Number of keys deleted
"""
try:
client = await get_redis_client()
if not client:
return 0
keys = []
async for key in client.scan_iter(match=pattern):
keys.append(key)
if keys:
deleted = await client.delete(*keys)
logger.info(f"Deleted {deleted} keys matching pattern: {pattern}")
return deleted
return 0
except Exception as e:
logger.warning(f"Cache delete pattern error for {pattern}: {e}")
return 0
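# Hedged usage sketch (not part of the original module): delete_pattern lets the
# parameterized keys produced by make_cache_key below be invalidated as a group.
async def invalidate_tenant_batches(tenant_id: str) -> int:
    """Drop every cached production-batch list variant for one tenant."""
    # Matches e.g. "production_batches:<tenant>:page_size:25:status:IN_PROGRESS"
    return await delete_pattern(f"production_batches:{tenant_id}:*")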
def cache_response(key_prefix: str, ttl: int = 60):
"""
Decorator to cache endpoint responses
Args:
key_prefix: Prefix for cache key (will be combined with tenant_id)
ttl: Time to live in seconds
Usage:
@cache_response("dashboard:health", ttl=30)
async def get_health(tenant_id: str):
...
"""
def decorator(func: Callable):
@wraps(func)
async def wrapper(*args, **kwargs):
# Extract tenant_id from kwargs or args
tenant_id = kwargs.get('tenant_id')
if not tenant_id and args:
# Try to find tenant_id in args (assuming it's the first argument)
tenant_id = args[0] if len(args) > 0 else None
if not tenant_id:
# No tenant_id, skip caching
return await func(*args, **kwargs)
# Build cache key
cache_key = f"{key_prefix}:{tenant_id}"
# Try to get from cache
cached_value = await get_cached(cache_key)
if cached_value is not None:
return cached_value
# Execute function
result = await func(*args, **kwargs)
# Cache result
await set_cached(cache_key, result, ttl)
return result
return wrapper
return decorator
def make_cache_key(prefix: str, tenant_id: str, **params) -> str:
"""
Create a cache key with optional parameters
Args:
prefix: Key prefix
tenant_id: Tenant ID
**params: Additional parameters to include in key
Returns:
Cache key string
"""
key_parts = [prefix, tenant_id]
for k, v in sorted(params.items()):
if v is not None:
key_parts.append(f"{k}:{v}")
return ":".join(key_parts)