331 lines
10 KiB
Python
331 lines
10 KiB
Python
"""
|
|
Training Progress Events Publisher
|
|
Simple, clean event publisher for the 4 main training steps
|
|
"""
|
|
|
|
import structlog
|
|
from datetime import datetime
|
|
from typing import Dict, Any, Optional
|
|
from shared.messaging import RabbitMQClient
|
|
from app.core.config import settings
|
|
|
|
# Module-level structlog logger used by the messaging functions below.
logger = structlog.get_logger()
|
|
|
|
# Single global publisher instance
|
|
# Created at import time from settings.RABBITMQ_URL; setup_messaging() calls
# connect() on it and cleanup_messaging() calls disconnect().
# NOTE(review): the second argument looks like the publishing service's name —
# confirm against RabbitMQClient's signature.
training_publisher = RabbitMQClient(settings.RABBITMQ_URL, "training-service")
|
|
|
|
|
|
async def setup_messaging():
    """Connect the shared publisher and log how it went.

    Returns:
        The value returned by ``training_publisher.connect()``. A truthy
        result is logged at info level, a falsy one at warning level.
    """
    connected = await training_publisher.connect()

    if not connected:
        logger.warning("Training messaging failed to initialize")
        return connected

    logger.info("Training messaging initialized")
    return connected
|
|
|
|
|
|
async def cleanup_messaging():
    """Disconnect the shared publisher, then log that cleanup finished.

    The log line is only reached when ``disconnect()`` returns without
    raising; any exception it raises propagates to the caller.
    """
    await training_publisher.disconnect()
    logger.info("Training messaging cleaned up")
|
|
|
|
|
|
# ==========================================
# 4 MAIN TRAINING PROGRESS EVENTS
# ==========================================
|
|
|
|
async def publish_training_started(
    job_id: str,
    tenant_id: str,
    total_products: int,
    estimated_duration_minutes: Optional[int] = None,
    estimated_completion_time: Optional[str] = None
) -> bool:
    """
    Event 1: Training Started (0% progress)

    Publishes a "training.started" event on the "training.events" exchange
    and logs the outcome.

    Args:
        job_id: Training job identifier
        tenant_id: Tenant identifier
        total_products: Number of products to train
        estimated_duration_minutes: Estimated time to completion in minutes
        estimated_completion_time: ISO timestamp of estimated completion

    Returns:
        The result of training_publisher.publish_event(); a truthy value is
        treated as success and logged at info level, a falsy one at error
        level.
    """
    # Test against None explicitly instead of relying on truthiness: an
    # estimate of 0 minutes is a real value and must become 0 seconds,
    # not None.
    if estimated_duration_minutes is None:
        estimated_time_remaining_seconds = None
    else:
        estimated_time_remaining_seconds = estimated_duration_minutes * 60

    event_data = {
        "service_name": "training-service",
        "event_type": "training.started",
        # datetime.now() without a tz argument yields a naive local-time stamp.
        "timestamp": datetime.now().isoformat(),
        "data": {
            "job_id": job_id,
            "tenant_id": tenant_id,
            "progress": 0,
            "current_step": "Training Started",
            "step_details": f"Starting training for {total_products} products",
            "total_products": total_products,
            "estimated_duration_minutes": estimated_duration_minutes,
            "estimated_completion_time": estimated_completion_time,
            "estimated_time_remaining_seconds": estimated_time_remaining_seconds
        }
    }

    success = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.started",
        event_data=event_data
    )

    if success:
        logger.info("Published training started event",
                    job_id=job_id,
                    tenant_id=tenant_id,
                    total_products=total_products,
                    estimated_duration_minutes=estimated_duration_minutes)
    else:
        logger.error("Failed to publish training started event", job_id=job_id)

    return success
|
|
|
|
|
|
async def publish_data_analysis(
    job_id: str,
    tenant_id: str,
    analysis_details: Optional[str] = None,
    estimated_time_remaining_seconds: Optional[int] = None,
    estimated_completion_time: Optional[str] = None
) -> bool:
    """
    Event 2: Data Analysis (fixed at 20% progress)

    Sends a "training.progress" event on the "training.events" exchange and
    logs whether publishing worked.

    Args:
        job_id: Training job identifier
        tenant_id: Tenant identifier
        analysis_details: Text for "step_details"; a default description is
            used when this is None or empty
        estimated_time_remaining_seconds: Estimated time remaining in seconds
        estimated_completion_time: ISO timestamp of estimated completion

    Returns:
        The result of training_publisher.publish_event().
    """
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "progress": 20,
        "current_step": "Data Analysis",
        "step_details": analysis_details or "Analyzing sales, weather, and traffic data",
        "estimated_time_remaining_seconds": estimated_time_remaining_seconds,
        "estimated_completion_time": estimated_completion_time,
    }
    envelope = {
        "service_name": "training-service",
        "event_type": "training.progress",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.progress",
        event_data=envelope,
    )

    if not published:
        logger.error("Failed to publish data analysis event", job_id=job_id)
        return published

    logger.info("Published data analysis event", job_id=job_id, progress=20)
    return published
|
|
|
|
|
|
async def publish_training_progress(
    job_id: str,
    tenant_id: str,
    progress: int,
    current_step: str,
    step_details: Optional[str] = None,
    estimated_time_remaining_seconds: Optional[int] = None,
    estimated_completion_time: Optional[str] = None
) -> bool:
    """
    Generic Training Progress Event (caller supplies the percentage)

    Sends a "training.progress" event on the "training.events" exchange and
    logs whether publishing worked.

    Args:
        job_id: Training job identifier
        tenant_id: Tenant identifier
        progress: Progress percentage (0-100)
        current_step: Current step name
        step_details: Details about the current step; falls back to
            current_step when None or empty
        estimated_time_remaining_seconds: Estimated time remaining in seconds
        estimated_completion_time: ISO timestamp of estimated completion

    Returns:
        The result of training_publisher.publish_event().
    """
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "progress": progress,
        "current_step": current_step,
        "step_details": step_details or current_step,
        "estimated_time_remaining_seconds": estimated_time_remaining_seconds,
        "estimated_completion_time": estimated_completion_time,
    }
    envelope = {
        "service_name": "training-service",
        "event_type": "training.progress",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.progress",
        event_data=envelope,
    )

    if not published:
        logger.error(
            "Failed to publish training progress event",
            job_id=job_id,
            progress=progress,
        )
        return published

    logger.info(
        "Published training progress event",
        job_id=job_id,
        progress=progress,
        current_step=current_step,
    )
    return published
|
|
|
|
|
|
async def publish_product_training_completed(
    job_id: str,
    tenant_id: str,
    product_name: str,
    products_completed: int,
    total_products: int,
    estimated_time_remaining_seconds: Optional[int] = None,
    estimated_completion_time: Optional[str] = None
) -> bool:
    """
    Event 3: Product Training Completed (contributes to 20-80% progress)

    Emitted once per finished product. No "progress" field is included in
    the payload; the frontend/consumer is expected to derive it as:
        progress = 20 + (products_completed / total_products) * 60

    Args:
        job_id: Training job identifier
        tenant_id: Tenant identifier
        product_name: Name of the product that was trained
        products_completed: Number of products completed so far
        total_products: Total number of products
        estimated_time_remaining_seconds: Estimated time remaining in seconds
        estimated_completion_time: ISO timestamp of estimated completion

    Returns:
        The result of training_publisher.publish_event().
    """
    summary = f"Completed training for {product_name} ({products_completed}/{total_products})"
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "product_name": product_name,
        "products_completed": products_completed,
        "total_products": total_products,
        "current_step": "Model Training",
        "step_details": summary,
        "estimated_time_remaining_seconds": estimated_time_remaining_seconds,
        "estimated_completion_time": estimated_completion_time,
    }
    envelope = {
        "service_name": "training-service",
        "event_type": "training.product.completed",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.product.completed",
        event_data=envelope,
    )

    if not published:
        logger.error(
            "Failed to publish product training completed event",
            job_id=job_id,
        )
        return published

    logger.info(
        "Published product training completed event",
        job_id=job_id,
        product_name=product_name,
        products_completed=products_completed,
        total_products=total_products,
    )
    return published
|
|
|
|
|
|
async def publish_training_completed(
    job_id: str,
    tenant_id: str,
    successful_trainings: int,
    failed_trainings: int,
    total_duration_seconds: float
) -> bool:
    """
    Event 4: Training Completed (100% progress)

    Sends a "training.completed" event on the "training.events" exchange
    carrying the success/failure counts and the total duration, then logs
    whether publishing worked.

    Args:
        job_id: Training job identifier
        tenant_id: Tenant identifier
        successful_trainings: Count reported as successful
        failed_trainings: Count reported as failed
        total_duration_seconds: Total duration in seconds

    Returns:
        The result of training_publisher.publish_event().
    """
    summary = f"Training completed: {successful_trainings} successful, {failed_trainings} failed"
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "progress": 100,
        "current_step": "Training Completed",
        "step_details": summary,
        "successful_trainings": successful_trainings,
        "failed_trainings": failed_trainings,
        "total_duration_seconds": total_duration_seconds,
    }
    envelope = {
        "service_name": "training-service",
        "event_type": "training.completed",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.completed",
        event_data=envelope,
    )

    if not published:
        logger.error("Failed to publish training completed event", job_id=job_id)
        return published

    logger.info(
        "Published training completed event",
        job_id=job_id,
        successful_trainings=successful_trainings,
        failed_trainings=failed_trainings,
    )
    return published
|
|
|
|
|
|
async def publish_training_failed(
    job_id: str,
    tenant_id: str,
    error_message: str
) -> bool:
    """
    Event: Training Failed

    Sends a "training.failed" event on the "training.events" exchange with
    the error message, then logs whether publishing worked. The payload
    carries no "progress" field.

    Args:
        job_id: Training job identifier
        tenant_id: Tenant identifier
        error_message: Text placed in the payload's "error_message" field

    Returns:
        The result of training_publisher.publish_event().
    """
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "current_step": "Training Failed",
        "error_message": error_message,
    }
    envelope = {
        "service_name": "training-service",
        "event_type": "training.failed",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.failed",
        event_data=envelope,
    )

    if not published:
        logger.error("Failed to publish training failed event", job_id=job_id)
        return published

    logger.info(
        "Published training failed event",
        job_id=job_id,
        error=error_message,
    )
    return published
|