REFACTOR external service and improve websocket training
This commit is contained in:
238
services/training/app/services/training_events.py
Normal file
238
services/training/app/services/training_events.py
Normal file
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
Training Progress Events Publisher
|
||||
Simple, clean event publisher for the 4 main training steps
|
||||
"""
|
||||
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, Optional
|
||||
from shared.messaging.rabbitmq import RabbitMQClient
|
||||
from app.core.config import settings
|
||||
|
||||
# Module-wide structlog logger; the functions in this module pass event
# context (job_id, tenant_id, ...) to it as keyword arguments.
logger = structlog.get_logger()
|
||||
|
||||
# Single global publisher instance
|
||||
# Created at import time and shared by every publish_* coroutine in this
# module. The connection is opened by setup_messaging() and closed by
# cleanup_messaging(). The second argument is presumably the service name
# used by the client -- TODO confirm against RabbitMQClient.
training_publisher = RabbitMQClient(settings.RABBITMQ_URL, "training-service")
|
||||
|
||||
|
||||
async def setup_messaging():
    """
    Connect the shared training publisher.

    Awaits ``training_publisher.connect()``, logs the outcome and hands the
    connect result back to the caller unchanged.

    Returns:
        Whatever ``training_publisher.connect()`` returned; a truthy value is
        treated as a successful initialization.
    """
    connected = await training_publisher.connect()

    # A failed connect is only logged as a warning here; the caller decides
    # what to do with the falsy result.
    if not connected:
        logger.warning("Training messaging failed to initialize")
        return connected

    logger.info("Training messaging initialized")
    return connected
|
||||
|
||||
|
||||
async def cleanup_messaging():
    """
    Disconnect the shared training publisher.

    Awaits ``training_publisher.disconnect()`` and then logs that messaging
    has been cleaned up. Returns nothing.
    """
    await training_publisher.disconnect()
    # Only reached when disconnect() did not raise.
    logger.info("Training messaging cleaned up")
|
||||
|
||||
|
||||
# ==========================================
|
||||
# 4 MAIN TRAINING PROGRESS EVENTS
|
||||
# ==========================================
|
||||
|
||||
async def publish_training_started(
    job_id: str,
    tenant_id: str,
    total_products: int
) -> bool:
    """
    Event 1: Training Started (0% progress)

    Publishes a "training.started" event on the "training.events" exchange.

    Args:
        job_id: Training job identifier, echoed in the payload.
        tenant_id: Tenant identifier, echoed in the payload.
        total_products: Product count, echoed in the payload and embedded in
            the human-readable step details.

    Returns:
        The result of ``training_publisher.publish_event``; a truthy value is
        treated as a successful publish.
    """
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "progress": 0,
        "current_step": "Training Started",
        "step_details": f"Starting training for {total_products} products",
        "total_products": total_products,
    }
    # NOTE(review): datetime.now() without a tz yields a naive local
    # timestamp -- confirm consumers do not expect UTC.
    envelope = {
        "service_name": "training-service",
        "event_type": "training.started",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.started",
        event_data=envelope,
    )

    # Failure path first: log and hand the falsy result back unchanged.
    if not published:
        logger.error("Failed to publish training started event", job_id=job_id)
        return published

    logger.info(
        "Published training started event",
        job_id=job_id,
        tenant_id=tenant_id,
        total_products=total_products,
    )
    return published
|
||||
|
||||
|
||||
async def publish_data_analysis(
    job_id: str,
    tenant_id: str,
    analysis_details: Optional[str] = None
) -> bool:
    """
    Event 2: Data Analysis (20% progress)

    Publishes a "training.progress" event on the "training.events" exchange
    with a fixed progress value of 20.

    Args:
        job_id: Training job identifier, echoed in the payload.
        tenant_id: Tenant identifier, echoed in the payload.
        analysis_details: Optional text for the step details. Any falsy value
            (``None`` or an empty string) falls back to a default message.

    Returns:
        The result of ``training_publisher.publish_event``; a truthy value is
        treated as a successful publish.
    """
    step_details = (
        analysis_details
        if analysis_details
        else "Analyzing sales, weather, and traffic data"
    )
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "progress": 20,
        "current_step": "Data Analysis",
        "step_details": step_details,
    }
    # NOTE(review): datetime.now() without a tz yields a naive local
    # timestamp -- confirm consumers do not expect UTC.
    envelope = {
        "service_name": "training-service",
        "event_type": "training.progress",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.progress",
        event_data=envelope,
    )

    # Failure path first: log and hand the falsy result back unchanged.
    if not published:
        logger.error("Failed to publish data analysis event", job_id=job_id)
        return published

    logger.info(
        "Published data analysis event",
        job_id=job_id,
        progress=20,
    )
    return published
|
||||
|
||||
|
||||
async def publish_product_training_completed(
    job_id: str,
    tenant_id: str,
    product_name: str,
    products_completed: int,
    total_products: int
) -> bool:
    """
    Event 3: Product Training Completed (contributes to 20-80% progress)

    Published each time a single product finishes training. The payload
    carries no "progress" field; the frontend/consumer is expected to derive
    it as:
        progress = 20 + (products_completed / total_products) * 60

    Args:
        job_id: Training job identifier, echoed in the payload.
        tenant_id: Tenant identifier, echoed in the payload.
        product_name: Name of the product, echoed in the payload and in the
            step details.
        products_completed: Completed-product counter, echoed in the payload.
        total_products: Total product count, echoed in the payload.

    Returns:
        The result of ``training_publisher.publish_event``; a truthy value is
        treated as a successful publish.
    """
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "product_name": product_name,
        "products_completed": products_completed,
        "total_products": total_products,
        "current_step": "Model Training",
        "step_details": f"Completed training for {product_name} ({products_completed}/{total_products})",
    }
    # NOTE(review): datetime.now() without a tz yields a naive local
    # timestamp -- confirm consumers do not expect UTC.
    envelope = {
        "service_name": "training-service",
        "event_type": "training.product.completed",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.product.completed",
        event_data=envelope,
    )

    # Failure path first: log and hand the falsy result back unchanged.
    if not published:
        logger.error(
            "Failed to publish product training completed event",
            job_id=job_id,
        )
        return published

    logger.info(
        "Published product training completed event",
        job_id=job_id,
        product_name=product_name,
        products_completed=products_completed,
        total_products=total_products,
    )
    return published
|
||||
|
||||
|
||||
async def publish_training_completed(
    job_id: str,
    tenant_id: str,
    successful_trainings: int,
    failed_trainings: int,
    total_duration_seconds: float
) -> bool:
    """
    Event 4: Training Completed (100% progress)

    Publishes a "training.completed" event on the "training.events" exchange
    with a fixed progress value of 100.

    Args:
        job_id: Training job identifier, echoed in the payload.
        tenant_id: Tenant identifier, echoed in the payload.
        successful_trainings: Success count, echoed in the payload and in the
            step details.
        failed_trainings: Failure count, echoed in the payload and in the
            step details.
        total_duration_seconds: Duration value, echoed in the payload.

    Returns:
        The result of ``training_publisher.publish_event``; a truthy value is
        treated as a successful publish.
    """
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "progress": 100,
        "current_step": "Training Completed",
        "step_details": f"Training completed: {successful_trainings} successful, {failed_trainings} failed",
        "successful_trainings": successful_trainings,
        "failed_trainings": failed_trainings,
        "total_duration_seconds": total_duration_seconds,
    }
    # NOTE(review): datetime.now() without a tz yields a naive local
    # timestamp -- confirm consumers do not expect UTC.
    envelope = {
        "service_name": "training-service",
        "event_type": "training.completed",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.completed",
        event_data=envelope,
    )

    # Failure path first: log and hand the falsy result back unchanged.
    if not published:
        logger.error("Failed to publish training completed event", job_id=job_id)
        return published

    logger.info(
        "Published training completed event",
        job_id=job_id,
        successful_trainings=successful_trainings,
        failed_trainings=failed_trainings,
    )
    return published
|
||||
|
||||
|
||||
async def publish_training_failed(
    job_id: str,
    tenant_id: str,
    error_message: str
) -> bool:
    """
    Event: Training Failed

    Publishes a "training.failed" event on the "training.events" exchange.
    The payload carries no "progress" field.

    Args:
        job_id: Training job identifier, echoed in the payload.
        tenant_id: Tenant identifier, echoed in the payload.
        error_message: Failure description, echoed in the payload and in the
            success log line.

    Returns:
        The result of ``training_publisher.publish_event``; a truthy value is
        treated as a successful publish.
    """
    payload = {
        "job_id": job_id,
        "tenant_id": tenant_id,
        "current_step": "Training Failed",
        "error_message": error_message,
    }
    # NOTE(review): datetime.now() without a tz yields a naive local
    # timestamp -- confirm consumers do not expect UTC.
    envelope = {
        "service_name": "training-service",
        "event_type": "training.failed",
        "timestamp": datetime.now().isoformat(),
        "data": payload,
    }

    published = await training_publisher.publish_event(
        exchange_name="training.events",
        routing_key="training.failed",
        event_data=envelope,
    )

    # Failure path first: log and hand the falsy result back unchanged.
    if not published:
        logger.error("Failed to publish training failed event", job_id=job_id)
        return published

    logger.info(
        "Published training failed event",
        job_id=job_id,
        error=error_message,
    )
    return published
|
||||
Reference in New Issue
Block a user