New alert service
This commit is contained in:
@@ -324,15 +324,117 @@ class RetrainingTriggerService:
|
||||
"outdated_models": 0
|
||||
}
|
||||
|
||||
# TODO: Trigger retraining for outdated models
|
||||
# Would need to get list of outdated products from training service
|
||||
# Trigger retraining for outdated models
|
||||
try:
|
||||
from shared.clients.training_client import TrainingServiceClient
|
||||
from shared.config.base import get_settings
|
||||
from shared.messaging import get_rabbitmq_client
|
||||
|
||||
return {
|
||||
"status": "analyzed",
|
||||
"tenant_id": str(tenant_id),
|
||||
"outdated_models": outdated_count,
|
||||
"message": "Scheduled retraining analysis complete"
|
||||
}
|
||||
config = get_settings()
|
||||
training_client = TrainingServiceClient(config, "forecasting")
|
||||
|
||||
# Get list of models that need retraining
|
||||
outdated_models = await training_client.get_outdated_models(
|
||||
tenant_id=str(tenant_id),
|
||||
max_age_days=max_model_age_days,
|
||||
min_accuracy=0.85, # Configurable threshold
|
||||
min_new_data_points=1000 # Configurable threshold
|
||||
)
|
||||
|
||||
if not outdated_models:
|
||||
logger.info("No specific models returned for retraining", tenant_id=tenant_id)
|
||||
return {
|
||||
"status": "no_models_found",
|
||||
"tenant_id": str(tenant_id),
|
||||
"outdated_models": outdated_count
|
||||
}
|
||||
|
||||
# Publish retraining events to RabbitMQ for each model
|
||||
rabbitmq_client = get_rabbitmq_client()
|
||||
triggered_models = []
|
||||
|
||||
if rabbitmq_client:
|
||||
for model in outdated_models:
|
||||
try:
|
||||
import uuid as uuid_module
|
||||
from datetime import datetime
|
||||
|
||||
retraining_event = {
|
||||
"event_id": str(uuid_module.uuid4()),
|
||||
"event_type": "training.retrain.requested",
|
||||
"timestamp": datetime.utcnow().isoformat(),
|
||||
"tenant_id": str(tenant_id),
|
||||
"data": {
|
||||
"model_id": model.get('id'),
|
||||
"product_id": model.get('product_id'),
|
||||
"model_type": model.get('model_type'),
|
||||
"current_accuracy": model.get('accuracy'),
|
||||
"model_age_days": model.get('age_days'),
|
||||
"new_data_points": model.get('new_data_points', 0),
|
||||
"trigger_reason": model.get('trigger_reason', 'scheduled_check'),
|
||||
"priority": model.get('priority', 'normal'),
|
||||
"requested_by": "system_scheduled_check"
|
||||
}
|
||||
}
|
||||
|
||||
await rabbitmq_client.publish_event(
|
||||
exchange_name="training.events",
|
||||
routing_key="training.retrain.requested",
|
||||
event_data=retraining_event
|
||||
)
|
||||
|
||||
triggered_models.append({
|
||||
'model_id': model.get('id'),
|
||||
'product_id': model.get('product_id'),
|
||||
'event_id': retraining_event['event_id']
|
||||
})
|
||||
|
||||
logger.info(
|
||||
"Published retraining request",
|
||||
model_id=model.get('id'),
|
||||
product_id=model.get('product_id'),
|
||||
event_id=retraining_event['event_id'],
|
||||
trigger_reason=model.get('trigger_reason')
|
||||
)
|
||||
|
||||
except Exception as publish_error:
|
||||
logger.error(
|
||||
"Failed to publish retraining event",
|
||||
model_id=model.get('id'),
|
||||
error=str(publish_error)
|
||||
)
|
||||
# Continue with other models even if one fails
|
||||
|
||||
else:
|
||||
logger.warning(
|
||||
"RabbitMQ client not available, cannot trigger retraining",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
return {
|
||||
"status": "retraining_triggered",
|
||||
"tenant_id": str(tenant_id),
|
||||
"outdated_models": outdated_count,
|
||||
"triggered_count": len(triggered_models),
|
||||
"triggered_models": triggered_models,
|
||||
"message": f"Triggered retraining for {len(triggered_models)} models"
|
||||
}
|
||||
|
||||
except Exception as trigger_error:
|
||||
logger.error(
|
||||
"Failed to trigger retraining",
|
||||
tenant_id=tenant_id,
|
||||
error=str(trigger_error),
|
||||
exc_info=True
|
||||
)
|
||||
# Return analysis result even if triggering failed
|
||||
return {
|
||||
"status": "trigger_failed",
|
||||
"tenant_id": str(tenant_id),
|
||||
"outdated_models": outdated_count,
|
||||
"error": str(trigger_error),
|
||||
"message": "Analysis complete but failed to trigger retraining"
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
|
||||
Reference in New Issue
Block a user