New alert service

This commit is contained in:
Urtzi Alfaro
2025-12-05 20:07:01 +01:00
parent 1fe3a73549
commit 667e6e0404
393 changed files with 26002 additions and 61033 deletions

View File

@@ -324,15 +324,117 @@ class RetrainingTriggerService:
"outdated_models": 0
}
# TODO: Trigger retraining for outdated models
# Would need to get list of outdated products from training service
# Trigger retraining for outdated models
try:
from shared.clients.training_client import TrainingServiceClient
from shared.config.base import get_settings
from shared.messaging import get_rabbitmq_client
return {
"status": "analyzed",
"tenant_id": str(tenant_id),
"outdated_models": outdated_count,
"message": "Scheduled retraining analysis complete"
}
config = get_settings()
training_client = TrainingServiceClient(config, "forecasting")
# Get list of models that need retraining
outdated_models = await training_client.get_outdated_models(
tenant_id=str(tenant_id),
max_age_days=max_model_age_days,
min_accuracy=0.85, # Configurable threshold
min_new_data_points=1000 # Configurable threshold
)
if not outdated_models:
logger.info("No specific models returned for retraining", tenant_id=tenant_id)
return {
"status": "no_models_found",
"tenant_id": str(tenant_id),
"outdated_models": outdated_count
}
# Publish retraining events to RabbitMQ for each model
rabbitmq_client = get_rabbitmq_client()
triggered_models = []
if rabbitmq_client:
for model in outdated_models:
try:
import uuid as uuid_module
from datetime import datetime
retraining_event = {
"event_id": str(uuid_module.uuid4()),
"event_type": "training.retrain.requested",
"timestamp": datetime.utcnow().isoformat(),
"tenant_id": str(tenant_id),
"data": {
"model_id": model.get('id'),
"product_id": model.get('product_id'),
"model_type": model.get('model_type'),
"current_accuracy": model.get('accuracy'),
"model_age_days": model.get('age_days'),
"new_data_points": model.get('new_data_points', 0),
"trigger_reason": model.get('trigger_reason', 'scheduled_check'),
"priority": model.get('priority', 'normal'),
"requested_by": "system_scheduled_check"
}
}
await rabbitmq_client.publish_event(
exchange_name="training.events",
routing_key="training.retrain.requested",
event_data=retraining_event
)
triggered_models.append({
'model_id': model.get('id'),
'product_id': model.get('product_id'),
'event_id': retraining_event['event_id']
})
logger.info(
"Published retraining request",
model_id=model.get('id'),
product_id=model.get('product_id'),
event_id=retraining_event['event_id'],
trigger_reason=model.get('trigger_reason')
)
except Exception as publish_error:
logger.error(
"Failed to publish retraining event",
model_id=model.get('id'),
error=str(publish_error)
)
# Continue with other models even if one fails
else:
logger.warning(
"RabbitMQ client not available, cannot trigger retraining",
tenant_id=tenant_id
)
return {
"status": "retraining_triggered",
"tenant_id": str(tenant_id),
"outdated_models": outdated_count,
"triggered_count": len(triggered_models),
"triggered_models": triggered_models,
"message": f"Triggered retraining for {len(triggered_models)} models"
}
except Exception as trigger_error:
logger.error(
"Failed to trigger retraining",
tenant_id=tenant_id,
error=str(trigger_error),
exc_info=True
)
# Return analysis result even if triggering failed
return {
"status": "trigger_failed",
"tenant_id": str(tenant_id),
"outdated_models": outdated_count,
"error": str(trigger_error),
"message": "Analysis complete but failed to trigger retraining"
}
except Exception as e:
logger.error(