Fix new services implementation 3

This commit is contained in:
Urtzi Alfaro
2025-08-14 16:47:34 +02:00
parent 0951547e92
commit 03737430ee
51 changed files with 657 additions and 982 deletions

View File

@@ -262,7 +262,7 @@ async def publish_job_cancelled(job_id: str, tenant_id: str, reason: str = "User
# PRODUCT-LEVEL TRAINING EVENTS
# =========================================
async def publish_product_training_started(job_id: str, tenant_id: str, product_name: str) -> bool:
async def publish_product_training_started(job_id: str, tenant_id: str, inventory_product_id: str) -> bool:
"""Publish single product training started event"""
return await training_publisher.publish_event(
exchange_name="training.events",
@@ -274,7 +274,7 @@ async def publish_product_training_started(job_id: str, tenant_id: str, product_
"data": {
"job_id": job_id,
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"started_at": datetime.now().isoformat()
}
}
@@ -283,7 +283,7 @@ async def publish_product_training_started(job_id: str, tenant_id: str, product_
async def publish_product_training_completed(
job_id: str,
tenant_id: str,
product_name: str,
inventory_product_id: str,
model_id: str,
metrics: Optional[Dict[str, float]] = None
) -> bool:
@@ -298,7 +298,7 @@ async def publish_product_training_completed(
"data": {
"job_id": job_id,
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"model_id": model_id,
"metrics": metrics or {},
"completed_at": datetime.now().isoformat()
@@ -309,7 +309,7 @@ async def publish_product_training_completed(
async def publish_product_training_failed(
job_id: str,
tenant_id: str,
product_name: str,
inventory_product_id: str,
error: str
) -> bool:
"""Publish single product training failed event"""
@@ -323,7 +323,7 @@ async def publish_product_training_failed(
"data": {
"job_id": job_id,
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"error": error,
"failed_at": datetime.now().isoformat()
}
@@ -334,7 +334,7 @@ async def publish_product_training_failed(
# MODEL LIFECYCLE EVENTS
# =========================================
async def publish_model_trained(model_id: str, tenant_id: str, product_name: str, metrics: Dict[str, float]) -> bool:
async def publish_model_trained(model_id: str, tenant_id: str, inventory_product_id: str, metrics: Dict[str, float]) -> bool:
"""Publish model trained event with safe metric serialization"""
# Clean metrics to ensure JSON serialization
@@ -347,7 +347,7 @@ async def publish_model_trained(model_id: str, tenant_id: str, product_name: str
"data": {
"model_id": model_id,
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"training_metrics": clean_metrics, # Now safe for JSON
"trained_at": datetime.now().isoformat()
}
@@ -360,7 +360,7 @@ async def publish_model_trained(model_id: str, tenant_id: str, product_name: str
)
async def publish_model_validated(model_id: str, tenant_id: str, product_name: str, validation_results: Dict[str, Any]) -> bool:
async def publish_model_validated(model_id: str, tenant_id: str, inventory_product_id: str, validation_results: Dict[str, Any]) -> bool:
"""Publish model validation event"""
return await training_publisher.publish_event(
exchange_name="training.events",
@@ -372,14 +372,14 @@ async def publish_model_validated(model_id: str, tenant_id: str, product_name: s
"data": {
"model_id": model_id,
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"validation_results": validation_results,
"validated_at": datetime.now().isoformat()
}
}
)
async def publish_model_saved(model_id: str, tenant_id: str, product_name: str, model_path: str) -> bool:
async def publish_model_saved(model_id: str, tenant_id: str, inventory_product_id: str, model_path: str) -> bool:
"""Publish model saved event"""
return await training_publisher.publish_event(
exchange_name="training.events",
@@ -391,7 +391,7 @@ async def publish_model_saved(model_id: str, tenant_id: str, product_name: str,
"data": {
"model_id": model_id,
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"model_path": model_path,
"saved_at": datetime.now().isoformat()
}
@@ -571,7 +571,7 @@ class TrainingStatusPublisher:
return 0
async def product_completed(self, product_name: str, model_id: str, metrics: Optional[Dict] = None):
async def product_completed(self, inventory_product_id: str, model_id: str, metrics: Optional[Dict] = None):
"""Mark a product as completed and update progress"""
self.products_completed += 1
@@ -579,7 +579,7 @@ class TrainingStatusPublisher:
clean_metrics = safe_json_serialize(metrics) if metrics else None
await publish_product_training_completed(
self.job_id, self.tenant_id, product_name, model_id, clean_metrics
self.job_id, self.tenant_id, inventory_product_id, model_id, clean_metrics
)
# Update overall progress
@@ -587,7 +587,7 @@ class TrainingStatusPublisher:
progress = int((self.products_completed / self.products_total) * 90) # Save 10% for final steps
await self.progress_update(
progress=progress,
step=f"Completed training for {product_name}",
step=f"Completed training for {inventory_product_id}",
current_product=None
)

View File

@@ -234,7 +234,7 @@ class TrainingDataOrchestrator:
def _validate_sales_record(self, record: Dict[str, Any]) -> bool:
"""Validate individual sales record"""
required_fields = ['date', 'product_name']
required_fields = ['date', 'inventory_product_id']
quantity_fields = ['quantity', 'quantity_sold', 'sales', 'units_sold']
# Check required fields
@@ -755,8 +755,8 @@ class TrainingDataOrchestrator:
# Check data consistency
unique_products = set()
for record in dataset.sales_data:
if 'product_name' in record:
unique_products.add(record['product_name'])
if 'inventory_product_id' in record:
unique_products.add(record['inventory_product_id'])
if len(unique_products) == 0:
validation_results["errors"].append("No product names found in sales data")
@@ -822,7 +822,7 @@ class TrainingDataOrchestrator:
"required": True,
"priority": "high",
"expected_records": "variable",
"data_points": ["date", "product_name", "quantity"],
"data_points": ["date", "inventory_product_id", "quantity"],
"validation": "required_fields_check"
}

View File

@@ -223,7 +223,7 @@ class EnhancedTrainingService:
"training_results": training_results,
"stored_models": [{
"id": str(model.id),
"product_name": model.product_name,
"inventory_product_id": str(model.inventory_product_id),
"model_type": model.model_type,
"model_path": model.model_path,
"is_active": model.is_active,
@@ -292,11 +292,11 @@ class EnhancedTrainingService:
models_trained_type=type(models_trained).__name__,
models_trained_keys=list(models_trained.keys()) if isinstance(models_trained, dict) else "not_dict")
for product_name, model_result in models_trained.items():
for inventory_product_id, model_result in models_trained.items():
# Defensive check: ensure model_result is a dictionary
if not isinstance(model_result, dict):
logger.warning("Skipping invalid model_result for product",
product_name=product_name,
inventory_product_id=inventory_product_id,
model_result_type=type(model_result).__name__,
model_result_value=str(model_result)[:100])
continue
@@ -306,12 +306,12 @@ class EnhancedTrainingService:
metrics = model_result.get("metrics", {})
if not isinstance(metrics, dict):
logger.warning("Invalid metrics object, using empty dict",
product_name=product_name,
inventory_product_id=inventory_product_id,
metrics_type=type(metrics).__name__)
metrics = {}
model_data = {
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"job_id": job_id,
"model_type": "prophet_optimized",
"model_path": model_result.get("model_path"),
@@ -371,14 +371,14 @@ class EnhancedTrainingService:
"""Create performance metrics for stored models"""
try:
for model in stored_models:
model_result = training_results.get("models_trained", {}).get(model.product_name)
model_result = training_results.get("models_trained", {}).get(str(model.inventory_product_id))
if model_result and model_result.get("metrics"):
metrics = model_result["metrics"]
metric_data = {
"model_id": str(model.id),
"tenant_id": tenant_id,
"product_name": model.product_name,
"inventory_product_id": str(model.inventory_product_id),
"mae": metrics.get("mae"),
"mse": metrics.get("mse"),
"rmse": metrics.get("rmse"),
@@ -556,14 +556,14 @@ class EnhancedTrainingService:
async def start_single_product_training(self,
tenant_id: str,
product_name: str,
inventory_product_id: str,
job_id: str,
bakery_location: tuple = (40.4168, -3.7038)) -> Dict[str, Any]:
"""Start enhanced single product training using repository pattern"""
try:
logger.info("Starting enhanced single product training",
tenant_id=tenant_id,
product_name=product_name,
inventory_product_id=inventory_product_id,
job_id=job_id)
# This would use the data client to fetch data for the specific product
@@ -573,7 +573,7 @@ class EnhancedTrainingService:
return {
"job_id": job_id,
"tenant_id": tenant_id,
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"status": "completed",
"message": "Enhanced single product training completed successfully",
"created_at": datetime.now(),
@@ -582,9 +582,9 @@ class EnhancedTrainingService:
"successful_trainings": 1,
"failed_trainings": 0,
"products": [{
"product_name": product_name,
"inventory_product_id": inventory_product_id,
"status": "completed",
"model_id": f"model_{product_name}_{job_id[:8]}",
"model_id": f"model_{inventory_product_id}_{job_id[:8]}",
"data_points": 100,
"metrics": {"mape": 15.5, "mae": 2.3, "rmse": 3.1, "r2_score": 0.85}
}],
@@ -597,7 +597,7 @@ class EnhancedTrainingService:
except Exception as e:
logger.error("Enhanced single product training failed",
product_name=product_name,
inventory_product_id=inventory_product_id,
error=str(e))
raise
@@ -611,7 +611,7 @@ class EnhancedTrainingService:
products = []
for model in stored_models:
products.append({
"product_name": model.get("product_name"),
"inventory_product_id": model.get("inventory_product_id"),
"status": "completed",
"model_id": model.get("id"),
"data_points": model.get("training_samples", 0),