633 lines
23 KiB
Python
633 lines
23 KiB
Python
"""
|
|
Enhanced Training API Endpoints with Repository Pattern
|
|
Updated to use repository pattern with dependency injection and improved error handling
|
|
"""
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, status, BackgroundTasks, Request
|
|
from fastapi import Query, Path
|
|
from typing import List, Optional, Dict, Any
|
|
import structlog
|
|
from datetime import datetime, timezone
|
|
import uuid
|
|
|
|
from app.services.training_service import EnhancedTrainingService
|
|
from app.schemas.training import (
|
|
TrainingJobRequest,
|
|
SingleProductTrainingRequest,
|
|
TrainingJobResponse
|
|
)
|
|
|
|
from app.services.messaging import (
|
|
publish_job_progress,
|
|
publish_data_validation_started,
|
|
publish_data_validation_completed,
|
|
publish_job_step_completed,
|
|
publish_job_completed,
|
|
publish_job_failed,
|
|
publish_job_started
|
|
)
|
|
|
|
from shared.auth.decorators import require_admin_role, get_current_user_dep, get_current_tenant_id_dep
|
|
from shared.database.base import create_database_manager
|
|
from shared.monitoring.decorators import track_execution_time
|
|
from shared.monitoring.metrics import get_metrics_collector
|
|
from app.core.config import settings
|
|
|
|
# Module-level structlog logger used by the endpoints and the background task in this module.
logger = structlog.get_logger()
|
|
# Router that collects this module's endpoints under the "enhanced-training" tag.
router = APIRouter(tags=["enhanced-training"])
|
|
|
|
def get_enhanced_training_service():
    """Build an ``EnhancedTrainingService`` for use as a FastAPI dependency.

    A database manager is created from ``settings.DATABASE_URL`` under the
    service name ``"training-service"`` and passed to the service constructor.
    A new manager and service are built on every call.
    """
    return EnhancedTrainingService(
        create_database_manager(settings.DATABASE_URL, "training-service")
    )
|
|
|
|
@router.post("/tenants/{tenant_id}/training/jobs", response_model=TrainingJobResponse)
@track_execution_time("enhanced_training_job_duration_seconds", "training-service")
async def start_enhanced_training_job(
    request: TrainingJobRequest,
    tenant_id: str = Path(..., description="Tenant ID"),
    background_tasks: BackgroundTasks = None,
    request_obj: Request = None,
    current_tenant: str = Depends(get_current_tenant_id_dep),
    enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
):
    """
    Queue an enhanced training job for a tenant and return immediately.

    Flow:
    1. Reject the call with 403 when the path tenant differs from the
       authenticated tenant.
    2. Generate a job ID of the form ``enhanced_training_<tenant>_<8 hex>``.
    3. Schedule ``execute_enhanced_training_job_background`` as a background task.
    4. Return a ``TrainingJobResponse`` with status ``"pending"`` and empty
       training results; the actual training happens in the background task.

    Args:
        request: Training request; only ``start_date`` and ``end_date`` are read here.
        tenant_id: Tenant taken from the URL path.
        background_tasks: Task container for the background job. When omitted
            (direct call outside FastAPI) a fresh container is created per call.
        request_obj: Incoming request, handed to ``get_metrics_collector``.
        current_tenant: Tenant ID resolved by ``get_current_tenant_id_dep``.
        enhanced_training_service: Injected service; not used by this handler's body.

    Raises:
        HTTPException: 403 on tenant mismatch, 400 on ``ValueError``,
            500 on any other unexpected error.
    """
    metrics = get_metrics_collector(request_obj)

    # The previous default (``BackgroundTasks()``) was a single instance created
    # at import time and shared by every call that did not pass one. Build a
    # per-call container instead so tasks never pile up on shared state.
    if background_tasks is None:
        background_tasks = BackgroundTasks()

    try:
        # Tenant isolation: the path tenant must match the authenticated tenant.
        if tenant_id != current_tenant:
            if metrics:
                metrics.increment_counter("enhanced_training_access_denied_total")
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied to tenant resources"
            )

        # Short random suffix keeps job IDs unique per tenant.
        job_id = f"enhanced_training_{tenant_id}_{uuid.uuid4().hex[:8]}"

        logger.info("Creating enhanced training job using repository pattern",
                    job_id=job_id,
                    tenant_id=tenant_id)

        if metrics:
            metrics.increment_counter("enhanced_training_jobs_created_total")

        # The heavy work runs after the response has been sent.
        # NOTE(review): bakery_location is a hard-coded coordinate pair, not
        # taken from the request or tenant data — confirm this is intended.
        background_tasks.add_task(
            execute_enhanced_training_job_background,
            tenant_id=tenant_id,
            job_id=job_id,
            bakery_location=(40.4168, -3.7038),
            requested_start=request.start_date,
            requested_end=request.end_date
        )

        # Immediate response: nothing has been trained yet, so counters are zero.
        response_data = {
            "job_id": job_id,
            "tenant_id": tenant_id,
            "status": "pending",
            "message": "Enhanced training job started successfully using repository pattern",
            "created_at": datetime.now(timezone.utc),
            "estimated_duration_minutes": 18,
            "training_results": {
                "total_products": 0,  # Will be updated during processing
                "successful_trainings": 0,
                "failed_trainings": 0,
                "products": [],
                "overall_training_time_seconds": 0.0
            },
            "data_summary": None,
            "completed_at": None,
            "error_details": None,
            "processing_metadata": {
                "background_task": True,
                "async_execution": True,
                "enhanced_features": True,
                "repository_pattern": True,
                "dependency_injection": True
            }
        }

        logger.info("Enhanced training job queued successfully",
                    job_id=job_id,
                    features=["repository-pattern", "dependency-injection", "enhanced-tracking"])

        return TrainingJobResponse(**response_data)

    except HTTPException:
        # Already carries the intended status code; pass it through untouched.
        raise
    except ValueError as e:
        if metrics:
            metrics.increment_counter("enhanced_training_validation_errors_total")
        logger.error("Enhanced training job validation error",
                     error=str(e),
                     tenant_id=tenant_id)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        if metrics:
            metrics.increment_counter("enhanced_training_job_errors_total")
        logger.error("Failed to queue enhanced training job",
                     error=str(e),
                     tenant_id=tenant_id)
        # Generic detail on purpose: internal error text is only logged.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to start enhanced training job"
        ) from e
|
|
|
|
|
|
async def execute_enhanced_training_job_background(
    tenant_id: str,
    job_id: str,
    bakery_location: tuple,
    requested_start: Optional[datetime] = None,
    requested_end: Optional[datetime] = None
):
    """
    Run a training job in the background and record its lifecycle.

    Steps:
    1. Build an ``EnhancedTrainingService`` on a new database manager.
    2. Record the job as ``pending``, publish the job-started event, then
       record it as ``running``.
    3. Await ``start_training_job`` on the service.
    4. Record ``completed`` with the results and publish the completion event.

    If any of steps 2-4 raises, the job is recorded as ``failed`` (best effort)
    and a failure event is published. Errors raised while building the service
    or while reporting the failure are logged as critical instead of escaping
    the task. Nothing is re-raised and nothing is returned.

    Args:
        tenant_id: Tenant the job belongs to.
        job_id: Identifier of the job being executed.
        bakery_location: Location value forwarded unchanged to ``start_training_job``.
        requested_start: Optional start of the requested range, forwarded to the service.
        requested_end: Optional end of the requested range, forwarded to the service.
    """
    logger.info("Enhanced background training job started",
                job_id=job_id,
                tenant_id=tenant_id,
                features=["repository-pattern", "enhanced-tracking"])

    # Outer try: catches setup failures and anything raised by the failure
    # handler below. Two ``except Exception`` clauses on one ``try`` would leave
    # the second one unreachable, hence the nesting.
    try:
        database_manager = create_database_manager(settings.DATABASE_URL, "training-service")
        enhanced_training_service = EnhancedTrainingService(database_manager)

        try:
            # Create initial training log entry first
            await enhanced_training_service._update_job_status_repository(
                job_id=job_id,
                status="pending",
                progress=0,
                current_step="Starting enhanced training job",
                tenant_id=tenant_id
            )

            # Publish job started event
            await publish_job_started(job_id, tenant_id, {
                "enhanced_features": True,
                "repository_pattern": True,
                "job_type": "enhanced_training"
            })

            # Mark the job as running before the long pipeline starts.
            await enhanced_training_service._update_job_status_repository(
                job_id=job_id,
                status="running",
                progress=0,
                current_step="Initializing enhanced training pipeline",
                tenant_id=tenant_id
            )

            # Execute the training pipeline itself.
            result = await enhanced_training_service.start_training_job(
                tenant_id=tenant_id,
                job_id=job_id,
                bakery_location=bakery_location,
                requested_start=requested_start,
                requested_end=requested_end
            )

            # Record the final status together with the pipeline results.
            await enhanced_training_service._update_job_status_repository(
                job_id=job_id,
                status="completed",
                progress=100,
                current_step="Enhanced training completed successfully",
                results=result,
                tenant_id=tenant_id
            )

            # Publish completion event with the results plus marker flags.
            await publish_job_completed(
                job_id=job_id,
                tenant_id=tenant_id,
                results={
                    **result,
                    "enhanced_features": True,
                    "repository_integration": True
                }
            )

            logger.info("Enhanced background training job completed successfully",
                        job_id=job_id,
                        models_created=result.get('products_trained', 0),
                        features=["repository-pattern", "enhanced-tracking"])

        except Exception as training_error:
            logger.error("Enhanced training pipeline failed",
                         job_id=job_id,
                         error=str(training_error))

            # Best effort: a failing status write must not prevent the
            # failure event from being published.
            try:
                await enhanced_training_service._update_job_status_repository(
                    job_id=job_id,
                    status="failed",
                    progress=0,
                    current_step="Enhanced training failed",
                    error_message=str(training_error),
                    tenant_id=tenant_id
                )
            except Exception as status_error:
                logger.error("Failed to update job status after training error",
                             job_id=job_id,
                             status_error=str(status_error))

            # Publish failure event
            await publish_job_failed(
                job_id=job_id,
                tenant_id=tenant_id,
                error=str(training_error),
                metadata={
                    "enhanced_features": True,
                    "repository_pattern": True,
                    "error_type": type(training_error).__name__
                }
            )

    except Exception as background_error:
        # Reached when the service cannot be built or failure reporting itself fails.
        logger.error("Critical error in enhanced background training job",
                     job_id=job_id,
                     error=str(background_error))

    finally:
        logger.info("Enhanced background training job cleanup completed",
                    job_id=job_id)
|
|
|
|
|
|
@router.post("/tenants/{tenant_id}/training/products/{inventory_product_id}", response_model=TrainingJobResponse)
@track_execution_time("enhanced_single_product_training_duration_seconds", "training-service")
async def start_enhanced_single_product_training(
    request: SingleProductTrainingRequest,
    tenant_id: str = Path(..., description="Tenant ID"),
    inventory_product_id: str = Path(..., description="Inventory product UUID"),
    request_obj: Request = None,
    current_tenant: str = Depends(get_current_tenant_id_dep),
    enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
):
    """
    Train a single product for a tenant and return the result.

    Unlike the tenant-wide job endpoint, the service call is awaited inside
    the request, so the response is built from the service's result.

    Args:
        request: Single-product request; only ``bakery_location`` is read here.
        tenant_id: Tenant taken from the URL path.
        inventory_product_id: Product to train, taken from the URL path.
        request_obj: Incoming request, handed to ``get_metrics_collector``.
        current_tenant: Tenant ID resolved by ``get_current_tenant_id_dep``.
        enhanced_training_service: Injected service that performs the training.

    Raises:
        HTTPException: 403 on tenant mismatch, 400 on ``ValueError``,
            500 on any other unexpected error.
    """
    metrics = get_metrics_collector(request_obj)

    try:
        # Tenant isolation: the path tenant must match the authenticated tenant.
        if tenant_id != current_tenant:
            if metrics:
                metrics.increment_counter("enhanced_single_product_access_denied_total")
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied to tenant resources"
            )

        logger.info("Starting enhanced single product training",
                    inventory_product_id=inventory_product_id,
                    tenant_id=tenant_id)

        if metrics:
            metrics.increment_counter("enhanced_single_product_training_total")

        # Short random suffix keeps job IDs unique per tenant/product pair.
        job_id = f"enhanced_single_{tenant_id}_{inventory_product_id}_{uuid.uuid4().hex[:8]}"

        # Delegate to enhanced training service (single product method to be implemented)
        # The hard-coded coordinates are only used when the request has no location.
        result = await enhanced_training_service.start_single_product_training(
            tenant_id=tenant_id,
            inventory_product_id=inventory_product_id,
            job_id=job_id,
            bakery_location=request.bakery_location or (40.4168, -3.7038)
        )

        if metrics:
            metrics.increment_counter("enhanced_single_product_training_success_total")

        logger.info("Enhanced single product training completed",
                    inventory_product_id=inventory_product_id,
                    job_id=job_id)

        return TrainingJobResponse(**result)

    except HTTPException:
        # Without this clause the 403 above would be caught by the generic
        # handler below and reported to the client as a 500.
        raise
    except ValueError as e:
        if metrics:
            metrics.increment_counter("enhanced_single_product_validation_errors_total")
        logger.error("Enhanced single product training validation error",
                     error=str(e),
                     inventory_product_id=inventory_product_id)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        if metrics:
            metrics.increment_counter("enhanced_single_product_training_errors_total")
        logger.error("Enhanced single product training failed",
                     error=str(e),
                     inventory_product_id=inventory_product_id)
        # Generic detail on purpose: internal error text is only logged.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Enhanced single product training failed"
        ) from e
|
|
|
|
|
|
@router.get("/tenants/{tenant_id}/training/jobs/{job_id}/status")
@track_execution_time("enhanced_job_status_duration_seconds", "training-service")
async def get_enhanced_training_job_status(
    tenant_id: str = Path(..., description="Tenant ID"),
    job_id: str = Path(..., description="Job ID"),
    request_obj: Request = None,
    current_tenant: str = Depends(get_current_tenant_id_dep),
    enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
):
    """
    Return the status of a training job.

    The service's status mapping is returned with two extra flags,
    ``enhanced_features`` and ``repository_integration``, both ``True``.

    Raises:
        HTTPException: 403 on tenant mismatch, 404 when the service returns
            nothing or a mapping with a truthy ``"error"`` entry, 500 on any
            other unexpected error.
    """
    metrics = get_metrics_collector(request_obj)

    try:
        # Tenant isolation: the path tenant must match the authenticated tenant.
        tenant_mismatch = tenant_id != current_tenant
        if tenant_mismatch:
            if metrics:
                metrics.increment_counter("enhanced_status_access_denied_total")
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied to tenant resources",
            )

        # NOTE(review): the lookup is keyed by job_id only; tenant_id is not
        # passed to the service — confirm the service enforces job ownership.
        job_status = await enhanced_training_service.get_training_status(job_id)

        # A job counts as found only when a non-empty status without an
        # "error" entry came back.
        job_found = bool(job_status) and not job_status.get("error")
        if not job_found:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Training job not found",
            )

        if metrics:
            metrics.increment_counter("enhanced_status_requests_total")

        return {
            **job_status,
            "enhanced_features": True,
            "repository_integration": True,
        }

    except HTTPException:
        # Keep the deliberate 403/404 responses intact.
        raise
    except Exception as exc:
        if metrics:
            metrics.increment_counter("enhanced_status_errors_total")
        logger.error(
            "Failed to get enhanced training status",
            job_id=job_id,
            error=str(exc),
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get training status",
        )
|
|
|
|
|
|
@router.get("/tenants/{tenant_id}/models")
@track_execution_time("enhanced_models_list_duration_seconds", "training-service")
async def get_enhanced_tenant_models(
    tenant_id: str = Path(..., description="Tenant ID"),
    active_only: bool = Query(True, description="Return only active models"),
    skip: int = Query(0, description="Number of models to skip"),
    limit: int = Query(100, description="Number of models to return"),
    request_obj: Request = None,
    current_tenant: str = Depends(get_current_tenant_id_dep),
    enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
):
    """
    List a tenant's models with skip/limit pagination.

    Args:
        tenant_id: Tenant taken from the URL path.
        active_only: Forwarded to the service as its ``active_only`` filter.
        skip: Forwarded to the service as the pagination offset.
        limit: Forwarded to the service as the page size.
        request_obj: Incoming request, handed to ``get_metrics_collector``.
        current_tenant: Tenant ID resolved by ``get_current_tenant_id_dep``.
        enhanced_training_service: Injected service that fetches the models.

    Returns:
        A dict with the tenant ID, the models, ``total_returned`` (the length
        of the returned page), the echoed filter and pagination values, and
        the ``enhanced_features`` / ``repository_integration`` flags.

    Raises:
        HTTPException: 403 on tenant mismatch, 500 on any unexpected error.
    """
    metrics = get_metrics_collector(request_obj)

    try:
        # Tenant isolation: the path tenant must match the authenticated tenant.
        if tenant_id != current_tenant:
            if metrics:
                metrics.increment_counter("enhanced_models_access_denied_total")
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied to tenant resources"
            )

        models = await enhanced_training_service.get_tenant_models(
            tenant_id=tenant_id,
            active_only=active_only,
            skip=skip,
            limit=limit
        )

        if metrics:
            metrics.increment_counter("enhanced_models_requests_total")

        return {
            "tenant_id": tenant_id,
            "models": models,
            "total_returned": len(models),
            "active_only": active_only,
            "pagination": {
                "skip": skip,
                "limit": limit
            },
            "enhanced_features": True,
            "repository_integration": True
        }

    except HTTPException:
        # Without this clause the 403 above would be caught by the generic
        # handler below and reported to the client as a 500.
        raise
    except Exception as e:
        if metrics:
            metrics.increment_counter("enhanced_models_errors_total")
        logger.error("Failed to get enhanced tenant models",
                     tenant_id=tenant_id,
                     error=str(e))
        # Generic detail on purpose: internal error text is only logged.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get tenant models"
        ) from e
|
|
|
|
|
|
@router.get("/tenants/{tenant_id}/models/{model_id}/performance")
@track_execution_time("enhanced_model_performance_duration_seconds", "training-service")
async def get_enhanced_model_performance(
    tenant_id: str = Path(..., description="Tenant ID"),
    model_id: str = Path(..., description="Model ID"),
    request_obj: Request = None,
    current_tenant: str = Depends(get_current_tenant_id_dep),
    enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
):
    """
    Return the performance data of a model.

    The service's performance mapping is returned with two extra flags,
    ``enhanced_features`` and ``repository_integration``, both ``True``.

    Raises:
        HTTPException: 403 on tenant mismatch, 404 when the service returns
            a falsy result, 500 on any other unexpected error.
    """
    metrics = get_metrics_collector(request_obj)

    try:
        # Tenant isolation: the path tenant must match the authenticated tenant.
        tenant_mismatch = tenant_id != current_tenant
        if tenant_mismatch:
            if metrics:
                metrics.increment_counter("enhanced_performance_access_denied_total")
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied to tenant resources",
            )

        # NOTE(review): the lookup is keyed by model_id only; tenant_id is not
        # passed to the service — confirm the service enforces model ownership.
        model_performance = await enhanced_training_service.get_model_performance(model_id)

        if not model_performance:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Model performance not found",
            )

        if metrics:
            metrics.increment_counter("enhanced_performance_requests_total")

        return {
            **model_performance,
            "enhanced_features": True,
            "repository_integration": True,
        }

    except HTTPException:
        # Keep the deliberate 403/404 responses intact.
        raise
    except Exception as exc:
        if metrics:
            metrics.increment_counter("enhanced_performance_errors_total")
        logger.error(
            "Failed to get enhanced model performance",
            model_id=model_id,
            error=str(exc),
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get model performance",
        )
|
|
|
|
|
|
@router.get("/tenants/{tenant_id}/statistics")
@track_execution_time("enhanced_tenant_statistics_duration_seconds", "training-service")
async def get_enhanced_tenant_statistics(
    tenant_id: str = Path(..., description="Tenant ID"),
    request_obj: Request = None,
    current_tenant: str = Depends(get_current_tenant_id_dep),
    enhanced_training_service: EnhancedTrainingService = Depends(get_enhanced_training_service)
):
    """
    Return the training statistics of a tenant.

    The service's statistics mapping is returned with two extra flags,
    ``enhanced_features`` and ``repository_integration``, both ``True``.

    Raises:
        HTTPException: 403 on tenant mismatch; 500 with the service's own
            error value as detail when the statistics contain a truthy
            ``"error"`` entry; 500 with a generic detail on any other
            unexpected error.
    """
    metrics = get_metrics_collector(request_obj)

    try:
        # Tenant isolation: the path tenant must match the authenticated tenant.
        tenant_mismatch = tenant_id != current_tenant
        if tenant_mismatch:
            if metrics:
                metrics.increment_counter("enhanced_statistics_access_denied_total")
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail="Access denied to tenant resources",
            )

        tenant_stats = await enhanced_training_service.get_tenant_statistics(tenant_id)

        # The service reports its own failures through an "error" entry.
        # Presumably tenant_stats is a plain dict; verify against the service.
        service_error = tenant_stats.get("error")
        if service_error:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=service_error,
            )

        if metrics:
            metrics.increment_counter("enhanced_statistics_requests_total")

        return {
            **tenant_stats,
            "enhanced_features": True,
            "repository_integration": True,
        }

    except HTTPException:
        # Keep the deliberate 403/500 responses intact.
        raise
    except Exception as exc:
        if metrics:
            metrics.increment_counter("enhanced_statistics_errors_total")
        logger.error(
            "Failed to get enhanced tenant statistics",
            tenant_id=tenant_id,
            error=str(exc),
        )
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get tenant statistics",
        )
|
|
|
|
|
|
@router.get("/health")
async def enhanced_health_check():
    """
    Report a static health payload for the training service.

    Returns:
        A dict with a fixed status, service name, version and feature list,
        plus the current time as a timezone-aware UTC ISO-8601 string.
    """
    return {
        "status": "healthy",
        "service": "enhanced-training-service",
        "version": "2.0.0",
        "features": [
            "repository-pattern",
            "dependency-injection",
            "enhanced-error-handling",
            "metrics-tracking",
            "transactional-operations"
        ],
        # Timezone-aware UTC, matching the created_at timestamps produced
        # elsewhere in this module; a naive local time is ambiguous to clients.
        "timestamp": datetime.now(timezone.utc).isoformat()
    }