# services/training/app/api/training.py
"""
Training API Endpoints - Entry point for training requests
Handles HTTP requests and delegates to Training Service
"""

from datetime import datetime
from typing import List, Optional, Dict, Any

import structlog
from fastapi import APIRouter, Depends, HTTPException, status, BackgroundTasks
from fastapi import Query, Path
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.database import get_db
from app.services.training_service import TrainingService
from app.schemas.training import (
    TrainingJobRequest,
    SingleProductTrainingRequest
)
from app.schemas.training import (
    TrainingJobResponse
)

# Import shared auth decorators (assuming they exist in your microservices)
from shared.auth.decorators import get_current_tenant_id_dep

logger = structlog.get_logger()
router = APIRouter()

# Initialize training service
training_service = TrainingService()

# Fallback (latitude, longitude) used when a request carries no bakery
# location: Madrid.
DEFAULT_BAKERY_LOCATION = (40.4168, -3.7038)


def _ensure_tenant_access(tenant_id: str, current_tenant: str) -> None:
    """
    Reject the request when the path tenant differs from the authenticated one.

    Raises:
        HTTPException: 403 when ``tenant_id`` does not equal ``current_tenant``.
    """
    if tenant_id != current_tenant:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Access denied to tenant resources"
        )


@router.post("/tenants/{tenant_id}/training/jobs", response_model=TrainingJobResponse)
async def start_training_job(
    request: TrainingJobRequest,
    tenant_id: str = Path(..., description="Tenant ID"),
    # NOTE(review): an instance default is unusual for BackgroundTasks and the
    # parameter is not used below; kept as-is to leave the signature untouched.
    background_tasks: BackgroundTasks = BackgroundTasks(),
    current_tenant: str = Depends(get_current_tenant_id_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Start a new training job for all tenant products.

    This is the main entry point for the training pipeline:
    API → Training Service → Trainer → Data Processor → Prophet Manager

    Raises:
        HTTPException: 403 when the path tenant does not match the
            authenticated tenant, 400 when a ``ValueError`` is raised while
            starting the job or building the response, 500 for any other
            failure.
    """
    # Checked outside the try block: HTTPException is itself an Exception, so
    # raising it inside would be caught below and reported as a 500.
    _ensure_tenant_access(tenant_id, current_tenant)

    logger.info(f"Starting training job for tenant {tenant_id}")

    try:
        # Request-scoped service bound to this request's DB session.
        job_service = TrainingService(db_session=db)

        # Delegate to training service (Step 1 of the flow)
        result = await job_service.start_training_job(
            tenant_id=tenant_id,
            bakery_location=request.bakery_location or DEFAULT_BAKERY_LOCATION,
            requested_start=request.start_date or None,
            requested_end=request.end_date or None,
            job_id=request.job_id
        )

        return TrainingJobResponse(**result)

    except HTTPException:
        # Deliberate HTTP errors must reach the client with their own status.
        raise
    except ValueError as e:
        logger.error(f"Training job validation error: {e}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        logger.error(f"Training job failed: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Training job failed"
        ) from e


@router.post("/tenants/{tenant_id}/training/products/{product_name}", response_model=TrainingJobResponse)
async def start_single_product_training(
    request: SingleProductTrainingRequest,
    tenant_id: str = Path(..., description="Tenant ID"),
    product_name: str = Path(..., description="Product name"),
    current_tenant: str = Depends(get_current_tenant_id_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Start training for a single product.

    Uses the same pipeline but filters for specific product.

    Raises:
        HTTPException: 403 when the path tenant does not match the
            authenticated tenant, 400 when a ``ValueError`` is raised while
            starting the training or building the response, 500 for any
            other failure.
    """
    # Checked outside the try block so the 403 is not rewritten into a 500.
    _ensure_tenant_access(tenant_id, current_tenant)

    logger.info(f"Starting single product training for {product_name} (tenant {tenant_id})")

    try:
        # NOTE(review): this uses the module-level service, which was created
        # without a DB session, while start_training_job builds a
        # session-bound one and `db` is unused here. Confirm this is intended.
        result = await training_service.start_single_product_training(
            tenant_id=tenant_id,
            product_name=product_name,
            sales_data=request.sales_data,
            bakery_location=request.bakery_location or DEFAULT_BAKERY_LOCATION,
            weather_data=request.weather_data,
            traffic_data=request.traffic_data,
            job_id=request.job_id
        )

        return TrainingJobResponse(**result)

    except HTTPException:
        # Deliberate HTTP errors must reach the client with their own status.
        raise
    except ValueError as e:
        logger.error(f"Single product training validation error: {e}")
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=str(e)
        ) from e
    except Exception as e:
        logger.error(f"Single product training failed: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Single product training failed"
        ) from e


@router.post("/tenants/{tenant_id}/training/jobs/{job_id}/cancel")
async def cancel_training_job(
    tenant_id: str = Path(..., description="Tenant ID"),
    job_id: str = Path(..., description="Job ID"),
    current_tenant: str = Depends(get_current_tenant_id_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Cancel a running training job.

    Cancellation itself is not implemented yet: after the tenant check the
    handler only logs the request and returns a fixed success message.

    Raises:
        HTTPException: 403 when the path tenant does not match the
            authenticated tenant, 500 for any unexpected failure.
    """
    # Checked outside the try block so the 403 is not rewritten into a 500.
    _ensure_tenant_access(tenant_id, current_tenant)

    try:
        # TODO: Implement job cancellation
        logger.info(f"Cancelling training job {job_id} for tenant {tenant_id}")

        return {"message": "Training job cancelled successfully"}

    except HTTPException:
        # Deliberate HTTP errors must reach the client with their own status.
        raise
    except Exception as e:
        logger.error(f"Failed to cancel training job: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to cancel training job"
        ) from e


@router.get("/tenants/{tenant_id}/training/jobs/{job_id}/logs")
async def get_training_logs(
    tenant_id: str = Path(..., description="Tenant ID"),
    job_id: str = Path(..., description="Job ID"),
    limit: int = Query(100, description="Number of log entries to return"),
    current_tenant: str = Depends(get_current_tenant_id_dep),
    db: AsyncSession = Depends(get_db)
):
    """
    Get training job logs.

    Log retrieval is not implemented yet: the handler returns a fixed list of
    placeholder entries, and ``limit`` is accepted but not applied.

    Raises:
        HTTPException: 403 when the path tenant does not match the
            authenticated tenant, 500 for any unexpected failure.
    """
    # Checked outside the try block so the 403 is not rewritten into a 500.
    _ensure_tenant_access(tenant_id, current_tenant)

    try:
        # TODO: Implement log retrieval
        return {
            "job_id": job_id,
            "logs": [
                f"Training job {job_id} started",
                "Data preprocessing completed",
                "Model training completed",
                "Training job finished successfully"
            ]
        }

    except HTTPException:
        # Deliberate HTTP errors must reach the client with their own status.
        raise
    except Exception as e:
        logger.error(f"Failed to get training logs: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get training logs"
        ) from e


@router.get("/health")
async def health_check():
    """
    Health check endpoint for the training service.

    Returns a static status payload plus the current local time in ISO 8601
    format.
    """
    return {
        "status": "healthy",
        "service": "training",
        "version": "1.0.0",
        # NOTE(review): datetime.now() is naive (no timezone offset in the
        # output); confirm whether consumers expect UTC.
        "timestamp": datetime.now().isoformat()
    }