Fix new services implementation 1

This commit is contained in:
Urtzi Alfaro
2025-08-13 21:41:00 +02:00
parent 16b8a9d50c
commit 262b3dc9c4
13 changed files with 1702 additions and 1210 deletions

View File

@@ -1,391 +0,0 @@
# Enhanced Inter-Service Communication System
This directory contains the enhanced inter-service communication system that integrates with the new repository pattern architecture. The system provides circuit breakers, caching, monitoring, and event tracking for all service-to-service communications.
## Architecture Overview
### Base Components
1. **BaseServiceClient** - Foundation class providing authentication, retries, and basic HTTP operations
2. **EnhancedServiceClient** - Adds circuit breaker, caching, and monitoring capabilities
3. **ServiceRegistry** - Central registry for managing all enhanced service clients
### Enhanced Service Clients
Each service has a specialized enhanced client:
- **SalesServiceClient** - Sales data, products, data import with optimized caching
- **ExternalServiceClient** - Weather and traffic data collection with external API integration
- **EnhancedAuthServiceClient** - Authentication, user management, permissions with security focus
- **EnhancedTrainingServiceClient** - ML training, model management, deployment with pipeline monitoring
- **EnhancedForecastingServiceClient** - Forecasting, predictions, scenarios with analytics
- **EnhancedTenantServiceClient** - Tenant management, memberships, organization features
- **EnhancedNotificationServiceClient** - Notifications, templates, delivery tracking
## Key Features
### Circuit Breaker Pattern
- **States**: Closed (normal), Open (failing), Half-Open (testing recovery)
- **Configuration**: Failure threshold, recovery timeout, success threshold
- **Monitoring**: State changes tracked and logged
### Intelligent Caching
- **TTL-based**: Different cache durations for different data types
- **Invalidation**: Pattern-based cache invalidation on updates
- **Statistics**: Hit/miss ratios and performance metrics
- **Manual Control**: Clear specific cache patterns when needed
### Event Integration
- **Repository Events**: Entity created/updated/deleted events
- **Correlation IDs**: Track operations across services
- **Metadata**: Rich event metadata for debugging and monitoring
### Monitoring & Metrics
- **Request Metrics**: Success/failure rates, latencies
- **Cache Metrics**: Hit rates, entry counts
- **Circuit Breaker Metrics**: State changes, failure counts
- **Health Checks**: Per-service and aggregate health status
## Usage Examples
### Basic Usage with Service Registry
```python
from shared.clients.enhanced_service_client import ServiceRegistry
from shared.config.base import BaseServiceSettings
# Initialize registry
config = BaseServiceSettings()
registry = ServiceRegistry(config, calling_service="forecasting")
# Get enhanced clients
data_client = registry.get_data_client()
auth_client = registry.get_auth_client()
training_client = registry.get_training_client()
# Use with full features
sales_data = await data_client.get_all_sales_data_with_monitoring(
tenant_id="tenant-123",
start_date="2024-01-01",
end_date="2024-12-31",
correlation_id="forecast-job-456"
)
```
### Data Service Operations
```python
# Get sales data with intelligent caching
sales_data = await data_client.get_sales_data_cached(
tenant_id="tenant-123",
start_date="2024-01-01",
end_date="2024-01-31",
aggregation="daily"
)
# Upload sales data with cache invalidation and events
result = await data_client.upload_sales_data_with_events(
tenant_id="tenant-123",
sales_data=sales_records,
correlation_id="data-import-789"
)
# Get weather data with caching (30 min TTL)
weather_data = await data_client.get_weather_historical_cached(
tenant_id="tenant-123",
start_date="2024-01-01",
end_date="2024-01-31"
)
```
### Authentication & User Management
```python
# Authenticate with security monitoring
auth_result = await auth_client.authenticate_user_cached(
email="user@example.com",
password="password"
)
# Check permissions with caching
has_access = await auth_client.check_user_permissions_cached(
user_id="user-123",
tenant_id="tenant-456",
resource="sales_data",
action="read"
)
# Create user with events
user = await auth_client.create_user_with_events(
user_data={
"email": "new@example.com",
"name": "New User",
"role": "analyst"
},
tenant_id="tenant-123",
correlation_id="user-creation-789"
)
```
### Training & ML Operations
```python
# Create training job with monitoring
job = await training_client.create_training_job_with_monitoring(
tenant_id="tenant-123",
include_weather=True,
include_traffic=False,
min_data_points=30,
correlation_id="training-pipeline-456"
)
# Get active model with caching
model = await training_client.get_active_model_for_product_cached(
tenant_id="tenant-123",
product_name="croissants"
)
# Deploy model with events
deployment = await training_client.deploy_model_with_events(
tenant_id="tenant-123",
model_id="model-789",
correlation_id="deployment-123"
)
# Get pipeline status
status = await training_client.get_training_pipeline_status("tenant-123")
```
### Forecasting & Predictions
```python
# Create forecast with monitoring
forecast = await forecasting_client.create_forecast_with_monitoring(
tenant_id="tenant-123",
model_id="model-456",
start_date="2024-02-01",
end_date="2024-02-29",
correlation_id="forecast-creation-789"
)
# Get predictions with caching
predictions = await forecasting_client.get_predictions_cached(
tenant_id="tenant-123",
forecast_id="forecast-456",
start_date="2024-02-01",
end_date="2024-02-07"
)
# Real-time prediction with caching
prediction = await forecasting_client.create_realtime_prediction_with_monitoring(
tenant_id="tenant-123",
model_id="model-456",
target_date="2024-02-01",
features={"temperature": 20, "day_of_week": 1},
correlation_id="realtime-pred-123"
)
# Get forecasting dashboard
dashboard = await forecasting_client.get_forecasting_dashboard("tenant-123")
```
### Tenant Management
```python
# Create tenant with monitoring
tenant = await tenant_client.create_tenant_with_monitoring(
name="New Bakery Chain",
owner_id="user-123",
description="Multi-location bakery chain",
correlation_id="tenant-creation-456"
)
# Add member with events
membership = await tenant_client.add_tenant_member_with_events(
tenant_id="tenant-123",
user_id="user-456",
role="manager",
correlation_id="member-add-789"
)
# Get tenant analytics
analytics = await tenant_client.get_tenant_analytics("tenant-123")
```
### Notification Management
```python
# Send notification with monitoring
notification = await notification_client.send_notification_with_monitoring(
recipient_id="user-123",
notification_type="forecast_ready",
title="Forecast Complete",
message="Your weekly forecast is ready for review",
tenant_id="tenant-456",
priority="high",
channels=["email", "in_app"],
correlation_id="forecast-notification-789"
)
# Send bulk notification
bulk_result = await notification_client.send_bulk_notification_with_monitoring(
recipients=["user-123", "user-456", "user-789"],
notification_type="system_update",
title="System Maintenance",
message="Scheduled maintenance tonight at 2 AM",
priority="normal",
correlation_id="maintenance-notification-123"
)
# Get delivery analytics
analytics = await notification_client.get_delivery_analytics(
tenant_id="tenant-123",
start_date="2024-01-01",
end_date="2024-01-31"
)
```
## Health Monitoring
### Individual Service Health
```python
# Get specific service health
data_health = data_client.get_data_service_health()
auth_health = auth_client.get_auth_service_health()
training_health = training_client.get_training_service_health()
# Health includes:
# - Circuit breaker status
# - Cache statistics and configuration
# - Service-specific features
# - Supported endpoints
```
### Registry-Level Health
```python
# Get all service health status
all_health = registry.get_all_health_status()
# Get aggregate metrics
metrics = registry.get_aggregate_metrics()
# Returns:
# - Total cache hits/misses and hit rate
# - Circuit breaker states for all services
# - Count of healthy vs total services
```
## Configuration
### Cache TTL Configuration
Each enhanced client has optimized cache TTL values:
```python
# Data Service
sales_cache_ttl = 600 # 10 minutes
weather_cache_ttl = 1800 # 30 minutes
traffic_cache_ttl = 3600 # 1 hour
product_cache_ttl = 300 # 5 minutes
# Auth Service
user_cache_ttl = 300 # 5 minutes
token_cache_ttl = 60 # 1 minute
permission_cache_ttl = 900 # 15 minutes
# Training Service
job_cache_ttl = 180 # 3 minutes
model_cache_ttl = 600 # 10 minutes
metrics_cache_ttl = 300 # 5 minutes
# And so on...
```
### Circuit Breaker Configuration
```python
CircuitBreakerConfig(
failure_threshold=5, # Failures before opening
recovery_timeout=60, # Seconds before testing recovery
success_threshold=2, # Successes needed to close
timeout=30 # Request timeout in seconds
)
```
## Event System Integration
All enhanced clients integrate with the enhanced event system:
### Event Types
- **EntityCreatedEvent** - When entities are created
- **EntityUpdatedEvent** - When entities are modified
- **EntityDeletedEvent** - When entities are removed
### Event Metadata
- **correlation_id** - Track operations across services
- **source_service** - Service that generated the event
- **destination_service** - Target service
- **tenant_id** - Tenant context
- **user_id** - User context
- **tags** - Additional metadata
### Usage in Enhanced Clients
Events are automatically published for:
- Data uploads and modifications
- User creation/updates/deletion
- Training job lifecycle
- Model deployments
- Forecast creation
- Tenant management operations
- Notification delivery
## Error Handling & Resilience
### Circuit Breaker Protection
- Automatically stops requests when services are failing
- Provides fallback to cached data when available
- Gradually tests service recovery
### Retry Logic
- Exponential backoff for transient failures
- Configurable retry counts and delays
- Authentication token refresh on 401 errors
### Cache Fallbacks
- Returns cached data when services are unavailable
- Graceful degradation with stale data warnings
- Manual cache invalidation for data consistency
## Integration with Repository Pattern
The enhanced clients seamlessly integrate with the new repository pattern:
### Service Layer Integration
```python
class ForecastingService:
def __init__(self,
forecast_repository: ForecastRepository,
service_registry: ServiceRegistry):
self.forecast_repository = forecast_repository
self.data_client = service_registry.get_data_client()
self.training_client = service_registry.get_training_client()
async def create_forecast(self, tenant_id: str, model_id: str):
# Get data through enhanced client
sales_data = await self.data_client.get_all_sales_data_with_monitoring(
tenant_id=tenant_id,
correlation_id=f"forecast_data_{datetime.utcnow().isoformat()}"
)
# Use repository for database operations
forecast = await self.forecast_repository.create({
"tenant_id": tenant_id,
"model_id": model_id,
"status": "pending"
})
return forecast
```
This completes the comprehensive enhanced inter-service communication system that integrates seamlessly with the new repository pattern architecture, providing resilience, monitoring, and advanced features for all service interactions.

View File

@@ -0,0 +1,347 @@
# shared/clients/inventory_client.py
"""
Inventory Service Client - Inter-service communication
Handles communication with the inventory service for all other services
"""
import structlog
from typing import Dict, Any, List, Optional, Union
from uuid import UUID
from shared.clients.base_service_client import BaseServiceClient
from shared.config.base import BaseServiceSettings
logger = structlog.get_logger()
class InventoryServiceClient(BaseServiceClient):
"""Client for communicating with the inventory service via gateway"""
def __init__(self, config: BaseServiceSettings):
super().__init__("inventory", config)
def get_service_base_path(self) -> str:
"""Return the base path for inventory service APIs"""
return "/api/v1"
# ================================================================
# INGREDIENT MANAGEMENT
# ================================================================
async def get_ingredient_by_id(self, ingredient_id: UUID, tenant_id: str) -> Optional[Dict[str, Any]]:
"""Get ingredient details by ID"""
try:
result = await self.get(f"ingredients/{ingredient_id}", tenant_id=tenant_id)
if result:
logger.info("Retrieved ingredient from inventory service",
ingredient_id=ingredient_id, tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error fetching ingredient by ID",
error=str(e), ingredient_id=ingredient_id, tenant_id=tenant_id)
return None
async def search_ingredients(
self,
tenant_id: str,
search: Optional[str] = None,
category: Optional[str] = None,
is_active: Optional[bool] = None,
skip: int = 0,
limit: int = 100
) -> List[Dict[str, Any]]:
"""Search ingredients with filters"""
try:
params = {
"skip": skip,
"limit": limit
}
if search:
params["search"] = search
if category:
params["category"] = category
if is_active is not None:
params["is_active"] = is_active
result = await self.get("ingredients", tenant_id=tenant_id, params=params)
ingredients = result if isinstance(result, list) else []
logger.info("Searched ingredients in inventory service",
search_term=search, count=len(ingredients), tenant_id=tenant_id)
return ingredients
except Exception as e:
logger.error("Error searching ingredients",
error=str(e), search=search, tenant_id=tenant_id)
return []
async def get_all_ingredients(self, tenant_id: str, is_active: Optional[bool] = True) -> List[Dict[str, Any]]:
"""Get all ingredients for a tenant (paginated)"""
try:
params = {}
if is_active is not None:
params["is_active"] = is_active
ingredients = await self.get_paginated("ingredients", tenant_id=tenant_id, params=params)
logger.info("Retrieved all ingredients from inventory service",
count=len(ingredients), tenant_id=tenant_id)
return ingredients
except Exception as e:
logger.error("Error fetching all ingredients",
error=str(e), tenant_id=tenant_id)
return []
async def create_ingredient(self, ingredient_data: Dict[str, Any], tenant_id: str) -> Optional[Dict[str, Any]]:
"""Create a new ingredient"""
try:
result = await self.post("ingredients", data=ingredient_data, tenant_id=tenant_id)
if result:
logger.info("Created ingredient in inventory service",
ingredient_name=ingredient_data.get('name'), tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error creating ingredient",
error=str(e), ingredient_data=ingredient_data, tenant_id=tenant_id)
return None
async def update_ingredient(
self,
ingredient_id: UUID,
ingredient_data: Dict[str, Any],
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""Update an existing ingredient"""
try:
result = await self.put(f"ingredients/{ingredient_id}", data=ingredient_data, tenant_id=tenant_id)
if result:
logger.info("Updated ingredient in inventory service",
ingredient_id=ingredient_id, tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error updating ingredient",
error=str(e), ingredient_id=ingredient_id, tenant_id=tenant_id)
return None
async def delete_ingredient(self, ingredient_id: UUID, tenant_id: str) -> bool:
"""Delete (deactivate) an ingredient"""
try:
result = await self.delete(f"ingredients/{ingredient_id}", tenant_id=tenant_id)
success = result is not None
if success:
logger.info("Deleted ingredient in inventory service",
ingredient_id=ingredient_id, tenant_id=tenant_id)
return success
except Exception as e:
logger.error("Error deleting ingredient",
error=str(e), ingredient_id=ingredient_id, tenant_id=tenant_id)
return False
async def get_ingredient_stock(
self,
ingredient_id: UUID,
tenant_id: str,
include_unavailable: bool = False
) -> List[Dict[str, Any]]:
"""Get stock entries for an ingredient"""
try:
params = {}
if include_unavailable:
params["include_unavailable"] = include_unavailable
result = await self.get(f"ingredients/{ingredient_id}/stock", tenant_id=tenant_id, params=params)
stock_entries = result if isinstance(result, list) else []
logger.info("Retrieved ingredient stock from inventory service",
ingredient_id=ingredient_id, stock_count=len(stock_entries), tenant_id=tenant_id)
return stock_entries
except Exception as e:
logger.error("Error fetching ingredient stock",
error=str(e), ingredient_id=ingredient_id, tenant_id=tenant_id)
return []
# ================================================================
# STOCK MANAGEMENT
# ================================================================
async def get_stock_levels(self, tenant_id: str, ingredient_ids: Optional[List[UUID]] = None) -> List[Dict[str, Any]]:
"""Get current stock levels"""
try:
params = {}
if ingredient_ids:
params["ingredient_ids"] = [str(id) for id in ingredient_ids]
result = await self.get("stock", tenant_id=tenant_id, params=params)
stock_levels = result if isinstance(result, list) else []
logger.info("Retrieved stock levels from inventory service",
count=len(stock_levels), tenant_id=tenant_id)
return stock_levels
except Exception as e:
logger.error("Error fetching stock levels",
error=str(e), tenant_id=tenant_id)
return []
async def get_low_stock_alerts(self, tenant_id: str) -> List[Dict[str, Any]]:
"""Get low stock alerts"""
try:
result = await self.get("alerts", tenant_id=tenant_id, params={"type": "low_stock"})
alerts = result if isinstance(result, list) else []
logger.info("Retrieved low stock alerts from inventory service",
count=len(alerts), tenant_id=tenant_id)
return alerts
except Exception as e:
logger.error("Error fetching low stock alerts",
error=str(e), tenant_id=tenant_id)
return []
async def consume_stock(
self,
consumption_data: Dict[str, Any],
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""Record stock consumption"""
try:
result = await self.post("stock/consume", data=consumption_data, tenant_id=tenant_id)
if result:
logger.info("Recorded stock consumption",
tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error recording stock consumption",
error=str(e), tenant_id=tenant_id)
return None
async def receive_stock(
self,
receipt_data: Dict[str, Any],
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""Record stock receipt"""
try:
result = await self.post("stock/receive", data=receipt_data, tenant_id=tenant_id)
if result:
logger.info("Recorded stock receipt",
tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error recording stock receipt",
error=str(e), tenant_id=tenant_id)
return None
# ================================================================
# PRODUCT CLASSIFICATION (for onboarding)
# ================================================================
async def classify_product(
self,
product_name: str,
sales_volume: Optional[float],
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""Classify a single product for inventory creation"""
try:
classification_data = {
"product_name": product_name,
"sales_volume": sales_volume
}
result = await self.post("inventory/classify-product", data=classification_data, tenant_id=tenant_id)
if result:
logger.info("Classified product",
product=product_name,
classification=result.get('product_type'),
confidence=result.get('confidence_score'),
tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error classifying product",
error=str(e), product=product_name, tenant_id=tenant_id)
return None
async def classify_products_batch(
self,
products: List[Dict[str, Any]],
tenant_id: str
) -> Optional[Dict[str, Any]]:
"""Classify multiple products for onboarding automation"""
try:
classification_data = {
"products": products
}
result = await self.post("inventory/classify-products-batch", data=classification_data, tenant_id=tenant_id)
if result:
suggestions = result.get('suggestions', [])
business_model = result.get('business_model_analysis', {}).get('model', 'unknown')
logger.info("Batch classification complete",
total_products=len(suggestions),
business_model=business_model,
tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error in batch classification",
error=str(e), products_count=len(products), tenant_id=tenant_id)
return None
# ================================================================
# DASHBOARD AND ANALYTICS
# ================================================================
async def get_inventory_dashboard(self, tenant_id: str) -> Optional[Dict[str, Any]]:
"""Get inventory dashboard data"""
try:
result = await self.get("dashboard", tenant_id=tenant_id)
if result:
logger.info("Retrieved inventory dashboard data", tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error fetching inventory dashboard",
error=str(e), tenant_id=tenant_id)
return None
async def get_inventory_summary(self, tenant_id: str) -> Optional[Dict[str, Any]]:
"""Get inventory summary statistics"""
try:
result = await self.get("dashboard/summary", tenant_id=tenant_id)
if result:
logger.info("Retrieved inventory summary", tenant_id=tenant_id)
return result
except Exception as e:
logger.error("Error fetching inventory summary",
error=str(e), tenant_id=tenant_id)
return None
# ================================================================
# UTILITY METHODS
# ================================================================
async def health_check(self) -> bool:
"""Check if inventory service is healthy"""
try:
result = await self.get("../health") # Health endpoint is not tenant-scoped
return result is not None
except Exception as e:
logger.error("Inventory service health check failed", error=str(e))
return False
# Factory function for dependency injection
def create_inventory_client(config: BaseServiceSettings) -> InventoryServiceClient:
"""Create inventory service client instance"""
return InventoryServiceClient(config)
# Convenience function for quick access (requires config to be passed)
async def get_inventory_client(config: BaseServiceSettings) -> InventoryServiceClient:
"""Get inventory service client instance"""
return create_inventory_client(config)