New alert service

This commit is contained in:
Urtzi Alfaro
2025-12-05 20:07:01 +01:00
parent 1fe3a73549
commit 667e6e0404
393 changed files with 26002 additions and 61033 deletions

View File

@@ -27,6 +27,7 @@ async def _background_cloning_task(session_id: str, session_obj_id: UUID, base_t
from app.core.database import db_manager
from app.models import DemoSession
from sqlalchemy import select
from app.core.redis_wrapper import get_redis
# Create new database session for background task
async with db_manager.session_factory() as db:

View File

@@ -1,6 +1,14 @@
"""
Demo Data Cloning Orchestrator
Coordinates asynchronous cloning across microservices
ARCHITECTURE NOTE:
This orchestrator now uses the Strategy Pattern for demo cloning.
- ProfessionalCloningStrategy: Single-tenant demos
- EnterpriseCloningStrategy: Multi-tenant demos with parent + children
- CloningStrategyFactory: Type-safe strategy selection
No recursion possible - strategies are leaf nodes that compose helpers.
"""
import asyncio
@@ -12,6 +20,11 @@ import os
from enum import Enum
from app.models.demo_session import CloningStatus
from app.services.cloning_strategies import (
CloningStrategy,
CloningContext,
CloningStrategyFactory
)
logger = structlog.get_logger()
@@ -101,18 +114,20 @@ class CloneOrchestrator:
required=False, # Optional - provides procurement and purchase orders
timeout=25.0 # Longer - clones many procurement entities
),
ServiceDefinition(
name="distribution",
url=os.getenv("DISTRIBUTION_SERVICE_URL", "http://distribution-service:8000"),
required=False, # Optional - provides distribution routes and shipments (enterprise only)
timeout=30.0 # Longer - clones routes, shipments, and schedules
),
ServiceDefinition(
name="orchestrator",
url=os.getenv("ORCHESTRATOR_SERVICE_URL", "http://orchestrator-service:8000"),
required=False, # Optional - provides orchestration run history
timeout=15.0 # Standard timeout for orchestration data
),
ServiceDefinition(
name="alert_processor",
url=os.getenv("ALERT_PROCESSOR_SERVICE_URL", "http://alert-processor-api:8010"),
required=False, # Optional - provides alert and prevented issue history
timeout=15.0 # Standard timeout for alert data
),
# Note: alert_processor removed - uses event-driven architecture via RabbitMQ
# No historical data to clone, processes events in real-time
]
async def _update_progress_in_redis(
@@ -185,192 +200,116 @@ class CloneOrchestrator:
services_filter: Optional[List[str]] = None
) -> Dict[str, Any]:
"""
Orchestrate cloning across all services in parallel
Orchestrate cloning using Strategy Pattern
This is the main entry point for all demo cloning operations.
Selects the appropriate strategy based on demo_account_type and delegates to it.
Args:
base_tenant_id: Template tenant UUID
virtual_tenant_id: Target virtual tenant UUID
demo_account_type: Type of demo account
demo_account_type: Type of demo account ("professional" or "enterprise")
session_id: Session ID for tracing
session_metadata: Additional session metadata (for enterprise demos)
services_filter: Optional list of service names to clone (BUG-007 fix)
session_metadata: Additional session metadata (required for enterprise demos)
services_filter: Optional list of service names to clone
Returns:
Dictionary with overall status and per-service results
"""
# BUG-007 FIX: Filter services if specified
services_to_clone = self.services
if services_filter:
services_to_clone = [s for s in self.services if s.name in services_filter]
logger.info(
f"Filtering to {len(services_to_clone)} services",
session_id=session_id,
services_filter=services_filter
)
Raises:
ValueError: If demo_account_type is not supported
"""
logger.info(
"Starting orchestrated cloning",
"Starting orchestrated cloning with strategy pattern",
session_id=session_id,
virtual_tenant_id=virtual_tenant_id,
demo_account_type=demo_account_type,
service_count=len(services_to_clone),
is_enterprise=demo_account_type == "enterprise"
)
# Check if this is an enterprise demo
if demo_account_type == "enterprise" and session_metadata:
# Validate that this is actually an enterprise demo based on metadata
is_enterprise = session_metadata.get("is_enterprise", False)
child_configs = session_metadata.get("child_configs", [])
child_tenant_ids = session_metadata.get("child_tenant_ids", [])
try:
# Select strategy based on demo account type
strategy = CloningStrategyFactory.get_strategy(demo_account_type)
if not is_enterprise:
logger.warning(
"Enterprise cloning requested for non-enterprise session",
session_id=session_id,
demo_account_type=demo_account_type
)
elif not child_configs or not child_tenant_ids:
logger.warning(
"Enterprise cloning requested without proper child configuration",
session_id=session_id,
child_config_count=len(child_configs),
child_tenant_id_count=len(child_tenant_ids)
)
return await self._clone_enterprise_demo(
base_tenant_id,
virtual_tenant_id,
session_id,
session_metadata
)
# Additional validation: if account type is not enterprise but has enterprise metadata, log a warning
elif session_metadata and session_metadata.get("is_enterprise", False):
logger.warning(
"Non-enterprise account type with enterprise metadata detected",
logger.info(
"Selected cloning strategy",
session_id=session_id,
strategy=strategy.get_strategy_name(),
demo_account_type=demo_account_type
)
start_time = datetime.now(timezone.utc)
# Build context object
context = CloningContext(
base_tenant_id=base_tenant_id,
virtual_tenant_id=virtual_tenant_id,
session_id=session_id,
demo_account_type=demo_account_type,
session_metadata=session_metadata,
services_filter=services_filter,
orchestrator=self # Inject orchestrator for helper methods
)
# BUG-006 EXTENSION: Rollback stack for professional demos
rollback_stack = []
# Execute strategy
result = await strategy.clone(context)
# BUG-007 FIX: Create tasks for filtered services
tasks = []
service_map = {}
try:
for service_def in services_to_clone:
task = asyncio.create_task(
self._clone_service(
service_def=service_def,
base_tenant_id=base_tenant_id,
# Trigger alert generation after cloning completes (NEW)
if result.get("overall_status") in ["completed", "partial"]:
try:
alert_results = await self._trigger_alert_generation_post_clone(
virtual_tenant_id=virtual_tenant_id,
demo_account_type=demo_account_type,
session_id=session_id,
session_metadata=session_metadata
demo_account_type=demo_account_type
)
)
tasks.append(task)
service_map[task] = service_def.name
# Wait for all tasks to complete (with individual timeouts)
results = await asyncio.gather(*tasks, return_exceptions=True)
# Process results
service_results = {}
total_records = 0
failed_services = []
required_service_failed = False
for task, result in zip(tasks, results):
service_name = service_map[task]
service_def = next(s for s in services_to_clone if s.name == service_name)
if isinstance(result, Exception):
result["alert_generation"] = alert_results
except Exception as e:
logger.error(
"Service cloning failed with exception",
service=service_name,
error=str(result)
"Failed to trigger alert generation (non-fatal)",
session_id=session_id,
error=str(e)
)
service_results[service_name] = {
"status": CloningStatus.FAILED.value,
"records_cloned": 0,
"error": str(result),
"duration_ms": 0
}
failed_services.append(service_name)
if service_def.required:
required_service_failed = True
else:
service_results[service_name] = result
if result.get("status") == "completed":
total_records += result.get("records_cloned", 0)
# BUG-006 EXTENSION: Track successful services for rollback
rollback_stack.append({
"service": service_name,
"virtual_tenant_id": virtual_tenant_id,
"session_id": session_id
})
elif result.get("status") == "failed":
failed_services.append(service_name)
if service_def.required:
required_service_failed = True
# Determine overall status
if required_service_failed:
overall_status = "failed"
elif failed_services:
overall_status = "partial"
else:
overall_status = "ready"
duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
result = {
"overall_status": overall_status,
"total_records_cloned": total_records,
"duration_ms": duration_ms,
"services": service_results,
"failed_services": failed_services,
"completed_at": datetime.now(timezone.utc).isoformat()
}
result["alert_generation"] = {"error": str(e)}
logger.info(
"Orchestrated cloning completed",
"Cloning strategy completed",
session_id=session_id,
overall_status=overall_status,
total_records=total_records,
duration_ms=duration_ms,
failed_services=failed_services
strategy=strategy.get_strategy_name(),
overall_status=result.get("overall_status"),
duration_ms=result.get("duration_ms"),
alerts_triggered=result.get("alert_generation", {}).get("success", False)
)
return result
except Exception as e:
logger.error("Professional demo cloning failed with fatal exception", error=str(e), exc_info=True)
# BUG-006 EXTENSION: Rollback professional demo on fatal exception
logger.warning("Fatal exception in professional demo, initiating rollback", session_id=session_id)
await self._rollback_professional_demo(rollback_stack, virtual_tenant_id)
duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
except ValueError as e:
# Unsupported demo_account_type
logger.error(
"Invalid demo account type",
session_id=session_id,
demo_account_type=demo_account_type,
error=str(e)
)
return {
"overall_status": "failed",
"total_records_cloned": 0,
"duration_ms": duration_ms,
"error": str(e),
"services": {},
"total_records": 0,
"failed_services": [],
"error": f"Fatal exception, resources rolled back: {str(e)}",
"recovery_info": {
"services_completed": len(rollback_stack),
"rollback_performed": True
},
"completed_at": datetime.now(timezone.utc).isoformat()
"duration_ms": 0
}
except Exception as e:
logger.error(
"Fatal exception in clone orchestration",
session_id=session_id,
error=str(e),
exc_info=True
)
return {
"overall_status": "failed",
"error": f"Fatal exception: {str(e)}",
"services": {},
"total_records": 0,
"failed_services": [],
"duration_ms": 0
}
async def _clone_service(
@@ -516,319 +455,9 @@ class CloneOrchestrator:
except Exception:
return False
async def _clone_enterprise_demo(
    self,
    base_tenant_id: str,
    parent_tenant_id: str,
    session_id: str,
    session_metadata: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Run the enterprise demo cloning flow under a single overall deadline.

    Delegates the actual work (parent, children, distribution) to
    ``_clone_enterprise_demo_impl`` and bounds it with ``asyncio.wait_for``.

    Args:
        base_tenant_id: Base template tenant ID for the parent
        parent_tenant_id: Virtual tenant ID for the parent
        session_id: Session ID
        session_metadata: Session metadata with child configs

    Returns:
        The implementation's result dictionary, or a "failed" result
        dictionary when the deadline is exceeded.
    """
    # BUG-005 FIX: the whole enterprise flow is bounded by one timeout.
    pending_clone = self._clone_enterprise_demo_impl(
        base_tenant_id=base_tenant_id,
        parent_tenant_id=parent_tenant_id,
        session_id=session_id,
        session_metadata=session_metadata
    )

    try:
        # 5 minutes max for the entire enterprise flow
        outcome = await asyncio.wait_for(pending_clone, timeout=300.0)
    except asyncio.TimeoutError:
        logger.error(
            "Enterprise demo cloning timed out",
            session_id=session_id,
            timeout_seconds=300
        )
        # Same shape as a regular enterprise result, with empty sections.
        outcome = {
            "overall_status": "failed",
            "error": "Enterprise cloning timed out after 5 minutes",
            "parent": {},
            "children": [],
            "distribution": {},
            "duration_ms": 300000
        }

    return outcome
# NOTE(review): leading indentation is missing in this view, so the exact nesting of the
# steps below cannot be confirmed here. A later comment in this file states that this
# method has been replaced by EnterpriseCloningStrategy.
async def _clone_enterprise_demo_impl(
self,
base_tenant_id: str,
parent_tenant_id: str,
session_id: str,
session_metadata: Dict[str, Any]
) -> Dict[str, Any]:
"""
Implementation of enterprise demo cloning (called by timeout wrapper)

Flow, as written below:
1. Clone the parent tenant via clone_all_services
2. Clone each child outlet concurrently via _clone_child_outlet
3. POST to the distribution service's /internal/demo/setup endpoint
4. Derive an overall status from the parent, children and distribution results

Progress snapshots are pushed through _update_progress_in_redis between steps.

Args:
base_tenant_id: Base template tenant ID for parent
parent_tenant_id: Virtual tenant ID for parent
session_id: Session ID
session_metadata: Session metadata with child configs
Returns:
Dictionary with cloning results
(keys set here: "parent", "children", "distribution", "overall_status",
and depending on the path "error", "warning", "duration_ms", "recovery_info")
"""
logger.info(
"Starting enterprise demo cloning",
session_id=session_id,
parent_tenant_id=parent_tenant_id
)
# Reference point for every duration_ms computed below
start_time = datetime.now(timezone.utc)
results = {
"parent": {},
"children": [],
"distribution": {},
"overall_status": "pending"
}
# BUG-006 FIX: Track resources for rollback
# Entries are dicts with "type", "tenant_id" and "session_id"; the list is handed to
# _rollback_enterprise_demo on distribution failure or on a fatal exception.
rollback_stack = []
try:
# Step 1: Clone parent tenant
logger.info("Cloning parent tenant", session_id=session_id)
# Update progress: Parent cloning started
await self._update_progress_in_redis(session_id, {
"parent": {"overall_status": "pending"},
"children": [],
"distribution": {}
})
# Note: session_metadata is not forwarded in this call.
parent_result = await self.clone_all_services(
base_tenant_id=base_tenant_id,
virtual_tenant_id=parent_tenant_id,
demo_account_type="enterprise",
session_id=session_id
)
results["parent"] = parent_result
# Update progress: Parent cloning completed
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": [],
"distribution": {}
})
# BUG-006 FIX: Track parent for potential rollback
if parent_result.get("overall_status") not in ["failed"]:
rollback_stack.append({
"type": "tenant",
"tenant_id": parent_tenant_id,
"session_id": session_id
})
# BUG-003 FIX: Validate parent cloning succeeded before proceeding
parent_status = parent_result.get("overall_status")
if parent_status == "failed":
logger.error(
"Parent cloning failed, aborting enterprise demo",
session_id=session_id,
failed_services=parent_result.get("failed_services", [])
)
# Early exit: no rollback call is made on this path.
results["overall_status"] = "failed"
results["error"] = "Parent tenant cloning failed"
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
return results
if parent_status == "partial":
logger.warning(
"Parent cloning partial, checking if critical services succeeded",
session_id=session_id
)
# Check if tenant service succeeded (critical for children)
parent_services = parent_result.get("services", {})
if parent_services.get("tenant", {}).get("status") != "completed":
logger.error(
"Tenant service failed in parent, cannot create children",
session_id=session_id
)
# Early exit: no rollback call is made on this path either.
results["overall_status"] = "failed"
results["error"] = "Parent tenant creation failed - cannot create child tenants"
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
return results
logger.info(
"Parent cloning succeeded, proceeding with children",
session_id=session_id,
parent_status=parent_status
)
# Step 2: Clone each child outlet in parallel
child_configs = session_metadata.get("child_configs", [])
child_tenant_ids = session_metadata.get("child_tenant_ids", [])
if child_configs and child_tenant_ids:
logger.info(
"Cloning child outlets",
session_id=session_id,
child_count=len(child_configs)
)
# Update progress: Children cloning started
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": [{"status": "pending"} for _ in child_configs],
"distribution": {}
})
child_tasks = []
# zip() stops at the shorter list, so a length mismatch between child_configs and
# child_tenant_ids silently drops the extra entries. idx is not used in the loop body.
# child_config[...] lookups raise KeyError when a key is missing.
for idx, (child_config, child_id) in enumerate(zip(child_configs, child_tenant_ids)):
task = self._clone_child_outlet(
base_tenant_id=child_config["base_tenant_id"],
virtual_child_id=child_id,
parent_tenant_id=parent_tenant_id,
child_name=child_config["name"],
location=child_config["location"],
session_id=session_id
)
child_tasks.append(task)
# return_exceptions=True: a failing child yields its exception object instead of raising
children_results = await asyncio.gather(*child_tasks, return_exceptions=True)
# Exception results are converted to {"status": "failed", "error": ...} entries
results["children"] = [
r if not isinstance(r, Exception) else {"status": "failed", "error": str(r)}
for r in children_results
]
# Update progress: Children cloning completed
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": results["children"],
"distribution": {}
})
# BUG-006 FIX: Track children for potential rollback
for child_result in results["children"]:
if child_result.get("status") not in ["failed"]:
rollback_stack.append({
"type": "tenant",
"tenant_id": child_result.get("child_id"),
"session_id": session_id
})
# Step 3: Setup distribution data
distribution_url = os.getenv("DISTRIBUTION_SERVICE_URL", "http://distribution-service:8000")
logger.info("Setting up distribution data", session_id=session_id, distribution_url=distribution_url)
# Update progress: Distribution starting
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": results["children"],
"distribution": {"status": "pending"}
})
try:
async with httpx.AsyncClient(timeout=120.0) as client: # Increased timeout for distribution setup
response = await client.post(
f"{distribution_url}/internal/demo/setup",
json={
"parent_tenant_id": parent_tenant_id,
"child_tenant_ids": child_tenant_ids,
"session_id": session_id,
"session_metadata": session_metadata # Pass metadata for date adjustment
},
headers={"X-Internal-API-Key": self.internal_api_key}
)
# Only HTTP 200 counts as success; any other status triggers a rollback
if response.status_code == 200:
results["distribution"] = response.json()
logger.info("Distribution setup completed successfully", session_id=session_id)
# Update progress: Distribution completed
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": results["children"],
"distribution": results["distribution"]
})
else:
# Fall back to the status code when the response body is empty
error_detail = response.text if response.text else f"HTTP {response.status_code}"
results["distribution"] = {
"status": "failed",
"error": error_detail
}
logger.error(f"Distribution setup failed: {error_detail}", session_id=session_id)
# BUG-006 FIX: Rollback on distribution failure
logger.warning("Distribution failed, initiating rollback", session_id=session_id)
await self._rollback_enterprise_demo(rollback_stack)
results["overall_status"] = "failed"
results["error"] = f"Distribution setup failed, resources rolled back: {error_detail}"
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
return results
except Exception as e:
logger.error("Distribution setup failed", error=str(e), exc_info=True)
results["distribution"] = {"status": "failed", "error": str(e)}
# BUG-006 FIX: Rollback on distribution exception
logger.warning("Distribution exception, initiating rollback", session_id=session_id)
await self._rollback_enterprise_demo(rollback_stack)
results["overall_status"] = "failed"
results["error"] = f"Distribution setup exception, resources rolled back: {str(e)}"
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
return results
# BUG-004 FIX: Stricter status determination
# Only mark as "ready" if ALL components fully succeeded
# Parent and children are compared against "ready", distribution against "completed"
parent_ready = parent_result.get("overall_status") == "ready"
all_children_ready = all(r.get("status") == "ready" for r in results["children"])
distribution_ready = results["distribution"].get("status") == "completed"
# Check for failures
parent_failed = parent_result.get("overall_status") == "failed"
any_child_failed = any(r.get("status") == "failed" for r in results["children"])
distribution_failed = results["distribution"].get("status") == "failed"
if parent_ready and all_children_ready and distribution_ready:
results["overall_status"] = "ready"
logger.info("Enterprise demo fully ready", session_id=session_id)
elif parent_failed or any_child_failed or distribution_failed:
results["overall_status"] = "failed"
logger.error("Enterprise demo failed", session_id=session_id)
else:
# Neither fully ready nor explicitly failed
results["overall_status"] = "partial"
results["warning"] = "Some services did not fully clone"
logger.warning("Enterprise demo partially complete", session_id=session_id)
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
logger.info(
"Enterprise demo cloning completed",
session_id=session_id,
overall_status=results["overall_status"],
duration_ms=results["duration_ms"]
)
except Exception as e:
logger.error("Enterprise demo cloning failed", error=str(e), exc_info=True)
# BUG-006 FIX: Rollback on fatal exception
logger.warning("Fatal exception, initiating rollback", session_id=session_id)
await self._rollback_enterprise_demo(rollback_stack)
results["overall_status"] = "failed"
results["error"] = f"Fatal exception, resources rolled back: {str(e)}"
# children_completed is the number of child result entries recorded so far,
# which includes entries whose status is "failed".
results["recovery_info"] = {
"parent_completed": bool(results.get("parent")),
"children_completed": len(results.get("children", [])),
"distribution_attempted": bool(results.get("distribution"))
}
return results
# REMOVED: _clone_enterprise_demo and _clone_enterprise_demo_impl
# These methods have been replaced by EnterpriseCloningStrategy
# See app/services/cloning_strategies.py for the new implementation
async def _clone_child_outlet(
self,
@@ -1027,3 +656,102 @@ class CloneOrchestrator:
# Continue with remaining rollbacks despite errors
logger.info(f"Professional demo rollback completed for {len(rollback_stack)} services")
async def _trigger_alert_generation_post_clone(
    self,
    virtual_tenant_id: str,
    demo_account_type: str
) -> Dict[str, Any]:
    """
    Trigger alert generation after demo data cloning completes.

    Calls:
    1. Delivery tracking (procurement service) - for all demo types
    2. Production alerts (production service) - for professional/enterprise only

    Each trigger is best-effort: a failing call is logged and recorded as an
    ``{"error": ...}`` entry, it never raises out of this method.

    Args:
        virtual_tenant_id: The virtual tenant ID that was just cloned
        demo_account_type: Type of demo account (professional, enterprise, standard)

    Returns:
        Dict with one entry per attempted trigger ("delivery_tracking" and,
        when applicable, "production_alerts") plus a boolean "success" flag
        that is True only when every attempted trigger answered HTTP 200.
        The caller's completion log reads that flag.
    """
    results: Dict[str, Any] = {}
    trigger_outcomes = []

    # Delivery tracking is owned by the procurement service (domain ownership).
    procurement_url = os.getenv("PROCUREMENT_SERVICE_URL", "http://procurement-service:8000")
    delivery_ok, delivery_payload = await self._post_alert_trigger_request(
        label="delivery tracking",
        url=f"{procurement_url}/api/internal/delivery-tracking/trigger/{virtual_tenant_id}",
        virtual_tenant_id=virtual_tenant_id
    )
    results["delivery_tracking"] = delivery_payload
    trigger_outcomes.append(delivery_ok)

    # Production alerts only exist for professional/enterprise demos.
    if demo_account_type in ["professional", "enterprise"]:
        production_url = os.getenv("PRODUCTION_SERVICE_URL", "http://production-service:8000")
        production_ok, production_payload = await self._post_alert_trigger_request(
            label="production alerts",
            url=f"{production_url}/api/internal/production-alerts/trigger/{virtual_tenant_id}",
            virtual_tenant_id=virtual_tenant_id
        )
        results["production_alerts"] = production_payload
        trigger_outcomes.append(production_ok)

    # Wait 1.5s for alert enrichment to complete
    await asyncio.sleep(1.5)

    logger.info(
        "Alert generation post-clone completed",
        tenant_id=virtual_tenant_id,
        delivery_alerts=results.get("delivery_tracking", {}).get("alerts_generated", 0),
        production_alerts=results.get("production_alerts", {}).get("alerts_generated", 0)
    )

    # Set last so the per-trigger entries above stay plain dicts for the log lookups.
    results["success"] = all(trigger_outcomes)
    return results

async def _post_alert_trigger_request(
    self,
    label: str,
    url: str,
    virtual_tenant_id: str
) -> tuple:
    """
    POST to one internal alert-trigger endpoint.

    Args:
        label: Lower-case human-readable trigger name used in log messages
        url: Fully built trigger URL
        virtual_tenant_id: Tenant ID, used for log context only

    Returns:
        ``(succeeded, payload)`` where payload is the decoded JSON body on
        HTTP 200 and an ``{"error": ...}`` dict otherwise.
    """
    try:
        logger.info(f"Triggering {label}", tenant_id=virtual_tenant_id)
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                url,
                headers={"X-Internal-Service": "demo-session"}
            )
            if response.status_code == 200:
                payload = response.json()
                logger.info(
                    f"{label.capitalize()} triggered successfully",
                    tenant_id=virtual_tenant_id,
                    alerts_generated=payload.get("alerts_generated", 0)
                )
                return True, payload

            error_detail = response.text
            logger.warning(
                f"{label.capitalize()} trigger returned non-200 status",
                status_code=response.status_code,
                error=error_detail
            )
            return False, {"error": f"HTTP {response.status_code}: {error_detail}"}
    except Exception as e:
        # Deliberately broad: alert generation must never fail the cloning flow.
        logger.error(f"Failed to trigger {label}", tenant_id=virtual_tenant_id, error=str(e))
        return False, {"error": str(e)}

View File

@@ -0,0 +1,569 @@
"""
Cloning Strategy Pattern Implementation
Provides explicit, type-safe strategies for different demo account types
"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Dict, Any, List, Optional
from datetime import datetime, timezone
import structlog
logger = structlog.get_logger()
@dataclass(frozen=True)
class CloningContext:
    """
    Context object containing all data needed for cloning operations.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction; this enforces the immutability the strategies rely on.
    Note that the referenced ``session_metadata`` dict and
    ``services_filter`` list are not copied and remain mutable objects.

    Raises:
        ValueError: If base_tenant_id, virtual_tenant_id or session_id is
            missing or empty.
    """
    base_tenant_id: str
    virtual_tenant_id: str
    session_id: str
    demo_account_type: str
    session_metadata: Optional[Dict[str, Any]] = None
    services_filter: Optional[List[str]] = None
    # Orchestrator dependencies (injected)
    orchestrator: Any = None  # Will be CloneOrchestrator instance

    def __post_init__(self):
        """Validate context after initialization"""
        # Checked in declaration order so the first missing field is the one reported.
        for required_field in ("base_tenant_id", "virtual_tenant_id", "session_id"):
            if not getattr(self, required_field):
                raise ValueError(f"{required_field} is required")
class CloningStrategy(ABC):
    """
    Interface shared by all demo cloning strategies.

    Concrete strategies must provide both methods below. They are intended
    to be leaf nodes that do not call back into strategy selection.
    """

    @abstractmethod
    async def clone(self, context: CloningContext) -> Dict[str, Any]:
        """
        Execute the cloning strategy.

        Args:
            context: Context carrying all data required for the run

        Returns:
            Dictionary with cloning results
        """

    @abstractmethod
    def get_strategy_name(self) -> str:
        """Return the name of this strategy for logging."""
class ProfessionalCloningStrategy(CloningStrategy):
    """
    Strategy for single-tenant professional demos.

    Clones the orchestrator's services (optionally narrowed by
    ``context.services_filter``) for a single virtual tenant.
    """

    def get_strategy_name(self) -> str:
        """Return the strategy identifier used in log output."""
        return "professional"

    async def clone(self, context: CloningContext) -> Dict[str, Any]:
        """
        Clone demo data for a professional (single-tenant) account.

        Process:
        1. Select the services to clone (all, or those named in services_filter)
        2. Clone the selected services in parallel
        3. Classify each outcome; a failed optional service yields "partial",
           a failed required service yields "failed"
        4. Return aggregated results

        Args:
            context: Cloning context; ``context.orchestrator`` must expose
                ``services`` and ``_clone_service``.

        Returns:
            Dictionary with "overall_status" ("completed", "partial" or
            "failed"), per-service "services" results, "total_records",
            "failed_services", "duration_ms" and the "rollback_stack" of
            services that completed. On an unexpected error an "error" key
            is included as well.
        """
        # Imported locally, as the original implementation did.
        import asyncio

        logger.info(
            "Executing professional cloning strategy",
            session_id=context.session_id,
            virtual_tenant_id=context.virtual_tenant_id,
            base_tenant_id=context.base_tenant_id
        )

        start_time = datetime.now(timezone.utc)

        # Determine which services to clone
        services_to_clone = context.orchestrator.services
        if context.services_filter:
            services_to_clone = [
                s for s in context.orchestrator.services
                if s.name in context.services_filter
            ]
            logger.info(
                "Filtering services",
                session_id=context.session_id,
                services_filter=context.services_filter,
                filtered_count=len(services_to_clone)
            )

        # Services that completed, recorded so a caller can clean them up
        rollback_stack = []

        try:
            # One task per service; list order matches services_to_clone
            tasks = [
                asyncio.create_task(
                    context.orchestrator._clone_service(
                        service_def=service_def,
                        base_tenant_id=context.base_tenant_id,
                        virtual_tenant_id=context.virtual_tenant_id,
                        demo_account_type=context.demo_account_type,
                        session_id=context.session_id,
                        session_metadata=context.session_metadata
                    )
                )
                for service_def in services_to_clone
            ]

            # gather() returns results in task order, so they can be paired
            # directly with their service definitions.
            results = await asyncio.gather(*tasks, return_exceptions=True)

            service_results = {}
            total_records = 0
            failed_services = []
            required_service_failed = False

            for service_def, result in zip(services_to_clone, results):
                service_name = service_def.name

                # BaseException, not Exception: a cancelled service task is
                # returned as CancelledError, which is not an Exception subclass.
                if isinstance(result, BaseException):
                    # Some exceptions (e.g. CancelledError) have an empty message
                    error_text = str(result) or type(result).__name__
                    logger.error(
                        f"Service {service_name} cloning failed with exception",
                        session_id=context.session_id,
                        error=error_text
                    )
                    service_results[service_name] = {
                        "status": "failed",
                        "error": error_text,
                        "records_cloned": 0
                    }
                    failed_services.append(service_name)
                    if service_def.required:
                        required_service_failed = True
                    continue

                service_results[service_name] = result
                if result.get("status") == "failed":
                    failed_services.append(service_name)
                    if service_def.required:
                        required_service_failed = True
                    continue

                total_records += result.get("records_cloned", 0)
                # Only fully completed services are tracked for rollback
                if result.get("status") == "completed":
                    rollback_stack.append({
                        "type": "service",
                        "service_name": service_name,
                        "tenant_id": context.virtual_tenant_id,
                        "session_id": context.session_id
                    })

            # Determine overall status
            if required_service_failed:
                overall_status = "failed"
            elif failed_services:
                overall_status = "partial"
            else:
                overall_status = "completed"

            duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)

            logger.info(
                "Professional cloning strategy completed",
                session_id=context.session_id,
                overall_status=overall_status,
                total_records=total_records,
                failed_services=failed_services,
                duration_ms=duration_ms
            )

            return {
                "overall_status": overall_status,
                "services": service_results,
                "total_records": total_records,
                "failed_services": failed_services,
                "duration_ms": duration_ms,
                "rollback_stack": rollback_stack
            }

        except Exception as e:
            # Boundary handler: report the failure as a result instead of raising
            logger.error(
                "Professional cloning strategy failed",
                session_id=context.session_id,
                error=str(e),
                exc_info=True
            )
            return {
                "overall_status": "failed",
                "error": str(e),
                "services": {},
                "total_records": 0,
                "failed_services": [],
                "duration_ms": int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000),
                "rollback_stack": rollback_stack
            }
class EnterpriseCloningStrategy(CloningStrategy):
"""
Strategy for multi-tenant enterprise demos
Clones parent tenant + child tenants + distribution data
"""
def get_strategy_name(self) -> str:
return "enterprise"
async def clone(self, context: CloningContext) -> Dict[str, Any]:
"""
Clone demo data for an enterprise (multi-tenant) account
Process:
1. Validate enterprise metadata
2. Clone parent tenant using ProfessionalCloningStrategy
3. Clone child tenants in parallel
4. Update distribution data with child mappings
5. Return aggregated results
NOTE: No recursion - uses ProfessionalCloningStrategy as a helper
"""
logger.info(
"Executing enterprise cloning strategy",
session_id=context.session_id,
parent_tenant_id=context.virtual_tenant_id,
base_tenant_id=context.base_tenant_id
)
start_time = datetime.now(timezone.utc)
results = {
"parent": {},
"children": [],
"distribution": {},
"overall_status": "pending"
}
rollback_stack = []
try:
# Validate enterprise metadata
if not context.session_metadata:
raise ValueError("Enterprise cloning requires session_metadata")
is_enterprise = context.session_metadata.get("is_enterprise", False)
child_configs = context.session_metadata.get("child_configs", [])
child_tenant_ids = context.session_metadata.get("child_tenant_ids", [])
if not is_enterprise:
raise ValueError("session_metadata.is_enterprise must be True")
if not child_configs or not child_tenant_ids:
raise ValueError("Enterprise metadata missing child_configs or child_tenant_ids")
logger.info(
"Enterprise metadata validated",
session_id=context.session_id,
child_count=len(child_configs)
)
# Phase 1: Clone parent tenant
logger.info("Phase 1: Cloning parent tenant", session_id=context.session_id)
# Update progress
await context.orchestrator._update_progress_in_redis(context.session_id, {
"parent": {"overall_status": "pending"},
"children": [],
"distribution": {}
})
# Use ProfessionalCloningStrategy to clone parent
# This is composition, not recursion - explicit strategy usage
professional_strategy = ProfessionalCloningStrategy()
parent_context = CloningContext(
base_tenant_id=context.base_tenant_id,
virtual_tenant_id=context.virtual_tenant_id,
session_id=context.session_id,
demo_account_type="enterprise", # Explicit type for parent tenant
session_metadata=context.session_metadata,
orchestrator=context.orchestrator
)
parent_result = await professional_strategy.clone(parent_context)
results["parent"] = parent_result
# Update progress
await context.orchestrator._update_progress_in_redis(context.session_id, {
"parent": parent_result,
"children": [],
"distribution": {}
})
# Track parent for rollback
if parent_result.get("overall_status") not in ["failed"]:
rollback_stack.append({
"type": "tenant",
"tenant_id": context.virtual_tenant_id,
"session_id": context.session_id
})
# Validate parent success
parent_status = parent_result.get("overall_status")
if parent_status == "failed":
logger.error(
"Parent cloning failed, aborting enterprise demo",
session_id=context.session_id,
failed_services=parent_result.get("failed_services", [])
)
results["overall_status"] = "failed"
results["error"] = "Parent tenant cloning failed"
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
return results
if parent_status == "partial":
# Check if tenant service succeeded (critical)
parent_services = parent_result.get("services", {})
if parent_services.get("tenant", {}).get("status") != "completed":
logger.error(
"Tenant service failed in parent, cannot create children",
session_id=context.session_id
)
results["overall_status"] = "failed"
results["error"] = "Parent tenant creation failed - cannot create child tenants"
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
return results
logger.info(
"Parent cloning succeeded, proceeding with children",
session_id=context.session_id,
parent_status=parent_status
)
# Phase 2: Clone child tenants in parallel
logger.info(
"Phase 2: Cloning child outlets",
session_id=context.session_id,
child_count=len(child_configs)
)
# Update progress
await context.orchestrator._update_progress_in_redis(context.session_id, {
"parent": parent_result,
"children": [{"status": "pending"} for _ in child_configs],
"distribution": {}
})
# Import asyncio for parallel execution
import asyncio
child_tasks = []
for idx, (child_config, child_id) in enumerate(zip(child_configs, child_tenant_ids)):
task = context.orchestrator._clone_child_outlet(
base_tenant_id=child_config.get("base_tenant_id"),
virtual_child_id=child_id,
parent_tenant_id=context.virtual_tenant_id,
child_name=child_config.get("name"),
location=child_config.get("location"),
session_id=context.session_id
)
child_tasks.append(task)
child_results = await asyncio.gather(*child_tasks, return_exceptions=True)
# Process child results
children_data = []
failed_children = 0
for idx, result in enumerate(child_results):
if isinstance(result, Exception):
logger.error(
f"Child {idx} cloning failed",
session_id=context.session_id,
error=str(result)
)
children_data.append({
"status": "failed",
"error": str(result),
"child_id": child_tenant_ids[idx] if idx < len(child_tenant_ids) else None
})
failed_children += 1
else:
children_data.append(result)
if result.get("overall_status") == "failed":
failed_children += 1
else:
# Track for rollback
rollback_stack.append({
"type": "tenant",
"tenant_id": result.get("child_id"),
"session_id": context.session_id
})
results["children"] = children_data
# Update progress
await context.orchestrator._update_progress_in_redis(context.session_id, {
"parent": parent_result,
"children": children_data,
"distribution": {}
})
logger.info(
"Child cloning completed",
session_id=context.session_id,
total_children=len(child_configs),
failed_children=failed_children
)
# Phase 3: Clone distribution data
logger.info("Phase 3: Cloning distribution data", session_id=context.session_id)
# Find distribution service definition
dist_service_def = next(
(s for s in context.orchestrator.services if s.name == "distribution"),
None
)
if dist_service_def:
dist_result = await context.orchestrator._clone_service(
service_def=dist_service_def,
base_tenant_id=context.base_tenant_id,
virtual_tenant_id=context.virtual_tenant_id,
demo_account_type="enterprise",
session_id=context.session_id,
session_metadata=context.session_metadata
)
results["distribution"] = dist_result
# Update progress
await context.orchestrator._update_progress_in_redis(context.session_id, {
"parent": parent_result,
"children": children_data,
"distribution": dist_result
})
# Track for rollback
if dist_result.get("status") == "completed":
rollback_stack.append({
"type": "service",
"service_name": "distribution",
"tenant_id": context.virtual_tenant_id,
"session_id": context.session_id
})
total_records_cloned = parent_result.get("total_records", 0)
total_records_cloned += dist_result.get("records_cloned", 0)
else:
logger.warning("Distribution service not found in orchestrator", session_id=context.session_id)
# Determine overall status
if failed_children == len(child_configs):
overall_status = "failed"
elif failed_children > 0:
overall_status = "partial"
else:
overall_status = "ready"
# Calculate total records cloned (parent + all children)
total_records_cloned = parent_result.get("total_records", 0)
for child in children_data:
if isinstance(child, dict):
total_records_cloned += child.get("total_records", child.get("records_cloned", 0))
results["overall_status"] = overall_status
results["total_records_cloned"] = total_records_cloned # Add for session manager
results["duration_ms"] = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
results["rollback_stack"] = rollback_stack
# Include services from parent for session manager compatibility
results["services"] = parent_result.get("services", {})
logger.info(
"Enterprise cloning strategy completed",
session_id=context.session_id,
overall_status=overall_status,
parent_status=parent_status,
children_status=f"{len(child_configs) - failed_children}/{len(child_configs)} succeeded",
total_records_cloned=total_records_cloned,
duration_ms=results["duration_ms"]
)
return results
except Exception as e:
logger.error(
"Enterprise cloning strategy failed",
session_id=context.session_id,
error=str(e),
exc_info=True
)
return {
"overall_status": "failed",
"error": str(e),
"parent": {},
"children": [],
"distribution": {},
"duration_ms": int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000),
"rollback_stack": rollback_stack
}
class CloningStrategyFactory:
    """
    Factory for creating cloning strategies
    Provides type-safe strategy selection

    Strategies live in a class-level registry keyed by demo account type.
    The registry is shared by every caller, so a strategy added through
    ``register_strategy`` is visible to all later ``get_strategy`` calls.
    """

    # Built once, when the class body is executed; lookups return these same
    # instances on every call. "enterprise_child" gets its own
    # ProfessionalCloningStrategy instance rather than sharing the
    # "professional" one.
    _strategies: Dict[str, CloningStrategy] = {
        "professional": ProfessionalCloningStrategy(),
        "enterprise": EnterpriseCloningStrategy(),
        "enterprise_child": ProfessionalCloningStrategy()  # Alias: children use professional strategy
    }

    @classmethod
    def get_strategy(cls, demo_account_type: str) -> CloningStrategy:
        """
        Get the appropriate cloning strategy for the demo account type

        Args:
            demo_account_type: Type of demo account ("professional",
                "enterprise", "enterprise_child", or any name added via
                ``register_strategy``)

        Returns:
            CloningStrategy instance

        Raises:
            ValueError: If demo_account_type is not supported
        """
        strategy = cls._strategies.get(demo_account_type)
        # Compare against None instead of relying on truthiness: a registered
        # strategy object that happens to evaluate as falsy (e.g. it defines
        # __len__ or __bool__) must still be returned, not reported as unknown.
        if strategy is None:
            raise ValueError(
                f"Unknown demo_account_type: {demo_account_type}. "
                f"Supported types: {list(cls._strategies.keys())}"
            )
        return strategy

    @classmethod
    def register_strategy(cls, name: str, strategy: CloningStrategy) -> None:
        """
        Register a custom cloning strategy

        An existing entry with the same name is replaced.

        Args:
            name: Strategy name
            strategy: Strategy instance
        """
        cls._strategies[name] = strategy
        logger.info(f"Registered custom cloning strategy: {name}")

View File

@@ -121,13 +121,13 @@ class DemoDataCloner:
if demo_account_type == "professional":
# Professional has production, recipes, suppliers, and procurement
return base_services + ["recipes", "production", "suppliers", "procurement"]
return base_services + ["recipes", "production", "suppliers", "procurement", "alert_processor"]
elif demo_account_type == "enterprise":
# Enterprise has suppliers and procurement
return base_services + ["suppliers", "procurement"]
# Enterprise has suppliers, procurement, and distribution (for parent-child network)
return base_services + ["suppliers", "procurement", "distribution", "alert_processor"]
else:
# Basic tenant has suppliers and procurement
return base_services + ["suppliers", "procurement", "distribution"]
return base_services + ["suppliers", "procurement", "distribution", "alert_processor"]
async def _clone_service_data(
self,
@@ -273,6 +273,7 @@ class DemoDataCloner:
"procurement": settings.PROCUREMENT_SERVICE_URL,
"distribution": settings.DISTRIBUTION_SERVICE_URL,
"forecasting": settings.FORECASTING_SERVICE_URL,
"alert_processor": settings.ALERT_PROCESSOR_SERVICE_URL,
}
return url_map.get(service_name, "")
@@ -309,7 +310,8 @@ class DemoDataCloner:
"suppliers",
"pos",
"distribution",
"procurement"
"procurement",
"alert_processor"
]
# Create deletion tasks for all services

View File

@@ -274,10 +274,13 @@ class DemoSessionManager:
virtual_tenant_id=str(session.virtual_tenant_id)
)
# Mark cloning as started
# Mark cloning as started and update both database and Redis cache
session.cloning_started_at = datetime.now(timezone.utc)
await self.repository.update(session)
# Update Redis cache to reflect that cloning has started
await self._cache_session_status(session)
# Run orchestration
result = await self.orchestrator.clone_all_services(
base_tenant_id=base_tenant_id,
@@ -426,7 +429,7 @@ class DemoSessionManager:
# Map overall status to session status
overall_status = clone_result.get("overall_status")
if overall_status == "ready":
if overall_status in ["ready", "completed"]:
session.status = DemoSessionStatus.READY
elif overall_status == "failed":
session.status = DemoSessionStatus.FAILED
@@ -435,11 +438,13 @@ class DemoSessionManager:
# Update cloning metadata
session.cloning_completed_at = datetime.now(timezone.utc)
session.total_records_cloned = clone_result.get("total_records_cloned", 0)
# The clone result might use 'total_records' or 'total_records_cloned'
session.total_records_cloned = clone_result.get("total_records_cloned",
clone_result.get("total_records", 0))
session.cloning_progress = clone_result.get("services", {})
# Mark legacy flags for backward compatibility
if overall_status in ["ready", "partial"]:
if overall_status in ["ready", "completed", "partial"]:
session.data_cloned = True
session.redis_populated = True

View File

@@ -39,7 +39,7 @@ from typing import List, Dict, Any
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.messaging.rabbitmq import RabbitMQClient
from shared.messaging import RabbitMQClient
from shared.schemas.alert_types import AlertTypeConstants
import structlog

View File

@@ -29,7 +29,7 @@ from pathlib import Path
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.messaging.rabbitmq import RabbitMQClient
from shared.messaging import RabbitMQClient
import structlog
logger = structlog.get_logger()
@@ -65,7 +65,17 @@ DEMO_ALERTS = [
'minimum_stock': 200,
'unit': 'kg',
'supplier_name': 'Harinera San José',
'last_order_date': (datetime.utcnow() - timedelta(days=7)).isoformat()
'last_order_date': (datetime.utcnow() - timedelta(days=7)).isoformat(),
'i18n': {
'title_key': 'alerts.low_stock_warning.title',
'message_key': 'alerts.low_stock_warning.message_generic',
'title_params': {'ingredient_name': 'Harina Tipo 55'},
'message_params': {
'ingredient_name': 'Harina Tipo 55',
'current_stock': 45,
'minimum_stock': 200
}
}
},
'timestamp': datetime.utcnow().isoformat()
},
@@ -94,7 +104,19 @@ DEMO_ALERTS = [
'financial_impact_eur': 450,
'deadline': (datetime.utcnow() + timedelta(hours=6)).isoformat(),
'quantity_needed': 15,
'unit': 'kg'
'unit': 'kg',
'i18n': {
'title_key': 'alerts.supplier_delay.title',
'message_key': 'alerts.supplier_delay.message',
'title_params': {'supplier_name': 'Levadura Fresh'},
'message_params': {
'supplier_name': 'Levadura Fresh',
'ingredient_name': 'Levadura Fresca',
'po_id': 'PO-DEMO-123',
'new_delivery_date': (datetime.utcnow() + timedelta(hours=24)).strftime('%Y-%m-%d'),
'original_delivery_date': (datetime.utcnow() - timedelta(hours=24)).strftime('%Y-%m-%d')
}
}
},
'timestamp': datetime.utcnow().isoformat()
},
@@ -127,7 +149,18 @@ DEMO_ALERTS = [
{'day': 'Wed', 'waste_pct': 23},
{'day': 'Thu', 'waste_pct': 8},
{'day': 'Fri', 'waste_pct': 6}
]
],
'i18n': {
'title_key': 'alerts.waste_trend.title',
'message_key': 'alerts.waste_trend.message',
'title_params': {'product_name': 'Croissant Mantequilla'},
'message_params': {
'product_name': 'Croissant Mantequilla',
'spike_percent': 15,
'trend_days': 3,
'pattern': 'wednesday_overproduction'
}
}
},
'timestamp': datetime.utcnow().isoformat()
},
@@ -149,7 +182,17 @@ DEMO_ALERTS = [
'days_affected': ['2024-11-23', '2024-11-24'],
'expected_demand_increase_pct': 15,
'confidence': 0.78,
'recommended_action': 'Aumentar producción croissants y pan rústico 15%'
'recommended_action': 'Aumentar producción croissants y pan rústico 15%',
'i18n': {
'title_key': 'alerts.demand_surge_weekend.title',
'message_key': 'alerts.demand_surge_weekend.message',
'title_params': {'weekend_date': (datetime.utcnow() + timedelta(days=1)).strftime('%Y-%m-%d')},
'message_params': {
'surge_percent': 15,
'date': (datetime.utcnow() + timedelta(days=1)).strftime('%Y-%m-%d'),
'products': ['croissants', 'pan rustico']
}
}
},
'timestamp': datetime.utcnow().isoformat()
},
@@ -175,7 +218,17 @@ DEMO_ALERTS = [
'last_maintenance': (datetime.utcnow() - timedelta(days=90)).isoformat(),
'maintenance_interval_days': 90,
'supplier_contact': 'TecnoHornos Madrid',
'supplier_phone': '+34-555-6789'
'supplier_phone': '+34-555-6789',
'i18n': {
'title_key': 'alerts.maintenance_required.title',
'message_key': 'alerts.maintenance_required.message_with_hours',
'title_params': {'equipment_name': 'Horno Industrial Principal'},
'message_params': {
'equipment_name': 'Horno Industrial Principal',
'hours_until': 48,
'maintenance_date': (datetime.utcnow() + timedelta(hours=48)).strftime('%Y-%m-%d')
}
}
},
'timestamp': datetime.utcnow().isoformat()
}