Improve the demo feature: replace the Kubernetes clone Job with asynchronous per-service cloning orchestration and session status tracking

Urtzi Alfaro
2025-10-12 18:47:33 +02:00
parent dbc7f2fa0d
commit 7556a00db7
168 changed files with 10102 additions and 18869 deletions

View File

@@ -20,6 +20,41 @@ logger = structlog.get_logger()
route_builder = RouteBuilder('demo')
async def _background_cloning_task(session_id: str, session_obj_id: UUID, base_tenant_id: str):
"""Background task for orchestrated cloning - creates its own DB session"""
from app.core.database import db_manager
from app.models import DemoSession
from sqlalchemy import select
# Create new database session for background task
async with db_manager.session_factory() as db:
try:
# Get Redis client
redis = await get_redis()
# Fetch the session from the database
result = await db.execute(
select(DemoSession).where(DemoSession.id == session_obj_id)
)
session = result.scalar_one_or_none()
if not session:
logger.error("Session not found for cloning", session_id=session_id)
return
# Create session manager with new DB session
session_manager = DemoSessionManager(db, redis)
await session_manager.trigger_orchestrated_cloning(session, base_tenant_id)
except Exception as e:
logger.error(
"Background cloning failed",
session_id=session_id,
error=str(e),
exc_info=True
)
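A minimal sketch of one way to launch this background task while keeping a strong reference to it; asyncio.create_task only holds a weak reference to the task, so a fire-and-forget task can be garbage-collected before it finishes. The launch_cloning_task helper and the module-level set are illustrative, not part of this commit.
import asyncio
from uuid import UUID
# Strong references keep pending tasks alive until they complete.
_background_tasks: set[asyncio.Task] = set()
def launch_cloning_task(session_id: str, session_obj_id: UUID, base_tenant_id: str) -> asyncio.Task:
    task = asyncio.create_task(
        _background_cloning_task(session_id, session_obj_id, base_tenant_id)
    )
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
    return task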
@router.post(
route_builder.build_base_route("sessions", include_tenant_prefix=False),
response_model=DemoSessionResponse,
@@ -46,23 +81,20 @@ async def create_demo_session(
user_agent=user_agent
)
# Trigger async data cloning job
from app.services.k8s_job_cloner import K8sJobCloner
# Trigger async orchestrated cloning in background
import asyncio
from app.core.config import settings
from app.models import DemoSession
job_cloner = K8sJobCloner()
# Get base tenant ID from config
demo_config = settings.DEMO_ACCOUNTS.get(request.demo_account_type, {})
base_tenant_id = demo_config.get("base_tenant_id", str(session.base_demo_tenant_id))
# Start cloning in background task with session ID (not session object)
asyncio.create_task(
job_cloner.clone_tenant_data(
session.session_id,
"",
str(session.virtual_tenant_id),
request.demo_account_type
)
_background_cloning_task(session.session_id, session.id, base_tenant_id)
)
await session_manager.mark_data_cloned(session.session_id)
await session_manager.mark_redis_populated(session.session_id)
# Generate session token
session_token = jwt.encode(
{
@@ -110,6 +142,61 @@ async def get_session_info(
return session.to_dict()
@router.get(
route_builder.build_resource_detail_route("sessions", "session_id", include_tenant_prefix=False) + "/status",
response_model=dict
)
async def get_session_status(
session_id: str = Path(...),
db: AsyncSession = Depends(get_db),
redis: RedisClient = Depends(get_redis)
):
"""
Get demo session provisioning status
Returns current status of data cloning and readiness.
Use this endpoint for polling (recommended interval: 1-2 seconds).
"""
session_manager = DemoSessionManager(db, redis)
status = await session_manager.get_session_status(session_id)
if not status:
raise HTTPException(status_code=404, detail="Session not found")
return status
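A hedged client-side sketch of the polling loop this endpoint is designed for, using the 1-2 second interval recommended above; the /sessions/{session_id}/status path, base URL, and timeout are assumptions about how route_builder resolves the route.
import asyncio
import httpx
async def wait_until_ready(base_url: str, session_id: str, interval: float = 1.5, timeout: float = 120.0) -> dict:
    # Poll the status endpoint until cloning settles or the deadline passes.
    deadline = asyncio.get_running_loop().time() + timeout
    async with httpx.AsyncClient(base_url=base_url) as client:
        while True:
            resp = await client.get(f"/sessions/{session_id}/status")
            resp.raise_for_status()
            status = resp.json()
            if status["status"] in ("ready", "partial", "failed"):
                return status
            if asyncio.get_running_loop().time() > deadline:
                raise TimeoutError(f"Session {session_id} still pending after {timeout}s")
            await asyncio.sleep(interval)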
@router.post(
route_builder.build_resource_detail_route("sessions", "session_id", include_tenant_prefix=False) + "/retry",
response_model=dict
)
async def retry_session_cloning(
session_id: str = Path(...),
db: AsyncSession = Depends(get_db),
redis: RedisClient = Depends(get_redis)
):
"""
Retry failed cloning operations
Only available for sessions in "failed" or "partial" status.
"""
try:
session_manager = DemoSessionManager(db, redis)
result = await session_manager.retry_failed_cloning(session_id)
return {
"message": "Cloning retry initiated",
"session_id": session_id,
"result": result
}
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
except Exception as e:
logger.error("Failed to retry cloning", error=str(e))
raise HTTPException(status_code=500, detail=str(e))
@router.delete(
route_builder.build_resource_detail_route("sessions", "session_id", include_tenant_prefix=False),
response_model=dict
@@ -129,3 +216,24 @@ async def destroy_demo_session(
except Exception as e:
logger.error("Failed to destroy session", error=str(e))
raise HTTPException(status_code=500, detail=str(e))
@router.post(
route_builder.build_resource_detail_route("sessions", "session_id", include_tenant_prefix=False) + "/destroy",
response_model=dict
)
async def destroy_demo_session_post(
session_id: str = Path(...),
db: AsyncSession = Depends(get_db),
redis: RedisClient = Depends(get_redis)
):
"""Destroy demo session via POST (for frontend compatibility)"""
try:
session_manager = DemoSessionManager(db, redis)
await session_manager.destroy_session(session_id)
return {"message": "Session destroyed successfully", "session_id": session_id}
except Exception as e:
logger.error("Failed to destroy session", error=str(e))
raise HTTPException(status_code=500, detail=str(e))

View File

@@ -36,12 +36,14 @@ class Settings(BaseSettings):
"individual_bakery": {
"email": "demo.individual@panaderiasanpablo.com",
"name": "Panadería San Pablo - Demo",
"subdomain": "demo-sanpablo"
"subdomain": "demo-sanpablo",
"base_tenant_id": "a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6"
},
"central_baker": {
"email": "demo.central@panaderialaespiga.com",
"name": "Panadería La Espiga - Demo",
"subdomain": "demo-laespiga"
"subdomain": "demo-laespiga",
"base_tenant_id": "b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7"
}
}

View File

@@ -1,5 +1,5 @@
"""Demo Session Service Models"""
from .demo_session import DemoSession, DemoSessionStatus
from .demo_session import DemoSession, DemoSessionStatus, CloningStatus
__all__ = ["DemoSession", "DemoSessionStatus"]
__all__ = ["DemoSession", "DemoSessionStatus", "CloningStatus"]

View File

@@ -14,9 +14,21 @@ from shared.database.base import Base
class DemoSessionStatus(enum.Enum):
"""Demo session status"""
ACTIVE = "active"
EXPIRED = "expired"
DESTROYED = "destroyed"
PENDING = "pending" # Data cloning in progress
READY = "ready" # All data loaded, safe to use
FAILED = "failed" # One or more services failed completely
PARTIAL = "partial" # Some services failed, others succeeded
ACTIVE = "active" # User is actively using the session (deprecated, use READY)
EXPIRED = "expired" # Session TTL exceeded
DESTROYED = "destroyed" # Session terminated
class CloningStatus(enum.Enum):
"""Individual service cloning status"""
NOT_STARTED = "not_started"
IN_PROGRESS = "in_progress"
COMPLETED = "completed"
FAILED = "failed"
class DemoSession(Base):
@@ -37,16 +49,24 @@ class DemoSession(Base):
demo_account_type = Column(String(50), nullable=False) # 'individual_bakery', 'central_baker'
# Session lifecycle
status = Column(SQLEnum(DemoSessionStatus, values_callable=lambda obj: [e.value for e in obj]), default=DemoSessionStatus.ACTIVE, index=True)
status = Column(SQLEnum(DemoSessionStatus, values_callable=lambda obj: [e.value for e in obj]), default=DemoSessionStatus.PENDING, index=True)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), index=True)
expires_at = Column(DateTime(timezone=True), nullable=False, index=True)
last_activity_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
destroyed_at = Column(DateTime(timezone=True), nullable=True)
# Cloning progress tracking
cloning_started_at = Column(DateTime(timezone=True), nullable=True)
cloning_completed_at = Column(DateTime(timezone=True), nullable=True)
total_records_cloned = Column(Integer, default=0)
# Per-service cloning status
cloning_progress = Column(JSONB, default=dict) # {service_name: {status, records, started_at, completed_at, error}}
# Session metrics
request_count = Column(Integer, default=0)
data_cloned = Column(Boolean, default=False)
redis_populated = Column(Boolean, default=False)
data_cloned = Column(Boolean, default=False) # Deprecated: use status instead
redis_populated = Column(Boolean, default=False) # Deprecated: use status instead
# Session metadata
session_metadata = Column(JSONB, default=dict)
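For illustration (not from this commit), a cloning_progress value after a partial clone might look like the following; the completed entries assume the per-service shape returned by the clone endpoints, while the failed entry matches the failure dict built by the orchestrator later in this diff.
cloning_progress = {
    "tenant":    {"status": "completed", "records_cloned": 12,  "duration_ms": 840},
    "inventory": {"status": "completed", "records_cloned": 231, "duration_ms": 3120},
    "sales":     {"status": "failed", "records_cloned": 0,
                  "error": "HTTP 503: upstream unavailable", "duration_ms": 0},
}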

View File

@@ -27,31 +27,55 @@ class DemoCleanupService:
async def cleanup_expired_sessions(self) -> dict:
"""
Find and cleanup all expired sessions
Also cleans up sessions stuck in PENDING for too long (>5 minutes)
Returns:
Cleanup statistics
"""
from datetime import timedelta
logger.info("Starting demo session cleanup")
now = datetime.now(timezone.utc)
stuck_threshold = now - timedelta(minutes=5) # Sessions pending > 5 min are stuck
# Find expired sessions
# Find expired sessions (any status except EXPIRED and DESTROYED)
result = await self.db.execute(
select(DemoSession).where(
DemoSession.status == DemoSessionStatus.ACTIVE,
DemoSession.status.in_([
DemoSessionStatus.PENDING,
DemoSessionStatus.READY,
DemoSessionStatus.PARTIAL,
DemoSessionStatus.FAILED,
DemoSessionStatus.ACTIVE # Legacy status, kept for compatibility
]),
DemoSession.expires_at < now
)
)
expired_sessions = result.scalars().all()
# Also find sessions stuck in PENDING
stuck_result = await self.db.execute(
select(DemoSession).where(
DemoSession.status == DemoSessionStatus.PENDING,
DemoSession.created_at < stuck_threshold
)
)
stuck_sessions = stuck_result.scalars().all()
# Combine both lists
all_sessions_to_cleanup = list(expired_sessions) + list(stuck_sessions)
stats = {
"total_expired": len(expired_sessions),
"total_stuck": len(stuck_sessions),
"total_to_cleanup": len(all_sessions_to_cleanup),
"cleaned_up": 0,
"failed": 0,
"errors": []
}
for session in expired_sessions:
for session in all_sessions_to_cleanup:
try:
# Mark as expired
session.status = DemoSessionStatus.EXPIRED
@@ -128,6 +152,11 @@ class DemoCleanupService:
now = datetime.now(timezone.utc)
# Count by status
pending_count = len([s for s in all_sessions if s.status == DemoSessionStatus.PENDING])
ready_count = len([s for s in all_sessions if s.status == DemoSessionStatus.READY])
partial_count = len([s for s in all_sessions if s.status == DemoSessionStatus.PARTIAL])
failed_count = len([s for s in all_sessions if s.status == DemoSessionStatus.FAILED])
active_count = len([s for s in all_sessions if s.status == DemoSessionStatus.ACTIVE])
expired_count = len([s for s in all_sessions if s.status == DemoSessionStatus.EXPIRED])
destroyed_count = len([s for s in all_sessions if s.status == DemoSessionStatus.DESTROYED])
@@ -135,13 +164,25 @@ class DemoCleanupService:
# Find sessions that should be expired but aren't marked yet
should_be_expired = len([
s for s in all_sessions
if s.status == DemoSessionStatus.ACTIVE and s.expires_at < now
if s.status in [
DemoSessionStatus.PENDING,
DemoSessionStatus.READY,
DemoSessionStatus.PARTIAL,
DemoSessionStatus.FAILED,
DemoSessionStatus.ACTIVE
] and s.expires_at < now
])
return {
"total_sessions": len(all_sessions),
"active_sessions": active_count,
"expired_sessions": expired_count,
"destroyed_sessions": destroyed_count,
"by_status": {
"pending": pending_count,
"ready": ready_count,
"partial": partial_count,
"failed": failed_count,
"active": active_count, # Legacy
"expired": expired_count,
"destroyed": destroyed_count
},
"pending_cleanup": should_be_expired
}
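A hedged sketch of how cleanup_expired_sessions could be scheduled as a periodic background loop; the db_manager and get_redis imports, the DemoCleanupService constructor arguments, and the interval are assumptions, not part of this commit.
import asyncio
import structlog
logger = structlog.get_logger()
async def run_cleanup_loop(interval_seconds: int = 60):
    # Assumed imports and constructor; adjust to the project's actual wiring.
    from app.core.database import db_manager
    from app.core import get_redis
    while True:
        try:
            async with db_manager.session_factory() as db:
                redis = await get_redis()
                stats = await DemoCleanupService(db, redis).cleanup_expired_sessions()
                logger.info("Demo cleanup pass finished", **stats)
        except Exception as e:
            logger.error("Demo cleanup pass failed", error=str(e))
        await asyncio.sleep(interval_seconds)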

View File

@@ -0,0 +1,330 @@
"""
Demo Data Cloning Orchestrator
Coordinates asynchronous cloning across microservices
"""
import asyncio
import httpx
import structlog
from datetime import datetime, timezone
from typing import Dict, Any, List, Optional
import os
from enum import Enum
from app.models.demo_session import CloningStatus
logger = structlog.get_logger()
class ServiceDefinition:
"""Definition of a service that can clone demo data"""
def __init__(self, name: str, url: str, required: bool = True, timeout: float = 10.0):
self.name = name
self.url = url
self.required = required # If True, failure blocks session creation
self.timeout = timeout
class CloneOrchestrator:
"""Orchestrates parallel demo data cloning across services"""
def __init__(self):
self.internal_api_key = os.getenv("INTERNAL_API_KEY", "dev-internal-key-change-in-production")
# Define services that participate in cloning
# URLs should be internal Kubernetes service names
self.services = [
ServiceDefinition(
name="tenant",
url=os.getenv("TENANT_SERVICE_URL", "http://tenant-service:8000"),
required=True, # Tenant must succeed - critical for session
timeout=5.0
),
ServiceDefinition(
name="inventory",
url=os.getenv("INVENTORY_SERVICE_URL", "http://inventory-service:8000"),
required=False, # Optional - provides ingredients/recipes
timeout=10.0
),
ServiceDefinition(
name="recipes",
url=os.getenv("RECIPES_SERVICE_URL", "http://recipes-service:8000"),
required=False, # Optional - provides recipes and production batches
timeout=15.0
),
ServiceDefinition(
name="suppliers",
url=os.getenv("SUPPLIERS_SERVICE_URL", "http://suppliers-service:8000"),
required=False, # Optional - provides supplier data and purchase orders
timeout=20.0 # Longer - clones many entities
),
ServiceDefinition(
name="sales",
url=os.getenv("SALES_SERVICE_URL", "http://sales-service:8000"),
required=False, # Optional - provides sales history
timeout=10.0
),
ServiceDefinition(
name="orders",
url=os.getenv("ORDERS_SERVICE_URL", "http://orders-service:8000"),
required=False, # Optional - provides customer orders & procurement
timeout=15.0 # Slightly longer - clones more entities
),
ServiceDefinition(
name="production",
url=os.getenv("PRODUCTION_SERVICE_URL", "http://production-service:8000"),
required=False, # Optional - provides production batches and quality checks
timeout=20.0 # Longer - clones many entities
),
ServiceDefinition(
name="forecasting",
url=os.getenv("FORECASTING_SERVICE_URL", "http://forecasting-service:8000"),
required=False, # Optional - provides historical forecasts
timeout=15.0
),
]
async def clone_all_services(
self,
base_tenant_id: str,
virtual_tenant_id: str,
demo_account_type: str,
session_id: str
) -> Dict[str, Any]:
"""
Orchestrate cloning across all services in parallel
Args:
base_tenant_id: Template tenant UUID
virtual_tenant_id: Target virtual tenant UUID
demo_account_type: Type of demo account
session_id: Session ID for tracing
Returns:
Dictionary with overall status and per-service results
"""
logger.info(
"Starting orchestrated cloning",
session_id=session_id,
virtual_tenant_id=virtual_tenant_id,
demo_account_type=demo_account_type,
service_count=len(self.services)
)
start_time = datetime.now(timezone.utc)
# Create tasks for all services
tasks = []
service_map = {}
for service_def in self.services:
task = asyncio.create_task(
self._clone_service(
service_def=service_def,
base_tenant_id=base_tenant_id,
virtual_tenant_id=virtual_tenant_id,
demo_account_type=demo_account_type,
session_id=session_id
)
)
tasks.append(task)
service_map[task] = service_def.name
# Wait for all tasks to complete (with individual timeouts)
results = await asyncio.gather(*tasks, return_exceptions=True)
# Process results
service_results = {}
total_records = 0
failed_services = []
required_service_failed = False
for task, result in zip(tasks, results):
service_name = service_map[task]
service_def = next(s for s in self.services if s.name == service_name)
if isinstance(result, Exception):
logger.error(
"Service cloning failed with exception",
service=service_name,
error=str(result)
)
service_results[service_name] = {
"status": CloningStatus.FAILED.value,
"records_cloned": 0,
"error": str(result),
"duration_ms": 0
}
failed_services.append(service_name)
if service_def.required:
required_service_failed = True
else:
service_results[service_name] = result
if result.get("status") == "completed":
total_records += result.get("records_cloned", 0)
elif result.get("status") == "failed":
failed_services.append(service_name)
if service_def.required:
required_service_failed = True
# Determine overall status
if required_service_failed:
overall_status = "failed"
elif failed_services:
overall_status = "partial"
else:
overall_status = "ready"
duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
result = {
"overall_status": overall_status,
"total_records_cloned": total_records,
"duration_ms": duration_ms,
"services": service_results,
"failed_services": failed_services,
"completed_at": datetime.now(timezone.utc).isoformat()
}
logger.info(
"Orchestrated cloning completed",
session_id=session_id,
overall_status=overall_status,
total_records=total_records,
duration_ms=duration_ms,
failed_services=failed_services
)
return result
async def _clone_service(
self,
service_def: ServiceDefinition,
base_tenant_id: str,
virtual_tenant_id: str,
demo_account_type: str,
session_id: str
) -> Dict[str, Any]:
"""
Clone data from a single service
Args:
service_def: Service definition
base_tenant_id: Template tenant UUID
virtual_tenant_id: Target virtual tenant UUID
demo_account_type: Type of demo account
session_id: Session ID for tracing
Returns:
Cloning result for this service
"""
logger.info(
"Cloning service data",
service=service_def.name,
url=service_def.url,
session_id=session_id
)
try:
async with httpx.AsyncClient(timeout=service_def.timeout) as client:
response = await client.post(
f"{service_def.url}/internal/demo/clone",
params={
"base_tenant_id": base_tenant_id,
"virtual_tenant_id": virtual_tenant_id,
"demo_account_type": demo_account_type,
"session_id": session_id
},
headers={
"X-Internal-API-Key": self.internal_api_key
}
)
if response.status_code == 200:
result = response.json()
logger.info(
"Service cloning succeeded",
service=service_def.name,
records=result.get("records_cloned", 0),
duration_ms=result.get("duration_ms", 0)
)
return result
else:
error_msg = f"HTTP {response.status_code}: {response.text}"
logger.error(
"Service cloning failed",
service=service_def.name,
error=error_msg
)
return {
"service": service_def.name,
"status": "failed",
"records_cloned": 0,
"error": error_msg,
"duration_ms": 0
}
except (httpx.TimeoutException, asyncio.TimeoutError):  # httpx raises its own timeout exceptions, not asyncio.TimeoutError
error_msg = f"Timeout after {service_def.timeout}s"
logger.error(
"Service cloning timeout",
service=service_def.name,
timeout=service_def.timeout
)
return {
"service": service_def.name,
"status": "failed",
"records_cloned": 0,
"error": error_msg,
"duration_ms": int(service_def.timeout * 1000)
}
except Exception as e:
logger.error(
"Service cloning exception",
service=service_def.name,
error=str(e),
exc_info=True
)
return {
"service": service_def.name,
"status": "failed",
"records_cloned": 0,
"error": str(e),
"duration_ms": 0
}
async def health_check_services(self) -> Dict[str, bool]:
"""
Check health of all cloning endpoints
Returns:
Dictionary mapping service names to availability status
"""
tasks = []
service_names = []
for service_def in self.services:
task = asyncio.create_task(self._check_service_health(service_def))
tasks.append(task)
service_names.append(service_def.name)
results = await asyncio.gather(*tasks, return_exceptions=True)
return {
name: (result is True)
for name, result in zip(service_names, results)
}
async def _check_service_health(self, service_def: ServiceDefinition) -> bool:
"""Check if a service's clone endpoint is available"""
try:
async with httpx.AsyncClient(timeout=2.0) as client:
response = await client.get(
f"{service_def.url}/internal/demo/clone/health",
headers={"X-Internal-API-Key": self.internal_api_key}
)
return response.status_code == 200
except Exception:
return False
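For context, a minimal sketch of the /internal/demo/clone endpoint each participating service is expected to expose; only the query parameters, header, and response keys mirror the orchestrator calls above, while the router wiring, service name, and cloning body are placeholders.
import os
import time
from fastapi import APIRouter, Header, HTTPException
router = APIRouter(prefix="/internal/demo")
INTERNAL_API_KEY = os.getenv("INTERNAL_API_KEY", "dev-internal-key-change-in-production")
@router.post("/clone")
async def clone_demo_data(
    base_tenant_id: str,
    virtual_tenant_id: str,
    demo_account_type: str,
    session_id: str,
    x_internal_api_key: str = Header(...),
) -> dict:
    if x_internal_api_key != INTERNAL_API_KEY:
        raise HTTPException(status_code=401, detail="Invalid internal API key")
    started = time.monotonic()
    records_cloned = 0  # placeholder: copy rows from base_tenant_id to virtual_tenant_id here
    return {
        "service": "inventory",  # this service's own name
        "status": "completed",   # or "failed", with an "error" key
        "records_cloned": records_cloned,
        "duration_ms": int((time.monotonic() - started) * 1000),
    }
@router.get("/clone/health")
async def clone_health() -> dict:
    return {"status": "ok"}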

View File

@@ -1,166 +0,0 @@
"""
Kubernetes Job-based Demo Data Cloner
Triggers a K8s Job to clone demo data at the database level
"""
import httpx
import structlog
from typing import Dict, Any
import os
logger = structlog.get_logger()
class K8sJobCloner:
"""Triggers Kubernetes Jobs to clone demo data"""
def __init__(self):
self.k8s_api_url = os.getenv("KUBERNETES_SERVICE_HOST")
self.namespace = os.getenv("POD_NAMESPACE", "bakery-ia")
self.clone_job_image = os.getenv("CLONE_JOB_IMAGE", "bakery/inventory-service:latest")
# Service account token for K8s API access
with open("/var/run/secrets/kubernetes.io/serviceaccount/token", "r") as f:
self.token = f.read()
async def clone_tenant_data(
self,
session_id: str,
base_demo_tenant_id: str,
virtual_tenant_id: str,
demo_account_type: str
) -> Dict[str, Any]:
"""
Clone demo data by creating a Kubernetes Job
Args:
session_id: Session ID
base_demo_tenant_id: Base demo tenant UUID (not used in job approach)
virtual_tenant_id: Virtual tenant UUID for this session
demo_account_type: Type of demo account
Returns:
Job creation status
"""
logger.info(
"Triggering demo data cloning job",
session_id=session_id,
virtual_tenant_id=virtual_tenant_id,
demo_account_type=demo_account_type,
clone_image=self.clone_job_image
)
job_name = f"demo-clone-{virtual_tenant_id[:8]}"
# Create Job manifest
job_manifest = {
"apiVersion": "batch/v1",
"kind": "Job",
"metadata": {
"name": job_name,
"namespace": self.namespace,
"labels": {
"app": "demo-clone",
"session-id": session_id,
"component": "runtime"
}
},
"spec": {
"ttlSecondsAfterFinished": 3600,
"backoffLimit": 2,
"template": {
"metadata": {
"labels": {"app": "demo-clone"}
},
"spec": {
"restartPolicy": "Never",
"containers": [{
"name": "clone-data",
"image": self.clone_job_image, # Configured via environment variable
"imagePullPolicy": "IfNotPresent", # Don't pull if image exists locally
"command": ["python", "/app/scripts/demo/clone_demo_tenant.py"],
"env": [
{"name": "VIRTUAL_TENANT_ID", "value": virtual_tenant_id},
{"name": "DEMO_ACCOUNT_TYPE", "value": demo_account_type},
{
"name": "INVENTORY_DATABASE_URL",
"valueFrom": {
"secretKeyRef": {
"name": "database-secrets",
"key": "INVENTORY_DATABASE_URL"
}
}
},
{
"name": "SALES_DATABASE_URL",
"valueFrom": {
"secretKeyRef": {
"name": "database-secrets",
"key": "SALES_DATABASE_URL"
}
}
},
{
"name": "ORDERS_DATABASE_URL",
"valueFrom": {
"secretKeyRef": {
"name": "database-secrets",
"key": "ORDERS_DATABASE_URL"
}
}
},
{"name": "LOG_LEVEL", "value": "INFO"}
],
"resources": {
"requests": {"memory": "256Mi", "cpu": "100m"},
"limits": {"memory": "512Mi", "cpu": "500m"}
}
}]
}
}
}
}
try:
# Create the Job via K8s API
async with httpx.AsyncClient(verify=False, timeout=30.0) as client:
response = await client.post(
f"https://{self.k8s_api_url}/apis/batch/v1/namespaces/{self.namespace}/jobs",
json=job_manifest,
headers={
"Authorization": f"Bearer {self.token}",
"Content-Type": "application/json"
}
)
if response.status_code == 201:
logger.info(
"Demo clone job created successfully",
job_name=job_name,
session_id=session_id
)
return {
"success": True,
"job_name": job_name,
"method": "kubernetes_job"
}
else:
logger.error(
"Failed to create demo clone job",
status_code=response.status_code,
response=response.text
)
return {
"success": False,
"error": f"K8s API returned {response.status_code}"
}
except Exception as e:
logger.error(
"Error creating demo clone job",
error=str(e),
exc_info=True
)
return {
"success": False,
"error": str(e)
}

View File

@@ -11,8 +11,9 @@ import uuid
import secrets
import structlog
from app.models import DemoSession, DemoSessionStatus
from app.models import DemoSession, DemoSessionStatus, CloningStatus
from app.core import RedisClient, settings
from app.services.clone_orchestrator import CloneOrchestrator
logger = structlog.get_logger()
@@ -23,6 +24,7 @@ class DemoSessionManager:
def __init__(self, db: AsyncSession, redis: RedisClient):
self.db = db
self.redis = redis
self.orchestrator = CloneOrchestrator()
async def create_session(
self,
@@ -56,16 +58,23 @@ class DemoSessionManager:
if not demo_config:
raise ValueError(f"Invalid demo account type: {demo_account_type}")
# Get base tenant ID for cloning
base_tenant_id_str = demo_config.get("base_tenant_id")
if not base_tenant_id_str:
raise ValueError(f"Base tenant ID not configured for demo account type: {demo_account_type}")
base_tenant_id = uuid.UUID(base_tenant_id_str)
# Create session record
session = DemoSession(
session_id=session_id,
user_id=uuid.UUID(user_id) if user_id else None,
ip_address=ip_address,
user_agent=user_agent,
base_demo_tenant_id=uuid.uuid4(), # Will be set by seeding script
base_demo_tenant_id=base_tenant_id,
virtual_tenant_id=virtual_tenant_id,
demo_account_type=demo_account_type,
status=DemoSessionStatus.ACTIVE,
status=DemoSessionStatus.PENDING, # Start as pending until cloning completes
created_at=datetime.now(timezone.utc),
expires_at=datetime.now(timezone.utc) + timedelta(
minutes=settings.DEMO_SESSION_DURATION_MINUTES
@@ -265,3 +274,173 @@ class DemoSessionManager:
) / max(len([s for s in all_sessions if s.destroyed_at]), 1),
"total_requests": sum(s.request_count for s in all_sessions)
}
async def trigger_orchestrated_cloning(
self,
session: DemoSession,
base_tenant_id: str
) -> Dict[str, Any]:
"""
Trigger orchestrated cloning across all services
Args:
session: Demo session
base_tenant_id: Template tenant ID to clone from
Returns:
Orchestration result
"""
logger.info(
"Triggering orchestrated cloning",
session_id=session.session_id,
virtual_tenant_id=str(session.virtual_tenant_id)
)
# Mark cloning as started
session.cloning_started_at = datetime.now(timezone.utc)
await self.db.commit()
# Run orchestration
result = await self.orchestrator.clone_all_services(
base_tenant_id=base_tenant_id,
virtual_tenant_id=str(session.virtual_tenant_id),
demo_account_type=session.demo_account_type,
session_id=session.session_id
)
# Update session with results
await self._update_session_from_clone_result(session, result)
return result
async def _update_session_from_clone_result(
self,
session: DemoSession,
clone_result: Dict[str, Any]
):
"""Update session with cloning results"""
# Map overall status to session status
overall_status = clone_result.get("overall_status")
if overall_status == "ready":
session.status = DemoSessionStatus.READY
elif overall_status == "failed":
session.status = DemoSessionStatus.FAILED
elif overall_status == "partial":
session.status = DemoSessionStatus.PARTIAL
# Update cloning metadata
session.cloning_completed_at = datetime.now(timezone.utc)
session.total_records_cloned = clone_result.get("total_records_cloned", 0)
session.cloning_progress = clone_result.get("services", {})
# Mark legacy flags for backward compatibility
if overall_status in ["ready", "partial"]:
session.data_cloned = True
session.redis_populated = True
await self.db.commit()
await self.db.refresh(session)
# Cache status in Redis for fast polling
await self._cache_session_status(session)
logger.info(
"Session updated with clone results",
session_id=session.session_id,
status=session.status.value,
total_records=session.total_records_cloned
)
async def _cache_session_status(self, session: DemoSession):
"""Cache session status in Redis for fast status checks"""
status_key = f"session:{session.session_id}:status"
status_data = {
"session_id": session.session_id,
"status": session.status.value,
"progress": session.cloning_progress,
"total_records_cloned": session.total_records_cloned,
"cloning_started_at": session.cloning_started_at.isoformat() if session.cloning_started_at else None,
"cloning_completed_at": session.cloning_completed_at.isoformat() if session.cloning_completed_at else None,
"expires_at": session.expires_at.isoformat()
}
import json as json_module
await self.redis.client.setex(
status_key,
7200, # Cache for 2 hours
json_module.dumps(status_data) # Convert to JSON string
)
async def get_session_status(self, session_id: str) -> Optional[Dict[str, Any]]:
"""
Get current session status with cloning progress
Args:
session_id: Session ID
Returns:
Status information including per-service progress
"""
# Try Redis cache first
status_key = f"session:{session_id}:status"
cached = await self.redis.client.get(status_key)
if cached:
import json
return json.loads(cached)
# Fall back to database
session = await self.get_session(session_id)
if not session:
return None
await self._cache_session_status(session)
return {
"session_id": session.session_id,
"status": session.status.value,
"progress": session.cloning_progress,
"total_records_cloned": session.total_records_cloned,
"cloning_started_at": session.cloning_started_at.isoformat() if session.cloning_started_at else None,
"cloning_completed_at": session.cloning_completed_at.isoformat() if session.cloning_completed_at else None,
"expires_at": session.expires_at.isoformat()
}
async def retry_failed_cloning(
self,
session_id: str,
services: Optional[list] = None
) -> Dict[str, Any]:
"""
Retry failed cloning operations
Args:
session_id: Session ID
services: Specific services to retry (defaults to all failed)
Returns:
Retry result
"""
session = await self.get_session(session_id)
if not session:
raise ValueError(f"Session not found: {session_id}")
if session.status not in [DemoSessionStatus.FAILED, DemoSessionStatus.PARTIAL]:
raise ValueError(f"Cannot retry session in {session.status.value} state")
logger.info(
"Retrying failed cloning",
session_id=session_id,
services=services
)
# Get base tenant ID from config
demo_config = settings.DEMO_ACCOUNTS.get(session.demo_account_type, {})
base_tenant_id = demo_config.get("base_tenant_id", str(session.base_demo_tenant_id))
# Trigger new cloning attempt
result = await self.trigger_orchestrated_cloning(session, base_tenant_id)
return result

View File

@@ -0,0 +1,81 @@
"""Add cloning status tracking
Revision ID: 002
Revises: a1b2c3d4e5f6
Create Date: 2025-01-10
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers
revision = '002'
down_revision = 'a1b2c3d4e5f6' # References the actual initial schema revision
branch_labels = None
depends_on = None
def upgrade():
"""Add new status values and cloning tracking fields"""
# Add new columns for cloning progress
op.add_column('demo_sessions', sa.Column('cloning_started_at', sa.DateTime(timezone=True), nullable=True))
op.add_column('demo_sessions', sa.Column('cloning_completed_at', sa.DateTime(timezone=True), nullable=True))
op.add_column('demo_sessions', sa.Column('total_records_cloned', sa.Integer(), server_default='0', nullable=False))
op.add_column('demo_sessions', sa.Column('cloning_progress', postgresql.JSONB(astext_type=sa.Text()), server_default='{}', nullable=False))
# Update the status enum to include new values
# PostgreSQL doesn't support IF NOT EXISTS for enum values in older versions
# We need to check if values exist before adding them
from sqlalchemy import text
conn = op.get_bind()
# Check and add each enum value if it doesn't exist
enum_values_to_add = ['pending', 'ready', 'failed', 'partial']
for value in enum_values_to_add:
# Check if the enum value already exists
result = conn.execute(text("""
SELECT EXISTS (
SELECT 1 FROM pg_enum
WHERE enumlabel = :value
AND enumtypid = (
SELECT oid FROM pg_type WHERE typname = 'demosessionstatus'
)
);
"""), {"value": value})
exists = result.scalar()
if not exists:
# Add the enum value
# Note: before PostgreSQL 12, ALTER TYPE ... ADD VALUE cannot run inside a transaction block;
# on 12+ it is allowed, but the new value cannot be used until that transaction commits,
# so the UPDATE below must not run in the same transaction as these ADD VALUE statements
# (see the autocommit_block sketch after upgrade())
conn.execute(text(f"ALTER TYPE demosessionstatus ADD VALUE '{value}'"))
# Update existing sessions: active → ready
op.execute("""
UPDATE demo_sessions
SET status = 'ready'
WHERE status = 'active' AND data_cloned = true;
""")
def downgrade():
"""Remove cloning status tracking"""
# Remove new columns
op.drop_column('demo_sessions', 'cloning_progress')
op.drop_column('demo_sessions', 'total_records_cloned')
op.drop_column('demo_sessions', 'cloning_completed_at')
op.drop_column('demo_sessions', 'cloning_started_at')
# Note: Cannot easily remove enum values in PostgreSQL
# Migration down would require recreating the enum type
op.execute("""
UPDATE demo_sessions
SET status = 'active'
WHERE status IN ('ready', 'pending', 'failed', 'partial');
""")