New enterprise feature
This commit is contained in:
@@ -3,5 +3,6 @@
|
||||
from .demo_sessions import router as demo_sessions_router
|
||||
from .demo_accounts import router as demo_accounts_router
|
||||
from .demo_operations import router as demo_operations_router
|
||||
from .internal import router as internal_router
|
||||
|
||||
__all__ = ["demo_sessions_router", "demo_accounts_router", "demo_operations_router"]
|
||||
__all__ = ["demo_sessions_router", "demo_accounts_router", "demo_operations_router", "internal_router"]
|
||||
|
||||
@@ -5,6 +5,7 @@ Demo Sessions API - Atomic CRUD operations on DemoSession model
|
||||
from fastapi import APIRouter, Depends, HTTPException, Path, Query, Request
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
from datetime import datetime, timezone
|
||||
import structlog
|
||||
import jwt
|
||||
|
||||
@@ -54,6 +55,41 @@ async def _background_cloning_task(session_id: str, session_obj_id: UUID, base_t
|
||||
error=str(e),
|
||||
exc_info=True
|
||||
)
|
||||
# Attempt to update session status to failed if possible
|
||||
try:
|
||||
from app.core.database import db_manager
|
||||
from app.models import DemoSession
|
||||
from sqlalchemy import select, update
|
||||
|
||||
# Try to update the session directly in DB to mark it as failed
|
||||
async with db_manager.session_factory() as update_db:
|
||||
from app.models import DemoSessionStatus
|
||||
update_result = await update_db.execute(
|
||||
update(DemoSession)
|
||||
.where(DemoSession.id == session_obj_id)
|
||||
.values(status=DemoSessionStatus.FAILED, cloning_completed_at=datetime.now(timezone.utc))
|
||||
)
|
||||
await update_db.commit()
|
||||
except Exception as update_error:
|
||||
logger.error(
|
||||
"Failed to update session status to FAILED after background task error",
|
||||
session_id=session_id,
|
||||
error=str(update_error)
|
||||
)
|
||||
|
||||
|
||||
def _handle_task_result(task, session_id: str):
    """Log the outcome of a finished background cloning task.

    Meant to run as a done-callback, so it must never raise: any failure of
    the task is logged instead of being propagated.

    Args:
        task: The completed task/future whose outcome should be inspected.
        session_id: Demo session identifier, attached to the log entries.
    """
    # result() on a cancelled asyncio task raises CancelledError, which is a
    # BaseException on Python 3.8+ and would slip past `except Exception`.
    if task.cancelled():
        logger.warning(
            "Background cloning task was cancelled",
            session_id=session_id
        )
        return

    try:
        # This will raise the exception if the task failed
        task.result()
    except Exception as e:
        logger.error(
            "Background cloning task failed with exception",
            session_id=session_id,
            error=str(e),
            exc_info=True
        )
|
||||
|
||||
|
||||
@router.post(
|
||||
@@ -77,6 +113,7 @@ async def create_demo_session(
|
||||
session_manager = DemoSessionManager(db, redis)
|
||||
session = await session_manager.create_session(
|
||||
demo_account_type=request.demo_account_type,
|
||||
subscription_tier=request.subscription_tier,
|
||||
user_id=request.user_id,
|
||||
ip_address=ip_address,
|
||||
user_agent=user_agent
|
||||
@@ -92,10 +129,14 @@ async def create_demo_session(
|
||||
base_tenant_id = demo_config.get("base_tenant_id", str(session.base_demo_tenant_id))
|
||||
|
||||
# Start cloning in background task with session ID (not session object)
|
||||
asyncio.create_task(
|
||||
# Store task reference in case we need to track it
|
||||
task = asyncio.create_task(
|
||||
_background_cloning_task(session.session_id, session.id, base_tenant_id)
|
||||
)
|
||||
|
||||
# Add error handling for the task to prevent silent failures
|
||||
task.add_done_callback(lambda t: _handle_task_result(t, session.session_id))
|
||||
|
||||
# Generate session token
|
||||
session_token = jwt.encode(
|
||||
{
|
||||
@@ -104,8 +145,8 @@ async def create_demo_session(
|
||||
"demo_account_type": request.demo_account_type,
|
||||
"exp": session.expires_at.timestamp()
|
||||
},
|
||||
"demo-secret-key",
|
||||
algorithm="HS256"
|
||||
settings.JWT_SECRET_KEY,
|
||||
algorithm=settings.JWT_ALGORITHM
|
||||
)
|
||||
|
||||
return {
|
||||
|
||||
82
services/demo_session/app/api/internal.py
Normal file
82
services/demo_session/app/api/internal.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""
|
||||
Internal API for Demo Session Service
|
||||
Handles internal service-to-service operations
|
||||
"""
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Header
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import structlog
|
||||
|
||||
from app.core import get_db, settings
|
||||
from app.core.redis_wrapper import get_redis, DemoRedisWrapper
|
||||
from app.services.data_cloner import DemoDataCloner
|
||||
|
||||
logger = structlog.get_logger()
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
async def verify_internal_api_key(x_internal_api_key: str = Header(None)):
    """Verify internal API key for service-to-service communication.

    Args:
        x_internal_api_key: Value of the internal API key request header;
            ``None`` when the header is absent.

    Returns:
        True when the supplied key matches ``settings.INTERNAL_API_KEY``.

    Raises:
        HTTPException: 403 when the key is missing, wrong, or when no
            internal key is configured on this service.
    """
    import hmac

    required_key = settings.INTERNAL_API_KEY

    # Fail closed: with no configured key, a request that omits the header
    # would otherwise compare None to None and be let through.
    expected = (required_key or "").encode("utf-8")
    supplied = (x_internal_api_key or "").encode("utf-8")

    # Constant-time comparison avoids leaking key contents through timing.
    authorized = bool(expected) and hmac.compare_digest(supplied, expected)
    if not authorized:
        logger.warning("Unauthorized internal API access attempted")
        raise HTTPException(status_code=403, detail="Invalid internal API key")
    return True
|
||||
|
||||
|
||||
@router.post("/internal/demo/cleanup")
async def cleanup_demo_session_internal(
    cleanup_request: dict,
    db: AsyncSession = Depends(get_db),
    redis: DemoRedisWrapper = Depends(get_redis),
    _: bool = Depends(verify_internal_api_key)
):
    """
    Internal endpoint to cleanup demo session data for a specific tenant
    Used by rollback mechanisms

    Args:
        cleanup_request: Request body; must contain non-empty ``tenant_id``
            and ``session_id`` entries.
        db: Database session injected by ``get_db``.
        redis: Redis wrapper injected by ``get_redis``.
        _: Result of the internal API key check (unused, enforces auth).

    Returns:
        Dict with ``status`` set to "completed" plus the echoed
        ``tenant_id`` and ``session_id``.

    Raises:
        HTTPException: 400 when a required parameter is missing, 500 when
            the cleanup itself fails.
    """
    try:
        tenant_id = cleanup_request.get('tenant_id')
        session_id = cleanup_request.get('session_id')

        if not all([tenant_id, session_id]):
            raise HTTPException(
                status_code=400,
                detail="Missing required parameters: tenant_id, session_id"
            )

        logger.info(
            "Internal cleanup requested",
            tenant_id=tenant_id,
            session_id=session_id
        )

        data_cloner = DemoDataCloner(db, redis)

        # Delete session data for this tenant
        await data_cloner.delete_session_data(
            str(tenant_id),
            session_id
        )

        logger.info(
            "Internal cleanup completed",
            tenant_id=tenant_id,
            session_id=session_id
        )

        return {
            "status": "completed",
            "tenant_id": tenant_id,
            "session_id": session_id
        }

    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must reach the client
        # unchanged instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        logger.error(
            "Internal cleanup failed",
            error=str(e),
            tenant_id=cleanup_request.get('tenant_id'),
            session_id=cleanup_request.get('session_id'),
            exc_info=True
        )
        raise HTTPException(status_code=500, detail=f"Failed to cleanup demo session: {str(e)}") from e
|
||||
@@ -10,7 +10,7 @@ import structlog
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from app.core import settings, DatabaseManager
|
||||
from app.api import demo_sessions, demo_accounts, demo_operations
|
||||
from app.api import demo_sessions, demo_accounts, demo_operations, internal
|
||||
from shared.redis_utils import initialize_redis, close_redis
|
||||
|
||||
logger = structlog.get_logger()
|
||||
@@ -81,6 +81,7 @@ async def global_exception_handler(request: Request, exc: Exception):
|
||||
app.include_router(demo_sessions.router)
|
||||
app.include_router(demo_accounts.router)
|
||||
app.include_router(demo_operations.router)
|
||||
app.include_router(internal.router)
|
||||
|
||||
|
||||
@app.get("/")
|
||||
|
||||
@@ -16,6 +16,10 @@ from app.models.demo_session import CloningStatus
|
||||
logger = structlog.get_logger()
|
||||
|
||||
|
||||
# Import json for Redis serialization
|
||||
import json
|
||||
|
||||
|
||||
class ServiceDefinition:
|
||||
"""Definition of a service that can clone demo data"""
|
||||
|
||||
@@ -29,9 +33,10 @@ class ServiceDefinition:
|
||||
class CloneOrchestrator:
|
||||
"""Orchestrates parallel demo data cloning across services"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, redis_manager=None):
|
||||
from app.core.config import settings
|
||||
self.internal_api_key = settings.INTERNAL_API_KEY
|
||||
self.redis_manager = redis_manager # For real-time progress updates
|
||||
|
||||
# Define services that participate in cloning
|
||||
# URLs should be internal Kubernetes service names
|
||||
@@ -110,6 +115,66 @@ class CloneOrchestrator:
|
||||
),
|
||||
]
|
||||
|
||||
async def _update_progress_in_redis(
    self,
    session_id: str,
    progress_data: Dict[str, Any]
):
    """Update cloning progress in Redis for real-time frontend polling.

    Best-effort: any failure is logged as a warning and swallowed so that a
    progress-reporting problem never aborts the cloning itself.

    Args:
        session_id: Demo session whose status entry is updated; the entry is
            stored under ``session:{session_id}:status``.
        progress_data: New progress snapshot. The ``parent`` entry's
            ``total_records_cloned`` and each ``children`` entry's
            ``records_cloned`` are summed into ``total_records_cloned``.
    """
    if not self.redis_manager:
        return  # Skip if no Redis manager provided

    def _as_count(value):
        # Ignore missing/non-numeric counts so one malformed entry does not
        # discard the whole progress update.
        return value if isinstance(value, (int, float)) else 0

    try:
        status_key = f"session:{session_id}:status"
        client = await self.redis_manager.get_client()

        # Get existing status data; fall back to a fresh structure when the
        # cached value is absent or unusable. Without the fallback a corrupt
        # value would make every later update fail until the key expires.
        status_data = None
        existing_data_str = await client.get(status_key)
        if existing_data_str:
            try:
                status_data = json.loads(existing_data_str)
            except (TypeError, ValueError):
                logger.warning(
                    "Discarding unreadable cached session status",
                    session_id=session_id
                )
        if not isinstance(status_data, dict):
            # Initialize basic status structure
            status_data = {
                "session_id": session_id,
                "status": "pending",
                "progress": {},
                "total_records_cloned": 0
            }

        # Update progress field with new data
        status_data["progress"] = progress_data

        # Calculate total records cloned from progress
        total_records = 0
        parent = progress_data.get("parent")
        if isinstance(parent, dict):
            total_records += _as_count(parent.get("total_records_cloned"))
        for child in progress_data.get("children") or []:
            if isinstance(child, dict):
                total_records += _as_count(child.get("records_cloned"))

        status_data["total_records_cloned"] = total_records

        # Update Redis with 2-hour TTL
        await client.setex(
            status_key,
            7200,  # 2 hours
            json.dumps(status_data)
        )

        logger.debug(
            "Updated progress in Redis",
            session_id=session_id,
            progress_keys=list(progress_data.keys())
        )
    except Exception as e:
        # Don't fail cloning if progress update fails
        logger.warning(
            "Failed to update progress in Redis",
            session_id=session_id,
            error=str(e)
        )
|
||||
|
||||
async def clone_all_services(
|
||||
self,
|
||||
base_tenant_id: str,
|
||||
@@ -535,6 +600,14 @@ class CloneOrchestrator:
|
||||
try:
|
||||
# Step 1: Clone parent tenant
|
||||
logger.info("Cloning parent tenant", session_id=session_id)
|
||||
|
||||
# Update progress: Parent cloning started
|
||||
await self._update_progress_in_redis(session_id, {
|
||||
"parent": {"overall_status": "pending"},
|
||||
"children": [],
|
||||
"distribution": {}
|
||||
})
|
||||
|
||||
parent_result = await self.clone_all_services(
|
||||
base_tenant_id=base_tenant_id,
|
||||
virtual_tenant_id=parent_tenant_id,
|
||||
@@ -543,6 +616,13 @@ class CloneOrchestrator:
|
||||
)
|
||||
results["parent"] = parent_result
|
||||
|
||||
# Update progress: Parent cloning completed
|
||||
await self._update_progress_in_redis(session_id, {
|
||||
"parent": parent_result,
|
||||
"children": [],
|
||||
"distribution": {}
|
||||
})
|
||||
|
||||
# BUG-006 FIX: Track parent for potential rollback
|
||||
if parent_result.get("overall_status") not in ["failed"]:
|
||||
rollback_stack.append({
|
||||
@@ -599,6 +679,13 @@ class CloneOrchestrator:
|
||||
child_count=len(child_configs)
|
||||
)
|
||||
|
||||
# Update progress: Children cloning started
|
||||
await self._update_progress_in_redis(session_id, {
|
||||
"parent": parent_result,
|
||||
"children": [{"status": "pending"} for _ in child_configs],
|
||||
"distribution": {}
|
||||
})
|
||||
|
||||
child_tasks = []
|
||||
for idx, (child_config, child_id) in enumerate(zip(child_configs, child_tenant_ids)):
|
||||
task = self._clone_child_outlet(
|
||||
@@ -617,6 +704,13 @@ class CloneOrchestrator:
|
||||
for r in children_results
|
||||
]
|
||||
|
||||
# Update progress: Children cloning completed
|
||||
await self._update_progress_in_redis(session_id, {
|
||||
"parent": parent_result,
|
||||
"children": results["children"],
|
||||
"distribution": {}
|
||||
})
|
||||
|
||||
# BUG-006 FIX: Track children for potential rollback
|
||||
for child_result in results["children"]:
|
||||
if child_result.get("status") not in ["failed"]:
|
||||
@@ -630,6 +724,13 @@ class CloneOrchestrator:
|
||||
distribution_url = os.getenv("DISTRIBUTION_SERVICE_URL", "http://distribution-service:8000")
|
||||
logger.info("Setting up distribution data", session_id=session_id, distribution_url=distribution_url)
|
||||
|
||||
# Update progress: Distribution starting
|
||||
await self._update_progress_in_redis(session_id, {
|
||||
"parent": parent_result,
|
||||
"children": results["children"],
|
||||
"distribution": {"status": "pending"}
|
||||
})
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=120.0) as client: # Increased timeout for distribution setup
|
||||
response = await client.post(
|
||||
@@ -646,6 +747,13 @@ class CloneOrchestrator:
|
||||
if response.status_code == 200:
|
||||
results["distribution"] = response.json()
|
||||
logger.info("Distribution setup completed successfully", session_id=session_id)
|
||||
|
||||
# Update progress: Distribution completed
|
||||
await self._update_progress_in_redis(session_id, {
|
||||
"parent": parent_result,
|
||||
"children": results["children"],
|
||||
"distribution": results["distribution"]
|
||||
})
|
||||
else:
|
||||
error_detail = response.text if response.text else f"HTTP {response.status_code}"
|
||||
results["distribution"] = {
|
||||
|
||||
@@ -27,7 +27,7 @@ class DemoSessionManager:
|
||||
self.db = db
|
||||
self.redis = redis
|
||||
self.repository = DemoSessionRepository(db)
|
||||
self.orchestrator = CloneOrchestrator()
|
||||
self.orchestrator = CloneOrchestrator(redis_manager=redis) # Pass Redis for real-time progress updates
|
||||
|
||||
async def create_session(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user