Add internal service-to-service API, background cloning-task error handling, Redis-backed cloning progress updates, and settings-based JWT signing for demo sessions

This commit is contained in:
Urtzi Alfaro
2025-11-30 09:12:40 +01:00
parent f9d0eec6ec
commit 972db02f6d
176 changed files with 19741 additions and 1361 deletions

View File

@@ -3,5 +3,6 @@
from .demo_sessions import router as demo_sessions_router
from .demo_accounts import router as demo_accounts_router
from .demo_operations import router as demo_operations_router
from .internal import router as internal_router
__all__ = ["demo_sessions_router", "demo_accounts_router", "demo_operations_router"]
__all__ = ["demo_sessions_router", "demo_accounts_router", "demo_operations_router", "internal_router"]

View File

@@ -5,6 +5,7 @@ Demo Sessions API - Atomic CRUD operations on DemoSession model
from fastapi import APIRouter, Depends, HTTPException, Path, Query, Request
from typing import Optional
from uuid import UUID
from datetime import datetime, timezone
import structlog
import jwt
@@ -54,6 +55,41 @@ async def _background_cloning_task(session_id: str, session_obj_id: UUID, base_t
error=str(e),
exc_info=True
)
# Attempt to update session status to failed if possible
try:
from app.core.database import db_manager
from app.models import DemoSession
from sqlalchemy import select, update
# Try to update the session directly in DB to mark it as failed
async with db_manager.session_factory() as update_db:
from app.models import DemoSessionStatus
update_result = await update_db.execute(
update(DemoSession)
.where(DemoSession.id == session_obj_id)
.values(status=DemoSessionStatus.FAILED, cloning_completed_at=datetime.now(timezone.utc))
)
await update_db.commit()
except Exception as update_error:
logger.error(
"Failed to update session status to FAILED after background task error",
session_id=session_id,
error=str(update_error)
)
def _handle_task_result(task, session_id: str):
"""Handle the result of the background cloning task"""
try:
# This will raise the exception if the task failed
task.result()
except Exception as e:
logger.error(
"Background cloning task failed with exception",
session_id=session_id,
error=str(e),
exc_info=True
)
@router.post(
@@ -77,6 +113,7 @@ async def create_demo_session(
session_manager = DemoSessionManager(db, redis)
session = await session_manager.create_session(
demo_account_type=request.demo_account_type,
subscription_tier=request.subscription_tier,
user_id=request.user_id,
ip_address=ip_address,
user_agent=user_agent
@@ -92,10 +129,14 @@ async def create_demo_session(
base_tenant_id = demo_config.get("base_tenant_id", str(session.base_demo_tenant_id))
# Start cloning in background task with session ID (not session object)
asyncio.create_task(
# Store task reference in case we need to track it
task = asyncio.create_task(
_background_cloning_task(session.session_id, session.id, base_tenant_id)
)
# Add error handling for the task to prevent silent failures
task.add_done_callback(lambda t: _handle_task_result(t, session.session_id))
# Generate session token
session_token = jwt.encode(
{
@@ -104,8 +145,8 @@ async def create_demo_session(
"demo_account_type": request.demo_account_type,
"exp": session.expires_at.timestamp()
},
"demo-secret-key",
algorithm="HS256"
settings.JWT_SECRET_KEY,
algorithm=settings.JWT_ALGORITHM
)
return {

View File

@@ -0,0 +1,82 @@
"""
Internal API for Demo Session Service
Handles internal service-to-service operations
"""
from fastapi import APIRouter, Depends, HTTPException, Header
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
from app.core import get_db, settings
from app.core.redis_wrapper import get_redis, DemoRedisWrapper
from app.services.data_cloner import DemoDataCloner
logger = structlog.get_logger()
router = APIRouter()
async def verify_internal_api_key(x_internal_api_key: str = Header(None)):
    """Dependency that authenticates internal service-to-service calls.

    Compares the ``X-Internal-Api-Key`` request header against
    ``settings.INTERNAL_API_KEY`` and raises HTTP 403 on any mismatch.

    Fixes two problems in the original comparison:
    - fail open: when INTERNAL_API_KEY was unset (None), a request that
      simply omitted the header compared ``None != None`` -> False and was
      authorized; now missing config or a missing header always rejects.
    - timing: plain ``!=`` short-circuits on the first differing byte;
      use a constant-time digest comparison instead.
    """
    import hmac  # stdlib; local import keeps this dependency self-contained

    required_key = settings.INTERNAL_API_KEY
    # Fail closed: unset configuration or absent header is never valid.
    if (
        not required_key
        or x_internal_api_key is None
        or not hmac.compare_digest(str(x_internal_api_key), str(required_key))
    ):
        logger.warning("Unauthorized internal API access attempted")
        raise HTTPException(status_code=403, detail="Invalid internal API key")
    return True
@router.post("/internal/demo/cleanup")
async def cleanup_demo_session_internal(
    cleanup_request: dict,
    db: AsyncSession = Depends(get_db),
    redis: DemoRedisWrapper = Depends(get_redis),
    _: bool = Depends(verify_internal_api_key)
):
    """
    Internal endpoint to cleanup demo session data for a specific tenant.
    Used by rollback mechanisms.

    Expects a JSON body with ``tenant_id`` and ``session_id``. Returns a
    completion summary on success; raises 400 when either parameter is
    missing and 500 when the underlying cleanup fails.

    Bug fixed: the original raised the 400 HTTPException inside the same
    ``try`` whose blanket ``except Exception`` re-wrapped it as a 500, so
    callers never saw the validation error.
    """
    tenant_id = cleanup_request.get('tenant_id')
    session_id = cleanup_request.get('session_id')
    # Validate outside the try block so the 400 is not swallowed below.
    if not all([tenant_id, session_id]):
        raise HTTPException(
            status_code=400,
            detail="Missing required parameters: tenant_id, session_id"
        )
    try:
        logger.info(
            "Internal cleanup requested",
            tenant_id=tenant_id,
            session_id=session_id
        )
        data_cloner = DemoDataCloner(db, redis)
        # Delete session data for this tenant
        await data_cloner.delete_session_data(
            str(tenant_id),
            session_id
        )
        logger.info(
            "Internal cleanup completed",
            tenant_id=tenant_id,
            session_id=session_id
        )
        return {
            "status": "completed",
            "tenant_id": tenant_id,
            "session_id": session_id
        }
    except HTTPException:
        # Preserve explicit HTTP errors raised by the cleanup path as-is.
        raise
    except Exception as e:
        logger.error(
            "Internal cleanup failed",
            error=str(e),
            tenant_id=tenant_id,
            session_id=session_id,
            exc_info=True
        )
        raise HTTPException(status_code=500, detail=f"Failed to cleanup demo session: {str(e)}")

View File

@@ -10,7 +10,7 @@ import structlog
from contextlib import asynccontextmanager
from app.core import settings, DatabaseManager
from app.api import demo_sessions, demo_accounts, demo_operations
from app.api import demo_sessions, demo_accounts, demo_operations, internal
from shared.redis_utils import initialize_redis, close_redis
logger = structlog.get_logger()
@@ -81,6 +81,7 @@ async def global_exception_handler(request: Request, exc: Exception):
app.include_router(demo_sessions.router)
app.include_router(demo_accounts.router)
app.include_router(demo_operations.router)
app.include_router(internal.router)
@app.get("/")

View File

@@ -16,6 +16,10 @@ from app.models.demo_session import CloningStatus
logger = structlog.get_logger()
# Import json for Redis serialization
import json
class ServiceDefinition:
"""Definition of a service that can clone demo data"""
@@ -29,9 +33,10 @@ class ServiceDefinition:
class CloneOrchestrator:
"""Orchestrates parallel demo data cloning across services"""
def __init__(self):
def __init__(self, redis_manager=None):
from app.core.config import settings
self.internal_api_key = settings.INTERNAL_API_KEY
self.redis_manager = redis_manager # For real-time progress updates
# Define services that participate in cloning
# URLs should be internal Kubernetes service names
@@ -110,6 +115,66 @@ class CloneOrchestrator:
),
]
async def _update_progress_in_redis(
self,
session_id: str,
progress_data: Dict[str, Any]
):
"""Update cloning progress in Redis for real-time frontend polling"""
if not self.redis_manager:
return # Skip if no Redis manager provided
try:
status_key = f"session:{session_id}:status"
client = await self.redis_manager.get_client()
# Get existing status data or create new
existing_data_str = await client.get(status_key)
if existing_data_str:
status_data = json.loads(existing_data_str)
else:
# Initialize basic status structure
status_data = {
"session_id": session_id,
"status": "pending",
"progress": {},
"total_records_cloned": 0
}
# Update progress field with new data
status_data["progress"] = progress_data
# Calculate total records cloned from progress
total_records = 0
if "parent" in progress_data and "total_records_cloned" in progress_data["parent"]:
total_records += progress_data["parent"]["total_records_cloned"]
if "children" in progress_data:
for child in progress_data["children"]:
if isinstance(child, dict) and "records_cloned" in child:
total_records += child["records_cloned"]
status_data["total_records_cloned"] = total_records
# Update Redis with 2-hour TTL
await client.setex(
status_key,
7200, # 2 hours
json.dumps(status_data)
)
logger.debug(
"Updated progress in Redis",
session_id=session_id,
progress_keys=list(progress_data.keys())
)
except Exception as e:
# Don't fail cloning if progress update fails
logger.warning(
"Failed to update progress in Redis",
session_id=session_id,
error=str(e)
)
async def clone_all_services(
self,
base_tenant_id: str,
@@ -535,6 +600,14 @@ class CloneOrchestrator:
try:
# Step 1: Clone parent tenant
logger.info("Cloning parent tenant", session_id=session_id)
# Update progress: Parent cloning started
await self._update_progress_in_redis(session_id, {
"parent": {"overall_status": "pending"},
"children": [],
"distribution": {}
})
parent_result = await self.clone_all_services(
base_tenant_id=base_tenant_id,
virtual_tenant_id=parent_tenant_id,
@@ -543,6 +616,13 @@ class CloneOrchestrator:
)
results["parent"] = parent_result
# Update progress: Parent cloning completed
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": [],
"distribution": {}
})
# BUG-006 FIX: Track parent for potential rollback
if parent_result.get("overall_status") not in ["failed"]:
rollback_stack.append({
@@ -599,6 +679,13 @@ class CloneOrchestrator:
child_count=len(child_configs)
)
# Update progress: Children cloning started
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": [{"status": "pending"} for _ in child_configs],
"distribution": {}
})
child_tasks = []
for idx, (child_config, child_id) in enumerate(zip(child_configs, child_tenant_ids)):
task = self._clone_child_outlet(
@@ -617,6 +704,13 @@ class CloneOrchestrator:
for r in children_results
]
# Update progress: Children cloning completed
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": results["children"],
"distribution": {}
})
# BUG-006 FIX: Track children for potential rollback
for child_result in results["children"]:
if child_result.get("status") not in ["failed"]:
@@ -630,6 +724,13 @@ class CloneOrchestrator:
distribution_url = os.getenv("DISTRIBUTION_SERVICE_URL", "http://distribution-service:8000")
logger.info("Setting up distribution data", session_id=session_id, distribution_url=distribution_url)
# Update progress: Distribution starting
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": results["children"],
"distribution": {"status": "pending"}
})
try:
async with httpx.AsyncClient(timeout=120.0) as client: # Increased timeout for distribution setup
response = await client.post(
@@ -646,6 +747,13 @@ class CloneOrchestrator:
if response.status_code == 200:
results["distribution"] = response.json()
logger.info("Distribution setup completed successfully", session_id=session_id)
# Update progress: Distribution completed
await self._update_progress_in_redis(session_id, {
"parent": parent_result,
"children": results["children"],
"distribution": results["distribution"]
})
else:
error_detail = response.text if response.text else f"HTTP {response.status_code}"
results["distribution"] = {

View File

@@ -27,7 +27,7 @@ class DemoSessionManager:
self.db = db
self.redis = redis
self.repository = DemoSessionRepository(db)
self.orchestrator = CloneOrchestrator()
self.orchestrator = CloneOrchestrator(redis_manager=redis) # Pass Redis for real-time progress updates
async def create_session(
self,