demo seed change

Author: Urtzi Alfaro
Date: 2025-12-13 23:57:54 +01:00
parent f3688dfb04
commit ff830a3415
299 changed files with 20328 additions and 19485 deletions

@@ -1,3 +1,4 @@
 from .orchestration import router as orchestration_router
+from .internal_demo import router as internal_demo_router
-__all__ = ["orchestration_router"]
+__all__ = ["orchestration_router", "internal_demo_router"]

@@ -8,6 +8,7 @@ from typing import Dict, Any
 from uuid import UUID
 import structlog
 import os
+import json
 from app.core.database import get_db
 from sqlalchemy.ext.asyncio import AsyncSession

@@ -133,9 +133,9 @@ from app.api.internal import router as internal_router
 service.add_router(orchestration_router)
 service.add_router(internal_router)
-# INTERNAL: Service-to-service endpoints
-from app.api import internal_demo
-service.add_router(internal_demo.router)
+# INTERNAL: Service-to-service endpoints for demo data cloning
+from app.api.internal_demo import router as internal_demo_router
+service.add_router(internal_demo_router, tags=["internal-demo"])
 @app.middleware("http")
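
For orientation, here is a minimal sketch of the kind of module this hunk wires in. The actual contents of app/api/internal_demo.py are not shown in this commit, so the prefix, route, and handler below are illustrative assumptions only:

# Hypothetical sketch - not taken from this commit
from fastapi import APIRouter

router = APIRouter(prefix="/internal/demo", tags=["internal-demo"])

@router.post("/clone")
async def clone_demo_data() -> dict:
    """Illustrative stub for a service-to-service demo-data cloning endpoint."""
    return {"status": "accepted"}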

@@ -1,733 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Orchestration Runs Seeding Script for Orchestrator Service
Creates realistic orchestration scenarios in various states for demo purposes
This script runs as a Kubernetes init job inside the orchestrator-service container.
It populates the template tenants with comprehensive orchestration run histories.
Usage:
python /app/scripts/demo/seed_demo_orchestration_runs.py
Environment Variables Required:
ORCHESTRATOR_DATABASE_URL - PostgreSQL connection string for orchestrator database
DEMO_MODE - Set to 'production' for production seeding
LOG_LEVEL - Logging level (default: INFO)
Note: No database lookups are needed - all tenant IDs and run configuration are pre-defined (hard-coded) in this script
"""
import asyncio
import uuid
import sys
import os
from datetime import datetime, timezone, timedelta, date
from pathlib import Path
from decimal import Decimal
import random
# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog
from app.models.orchestration_run import (
OrchestrationRun, OrchestrationStatus
)
# Add shared utilities to path for demo dates
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import BASE_REFERENCE_DATE
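# NOTE (inferred from usage below): BASE_REFERENCE_DATE must be a timezone-aware
# datetime, since calculate_date_from_offset() adds a timedelta and calls .date().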
# Configure logging
structlog.configure(
processors=[
structlog.stdlib.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.dev.ConsoleRenderer()
]
)
logger = structlog.get_logger()
# Fixed Demo Tenant IDs (must match tenant service)
DEMO_TENANT_PROFESSIONAL = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6") # Individual bakery
DEMO_TENANT_ENTERPRISE_CHAIN = uuid.UUID("c3d4e5f6-a7b8-49c0-d1e2-f3a4b5c6d7e8") # Enterprise parent (Obrador)
# BASE_REFERENCE_DATE now imported from shared utilities to ensure consistency
# between seeding and cloning operations
# Hardcoded orchestration run configurations
ORCHESTRATION_CONFIG = {
"runs_per_tenant": 12,
"temporal_distribution": {
"completed": {
"percentage": 0.4,
"offset_days_min": -30,
"offset_days_max": -1,
"statuses": ["completed"]
},
"in_execution": {
"percentage": 0.25,
"offset_days_min": -5,
"offset_days_max": 2,
"statuses": ["running", "partial_success"]
},
"failed": {
"percentage": 0.1,
"offset_days_min": -10,
"offset_days_max": -1,
"statuses": ["failed"]
},
"cancelled": {
"percentage": 0.05,
"offset_days_min": -7,
"offset_days_max": -1,
"statuses": ["cancelled"]
},
"pending": {
"percentage": 0.2,
"offset_days_min": 0,
"offset_days_max": 3,
"statuses": ["pending"]
}
},
"run_types": [
{"type": "scheduled", "weight": 0.7},
{"type": "manual", "weight": 0.25},
{"type": "test", "weight": 0.05}
],
"priorities": {
"normal": 0.7,
"high": 0.25,
"critical": 0.05
},
"performance_metrics": {
"fulfillment_rate": {"min": 85.0, "max": 98.0},
"on_time_delivery": {"min": 80.0, "max": 95.0},
"cost_accuracy": {"min": 90.0, "max": 99.0},
"quality_score": {"min": 7.0, "max": 9.5}
},
"step_durations": {
"forecasting": {"min": 30, "max": 120}, # seconds
"production": {"min": 60, "max": 300},
"procurement": {"min": 45, "max": 180},
"notification": {"min": 15, "max": 60}
},
"error_scenarios": [
{"type": "forecasting_timeout", "message": "Forecasting service timeout - retrying"},
{"type": "production_unavailable", "message": "Production service temporarily unavailable"},
{"type": "procurement_failure", "message": "Procurement service connection failed"},
{"type": "notification_error", "message": "Notification service rate limit exceeded"}
]
}
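# The temporal_distribution percentages above sum to 1.0 and are sampled
# cumulatively in generate_orchestration_for_tenant(); run_types are drawn via
# weighted_choice() below, and priorities via a similar cumulative loop.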
def calculate_date_from_offset(offset_days: int) -> date:
"""Calculate a date based on offset from BASE_REFERENCE_DATE"""
return (BASE_REFERENCE_DATE + timedelta(days=offset_days)).date()
def calculate_datetime_from_offset(offset_days: int) -> datetime:
"""Calculate a datetime based on offset from BASE_REFERENCE_DATE"""
return BASE_REFERENCE_DATE + timedelta(days=offset_days)
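# Example (illustrative, assuming BASE_REFERENCE_DATE = 2025-01-15T00:00:00Z):
#   calculate_date_from_offset(-1)    -> date(2025, 1, 14)
#   calculate_datetime_from_offset(2) -> 2025-01-17T00:00:00+00:00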
def weighted_choice(choices: list) -> dict:
"""Make a weighted random choice from list of dicts with 'weight' key"""
total_weight = sum(c.get("weight", 1.0) for c in choices)
r = random.uniform(0, total_weight)
cumulative = 0
for choice in choices:
cumulative += choice.get("weight", 1.0)
if r <= cumulative:
return choice
return choices[-1]
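# Example for weighted_choice(): given [{"type": "scheduled", "weight": 0.7},
# {"type": "manual", "weight": 0.3}], the "scheduled" entry is returned ~70% of the time.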
def generate_run_number(tenant_id: uuid.UUID, index: int, run_type: str) -> str:
"""Generate a unique run number"""
tenant_prefix = "SP" if tenant_id == DEMO_TENANT_PROFESSIONAL else "LE"
type_code = run_type[0:3].upper()
current_year = datetime.now(timezone.utc).year
return f"ORCH-{tenant_prefix}-{type_code}-{current_year}-{index:03d}"
def generate_reasoning_metadata(
forecasts_generated: int,
production_batches_created: int,
procurement_plans_created: int,
purchase_orders_created: int
) -> dict:
"""
Generate reasoning metadata for an orchestration run, consumed downstream by the alert processor.
This creates structured reasoning data that the alert processor can use to provide
context when showing AI reasoning to users.
"""
# Calculate aggregate metrics for dashboard display
# Dashboard expects these fields at the top level of the 'reasoning' object
critical_items_count = random.randint(1, 3) if purchase_orders_created > 0 else 0
financial_impact_eur = random.randint(200, 1500) if critical_items_count > 0 else 0
min_depletion_hours = random.uniform(6.0, 48.0) if critical_items_count > 0 else 0
reasoning_metadata = {
'reasoning': {
'type': 'daily_orchestration_summary',
'timestamp': datetime.now(timezone.utc).isoformat(),
# TOP-LEVEL FIELDS - Dashboard reads these directly (dashboard_service.py:411-413)
'critical_items_count': critical_items_count,
'financial_impact_eur': round(financial_impact_eur, 2),
'min_depletion_hours': round(min_depletion_hours, 1),
'time_until_consequence_hours': round(min_depletion_hours, 1),
'affected_orders': random.randint(0, 5) if critical_items_count > 0 else 0,
'summary': 'Daily orchestration run completed successfully',
# Keep existing details structure for backward compatibility
'details': {
'forecasting': {
'forecasts_created': forecasts_generated,
'method': 'automated_daily_forecast',
'reasoning': 'Generated forecasts based on historical patterns and seasonal trends'
},
'production': {
'batches_created': production_batches_created,
'method': 'demand_based_scheduling',
'reasoning': 'Scheduled production batches based on forecasted demand and inventory levels'
},
'procurement': {
'requirements_created': procurement_plans_created,
'pos_created': purchase_orders_created,
'method': 'automated_procurement',
'reasoning': 'Generated procurement plan based on production needs and inventory optimization'
}
}
},
'purchase_orders': [],
'production_batches': [],
'ai_insights': {
'generated': 0,
'posted': 0
}
}
# Add sample purchase order reasoning if any POs were created
if purchase_orders_created > 0:
for i in range(min(purchase_orders_created, 3)): # Limit to 3 sample POs
po_reasoning = {
'id': f'po-sample-{i+1}',
'status': 'created',
'reasoning': {
'type': 'inventory_optimization',
'parameters': {
'trigger': 'low_stock_prediction',
'min_depletion_days': random.randint(2, 5),
'quantity': random.randint(100, 500),
'unit': 'kg',
'supplier': 'Demo Supplier',
'financial_impact_eur': random.randint(100, 1000)
}
}
}
reasoning_metadata['purchase_orders'].append(po_reasoning)
# Add sample production batch reasoning if any batches were created
if production_batches_created > 0:
for i in range(min(production_batches_created, 3)): # Limit to 3 sample batches
batch_reasoning = {
'id': f'batch-sample-{i+1}',
'status': 'scheduled',
'reasoning': {
'type': 'demand_forecasting',
'parameters': {
'trigger': 'forecasted_demand',
'forecasted_quantity': random.randint(50, 200),
'product_name': 'Demo Product',
'financial_impact_eur': random.randint(50, 500)
}
}
}
reasoning_metadata['production_batches'].append(batch_reasoning)
return reasoning_metadata
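# Illustrative consumer (per the dashboard comments above): code such as
# run.run_metadata["reasoning"]["critical_items_count"] reads these fields
# directly from the top level of the 'reasoning' object.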
async def generate_orchestration_for_tenant(
db: AsyncSession,
tenant_id: uuid.UUID,
tenant_name: str,
business_model: str,
config: dict
) -> dict:
"""Generate orchestration runs for a specific tenant"""
logger.info("" * 80)
logger.info(f"Generating orchestration runs for: {tenant_name}")
logger.info(f"Tenant ID: {tenant_id}")
logger.info("" * 80)
# Check if orchestration runs already exist
result = await db.execute(
select(OrchestrationRun).where(OrchestrationRun.tenant_id == tenant_id).limit(1)
)
existing = result.scalar_one_or_none()
if existing:
logger.info(f" ⏭️ Orchestration runs already exist for {tenant_name}, skipping seed")
return {
"tenant_id": str(tenant_id),
"runs_created": 0,
"steps_created": 0,
"skipped": True
}
orch_config = config["orchestration_config"]
total_runs = orch_config["runs_per_tenant"]
runs_created = 0
steps_created = 0
# Special case: Create at least 1 recent completed run for "today" (for dashboard visibility)
# This ensures the dashboard "Listo Para Planificar Tu Día" shows data
today_run_created = False
for i in range(total_runs):
# For the first run, create it for today with completed status
if i == 0 and not today_run_created:
temporal_category = orch_config["temporal_distribution"]["completed"]
# Use current time instead of BASE_REFERENCE_DATE
now = datetime.now(timezone.utc)
# Keep the offset at zero so the run is dated today; actual timestamps come from 'now' below
offset_days = 0 # Today
run_date = now.date()
today_run_created = True
# Force status to completed for dashboard visibility
status = "completed"
else:
# Determine temporal distribution
rand_temporal = random.random()
cumulative = 0
temporal_category = None
for category, details in orch_config["temporal_distribution"].items():
cumulative += details["percentage"]
if rand_temporal <= cumulative:
temporal_category = details
break
if not temporal_category:
temporal_category = orch_config["temporal_distribution"]["completed"]
# Calculate run date
offset_days = random.randint(
temporal_category["offset_days_min"],
temporal_category["offset_days_max"]
)
run_date = calculate_date_from_offset(offset_days)
# Select status
status = random.choice(temporal_category["statuses"])
# Select run type
run_type_choice = weighted_choice(orch_config["run_types"])
run_type = run_type_choice["type"]
# Select priority
priority_rand = random.random()
cumulative_priority = 0
priority = "normal"
for p, weight in orch_config["priorities"].items():
cumulative_priority += weight
if priority_rand <= cumulative_priority:
priority = p
break
# Generate run number
run_number = generate_run_number(tenant_id, i + 1, run_type)
# Calculate timing based on status
# For today's run (i==0), use current datetime instead of BASE_REFERENCE_DATE
if i == 0 and today_run_created:
now = datetime.now(timezone.utc)
started_at = now - timedelta(minutes=90) # Started 90 minutes ago (1.5 hours)
completed_at = now - timedelta(minutes=30) # Completed 30 minutes ago, so 60-minute duration
duration_seconds = int((completed_at - started_at).total_seconds())
else:
started_at = calculate_datetime_from_offset(offset_days - 1)
completed_at = None
duration_seconds = None
if status in ["completed", "partial_success"]:
completed_at = calculate_datetime_from_offset(offset_days)
# Calculate duration based on realistic processing times
duration_seconds = int((completed_at - started_at).total_seconds())
# Cap duration to reasonable values
if duration_seconds > 5400: # More than 1.5 hours
duration_seconds = random.randint(1800, 3600) # 30-60 minutes
elif status == "failed":
completed_at = calculate_datetime_from_offset(offset_days - 0.5)
duration_seconds = int((completed_at - started_at).total_seconds())
if duration_seconds > 3600: # More than 1 hour
duration_seconds = random.randint(300, 1800) # 5-30 minutes
elif status == "cancelled":
completed_at = calculate_datetime_from_offset(offset_days - 0.2)
duration_seconds = int((completed_at - started_at).total_seconds())
if duration_seconds > 1800: # More than 30 minutes
duration_seconds = random.randint(60, 600) # 1-10 minutes
# Generate step timing
forecasting_started_at = started_at
forecasting_completed_at = forecasting_started_at + timedelta(seconds=random.randint(
orch_config["step_durations"]["forecasting"]["min"],
orch_config["step_durations"]["forecasting"]["max"]
))
forecasting_status = "success"
forecasting_error = None
production_started_at = forecasting_completed_at
production_completed_at = production_started_at + timedelta(seconds=random.randint(
orch_config["step_durations"]["production"]["min"],
orch_config["step_durations"]["production"]["max"]
))
production_status = "success"
production_error = None
procurement_started_at = production_completed_at
procurement_completed_at = procurement_started_at + timedelta(seconds=random.randint(
orch_config["step_durations"]["procurement"]["min"],
orch_config["step_durations"]["procurement"]["max"]
))
procurement_status = "success"
procurement_error = None
notification_started_at = procurement_completed_at
notification_completed_at = notification_started_at + timedelta(seconds=random.randint(
orch_config["step_durations"]["notification"]["min"],
orch_config["step_durations"]["notification"]["max"]
))
notification_status = "success"
notification_error = None
# Simulate errors for failed runs
if status == "failed":
error_scenario = random.choice(orch_config["error_scenarios"])
error_step = random.choice(["forecasting", "production", "procurement", "notification"])
if error_step == "forecasting":
forecasting_status = "failed"
forecasting_error = error_scenario["message"]
elif error_step == "production":
production_status = "failed"
production_error = error_scenario["message"]
elif error_step == "procurement":
procurement_status = "failed"
procurement_error = error_scenario["message"]
elif error_step == "notification":
notification_status = "failed"
notification_error = error_scenario["message"]
# Generate results summary
# For professional tenant, use realistic fixed counts that match PO seed data
if tenant_id == DEMO_TENANT_PROFESSIONAL:
forecasts_generated = 12 # Realistic daily forecast count
production_batches_created = 6 # Realistic batch count
procurement_plans_created = 3 # 3 procurement plans
purchase_orders_created = 18 # Total POs including 9 delivery POs (PO #11-18)
notifications_sent = 24 # Realistic notification count
else:
# Enterprise tenant can keep random values
forecasts_generated = random.randint(5, 15)
production_batches_created = random.randint(3, 8)
procurement_plans_created = random.randint(2, 6)
purchase_orders_created = random.randint(1, 4)
notifications_sent = random.randint(10, 25)
# Generate performance metrics for completed runs
fulfillment_rate = None
on_time_delivery_rate = None
cost_accuracy = None
quality_score = None
if status in ["completed", "partial_success"]:
metrics = orch_config["performance_metrics"]
fulfillment_rate = Decimal(str(random.uniform(
metrics["fulfillment_rate"]["min"],
metrics["fulfillment_rate"]["max"]
)))
on_time_delivery_rate = Decimal(str(random.uniform(
metrics["on_time_delivery"]["min"],
metrics["on_time_delivery"]["max"]
)))
cost_accuracy = Decimal(str(random.uniform(
metrics["cost_accuracy"]["min"],
metrics["cost_accuracy"]["max"]
)))
quality_score = Decimal(str(random.uniform(
metrics["quality_score"]["min"],
metrics["quality_score"]["max"]
)))
# Generate reasoning metadata for the orchestrator context
reasoning_metadata = generate_reasoning_metadata(
forecasts_generated,
production_batches_created,
procurement_plans_created,
purchase_orders_created
)
# Create orchestration run
run = OrchestrationRun(
id=uuid.uuid4(),
tenant_id=tenant_id,
run_number=run_number,
status=OrchestrationStatus(status),
run_type=run_type,
priority=priority,
started_at=started_at,
completed_at=completed_at,
duration_seconds=duration_seconds,
forecasting_started_at=forecasting_started_at,
forecasting_completed_at=forecasting_completed_at,
forecasting_status=forecasting_status,
forecasting_error=forecasting_error,
production_started_at=production_started_at,
production_completed_at=production_completed_at,
production_status=production_status,
production_error=production_error,
procurement_started_at=procurement_started_at,
procurement_completed_at=procurement_completed_at,
procurement_status=procurement_status,
procurement_error=procurement_error,
notification_started_at=notification_started_at,
notification_completed_at=notification_completed_at,
notification_status=notification_status,
notification_error=notification_error,
forecasts_generated=forecasts_generated,
production_batches_created=production_batches_created,
procurement_plans_created=procurement_plans_created,
purchase_orders_created=purchase_orders_created,
notifications_sent=notifications_sent,
fulfillment_rate=fulfillment_rate,
on_time_delivery_rate=on_time_delivery_rate,
cost_accuracy=cost_accuracy,
quality_score=quality_score,
run_metadata=reasoning_metadata,
created_at=calculate_datetime_from_offset(offset_days - 2),
updated_at=calculate_datetime_from_offset(offset_days),
triggered_by="scheduler" if run_type == "scheduled" else "user" if run_type == "manual" else "test-runner"
)
db.add(run)
await db.flush() # Get run ID
runs_created += 1
steps_created += 4 # forecasting, production, procurement, notification
await db.commit()
logger.info(f" 📊 Successfully created {runs_created} orchestration runs with {steps_created} steps for {tenant_name}")
logger.info("")
return {
"tenant_id": str(tenant_id),
"runs_created": runs_created,
"steps_created": steps_created,
"skipped": False
}
async def seed_all(db: AsyncSession):
"""Seed all demo tenants with orchestration runs"""
logger.info("=" * 80)
logger.info("🚀 Starting Demo Orchestration Runs Seeding")
logger.info("=" * 80)
# Load configuration (reuse the module-level ORCHESTRATION_CONFIG rather than duplicating it)
config = {"orchestration_config": ORCHESTRATION_CONFIG}
results = []
# Seed Professional Bakery (single location)
result_professional = await generate_orchestration_for_tenant(
db,
DEMO_TENANT_PROFESSIONAL,
"Panadería Artesana Madrid (Professional)",
"individual_bakery",
config
)
results.append(result_professional)
# Seed Enterprise Parent (central production - Obrador)
result_enterprise_parent = await generate_orchestration_for_tenant(
db,
DEMO_TENANT_ENTERPRISE_CHAIN,
"Panadería Central - Obrador Madrid (Enterprise Parent)",
"enterprise_chain",
config
)
results.append(result_enterprise_parent)
total_runs = sum(r["runs_created"] for r in results)
total_steps = sum(r["steps_created"] for r in results)
logger.info("=" * 80)
logger.info("✅ Demo Orchestration Runs Seeding Completed")
logger.info("=" * 80)
return {
"results": results,
"total_runs_created": total_runs,
"total_steps_created": total_steps,
"status": "completed"
}
async def main():
"""Main execution function"""
logger.info("Demo Orchestration Runs Seeding Script Starting")
logger.info("Mode: %s", os.getenv("DEMO_MODE", "development"))
logger.info("Log Level: %s", os.getenv("LOG_LEVEL", "INFO"))
# Get database URL from environment
database_url = os.getenv("ORCHESTRATOR_DATABASE_URL") or os.getenv("DATABASE_URL")
if not database_url:
logger.error("❌ ORCHESTRATOR_DATABASE_URL or DATABASE_URL environment variable must be set")
return 1
# Ensure asyncpg driver
if database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
logger.info("Connecting to orchestrator database")
# Create async engine
engine = create_async_engine(
database_url,
echo=False,
pool_pre_ping=True,
pool_size=5,
max_overflow=10
)
async_session = sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False
)
try:
async with async_session() as session:
result = await seed_all(session)
logger.info("")
logger.info("📊 Seeding Summary:")
logger.info(f" ✅ Total Runs: {result['total_runs_created']}")
logger.info(f" ✅ Total Steps: {result['total_steps_created']}")
logger.info(f" ✅ Status: {result['status']}")
logger.info("")
# Print per-tenant details
for tenant_result in result["results"]:
tenant_id = tenant_result["tenant_id"]
runs = tenant_result["runs_created"]
steps = tenant_result["steps_created"]
skipped = tenant_result.get("skipped", False)
status = "SKIPPED (already exists)" if skipped else f"CREATED {runs} runs, {steps} steps"
logger.info(f" Tenant {tenant_id}: {status}")
logger.info("")
logger.info("🎉 Success! Orchestration runs are ready for demo sessions.")
logger.info("")
logger.info("Runs created:")
logger.info(" • 12 Orchestration runs per tenant")
logger.info(" • Various statuses: completed, running, failed, cancelled, pending")
logger.info(" • Different types: scheduled, manual, test")
logger.info(" • Performance metrics tracking")
logger.info("")
logger.info("Note: All IDs are pre-defined and hardcoded for cross-service consistency")
logger.info("")
return 0
except Exception as e:
logger.error("=" * 80)
logger.error("❌ Demo Orchestration Runs Seeding Failed")
logger.error("=" * 80)
logger.error("Error: %s", str(e))
logger.error("", exc_info=True)
return 1
finally:
await engine.dispose()
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)