demo seed change

Urtzi Alfaro
2025-12-13 23:57:54 +01:00
parent f3688dfb04
commit ff830a3415
299 changed files with 20328 additions and 19485 deletions

View File

@@ -13,6 +13,7 @@ from typing import Optional
import os
import sys
from pathlib import Path
import json
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import adjust_date_for_demo, BASE_REFERENCE_DATE
@@ -21,7 +22,7 @@ from app.core.database import get_db
from app.models.forecasts import Forecast, PredictionBatch
logger = structlog.get_logger()
router = APIRouter(prefix="/internal/demo", tags=["internal"])
router = APIRouter()
# Base demo tenant IDs
DEMO_TENANT_PROFESSIONAL = "a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6"
@@ -36,7 +37,7 @@ def verify_internal_api_key(x_internal_api_key: Optional[str] = Header(None)):
return True
@router.post("/clone")
@router.post("/internal/demo/clone")
async def clone_demo_data(
base_tenant_id: str,
virtual_tenant_id: str,
@@ -49,50 +50,95 @@ async def clone_demo_data(
"""
Clone forecasting service data for a virtual demo tenant
Clones:
- Forecasts (historical predictions)
- Prediction batches (batch prediction records)
This endpoint creates fresh demo data by:
1. Loading seed data from JSON files
2. Applying XOR-based ID transformation
3. Adjusting dates relative to session creation time
4. Creating records in the virtual tenant
Args:
base_tenant_id: Template tenant UUID to clone from
base_tenant_id: Template tenant UUID (for reference)
virtual_tenant_id: Target virtual tenant UUID
demo_account_type: Type of demo account
session_id: Originating session ID for tracing
session_created_at: ISO timestamp when demo session was created (for date adjustment)
session_created_at: Session creation timestamp for date adjustment
db: Database session
Returns:
Cloning status and record counts
Dictionary with cloning results
Raises:
HTTPException: On validation or cloning errors
"""
start_time = datetime.now(timezone.utc)
# Parse session_created_at or fall back to now
if session_created_at:
try:
session_time = datetime.fromisoformat(session_created_at.replace('Z', '+00:00'))
except (ValueError, AttributeError) as e:
logger.warning(
"Invalid session_created_at format, using current time",
session_created_at=session_created_at,
error=str(e)
)
session_time = datetime.now(timezone.utc)
else:
logger.warning("session_created_at not provided, using current time")
session_time = datetime.now(timezone.utc)
logger.info(
"Starting forecasting data cloning",
base_tenant_id=base_tenant_id,
virtual_tenant_id=virtual_tenant_id,
demo_account_type=demo_account_type,
session_id=session_id,
session_time=session_time.isoformat()
)
try:
# Validate UUIDs
base_uuid = uuid.UUID(base_tenant_id)
virtual_uuid = uuid.UUID(virtual_tenant_id)
# Parse session creation time for date adjustment
if session_created_at:
try:
session_time = datetime.fromisoformat(session_created_at.replace('Z', '+00:00'))
except (ValueError, AttributeError):
session_time = start_time
else:
session_time = start_time
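# datetime.fromisoformat cannot parse a trailing 'Z' (UTC designator) before
# Python 3.11, hence the replace() above, e.g.:
#     "2025-12-13T22:57:54Z" -> "2025-12-13T22:57:54+00:00"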
logger.info(
"Starting forecasting data cloning with date adjustment",
base_tenant_id=base_tenant_id,
virtual_tenant_id=str(virtual_uuid),
demo_account_type=demo_account_type,
session_id=session_id,
session_time=session_time.isoformat()
)
# Load seed data using shared utility
try:
from shared.utils.seed_data_paths import get_seed_data_path
if demo_account_type == "enterprise":
profile = "enterprise"
else:
profile = "professional"
json_file = get_seed_data_path(profile, "10-forecasting.json")
except ImportError:
# Fall back to the original fixture path
seed_data_dir = Path(__file__).parent.parent.parent.parent / "shared" / "demo" / "fixtures"
if demo_account_type == "enterprise":
json_file = seed_data_dir / "enterprise" / "parent" / "10-forecasting.json"
else:
json_file = seed_data_dir / "professional" / "10-forecasting.json"
if not json_file.exists():
raise HTTPException(
status_code=404,
detail=f"Seed data file not found: {json_file}"
)
# Load JSON data
with open(json_file, 'r', encoding='utf-8') as f:
seed_data = json.load(f)
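# Expected seed file shape (inferred from the .get() calls below; field
# lists abbreviated):
#     {
#         "forecasts": [{"id": "<uuid>", "forecast_date": "<iso>", ...}],
#         "prediction_batches": [{"id": "<uuid>", "requested_at": "<iso>", ...}]
#     }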
# Check if data already exists for this virtual tenant (idempotency)
existing_check = await db.execute(
select(Forecast).where(Forecast.tenant_id == virtual_uuid).limit(1)
)
existing_forecast = existing_check.scalar_one_or_none()
if existing_forecast:
logger.warning(
"Demo data already exists, skipping clone",
virtual_tenant_id=str(virtual_uuid)
)
return {
"status": "skipped",
"reason": "Data already exists",
"records_cloned": 0
}
# Track cloning statistics
stats = {
@@ -100,93 +146,150 @@ async def clone_demo_data(
"prediction_batches": 0
}
# Clone Forecasts
result = await db.execute(
select(Forecast).where(Forecast.tenant_id == base_uuid)
)
base_forecasts = result.scalars().all()
# Transform and insert forecasts
for forecast_data in seed_data.get('forecasts', []):
# Transform ID using XOR
from shared.utils.demo_id_transformer import transform_id
try:
forecast_uuid = uuid.UUID(forecast_data['id'])
tenant_uuid = uuid.UUID(virtual_tenant_id)
transformed_id = transform_id(forecast_data['id'], tenant_uuid)
except ValueError as e:
logger.error("Failed to parse UUIDs for ID transformation",
forecast_id=forecast_data['id'],
virtual_tenant_id=virtual_tenant_id,
error=str(e))
raise HTTPException(
status_code=400,
detail=f"Invalid UUID format in forecast data: {str(e)}"
)
# Transform dates
for date_field in ['forecast_date', 'created_at']:
if date_field in forecast_data:
try:
date_value = forecast_data[date_field]
if isinstance(date_value, str):
original_date = datetime.fromisoformat(date_value)
elif hasattr(date_value, 'isoformat'):
original_date = date_value
else:
logger.warning("Skipping invalid date format",
date_field=date_field,
date_value=date_value)
continue
adjusted_forecast_date = adjust_date_for_demo(
original_date,
session_time,
BASE_REFERENCE_DATE
)
forecast_data[date_field] = adjusted_forecast_date
except (ValueError, AttributeError) as e:
logger.warning("Failed to parse date, skipping",
date_field=date_field,
date_value=forecast_data[date_field],
error=str(e))
forecast_data.pop(date_field, None)
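# adjust_date_for_demo is assumed to re-anchor dates while preserving each
# record's offset from the seed baseline, roughly:
#     adjusted = session_time + (original_date - BASE_REFERENCE_DATE)
# e.g. a forecast seeded 3 days before BASE_REFERENCE_DATE lands 3 days
# before the demo session was created.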
# Create forecast
# Map product_id to inventory_product_id if needed
inventory_product_id = forecast_data.get('inventory_product_id') or forecast_data.get('product_id')
logger.info(
"Found forecasts to clone",
count=len(base_forecasts),
base_tenant=str(base_uuid)
)
for forecast in base_forecasts:
adjusted_forecast_date = adjust_date_for_demo(
forecast.forecast_date,
session_time,
BASE_REFERENCE_DATE
) if forecast.forecast_date else None
# Map predicted_quantity to predicted_demand if needed
predicted_demand = forecast_data.get('predicted_demand') or forecast_data.get('predicted_quantity')
new_forecast = Forecast(
id=uuid.uuid4(),
id=transformed_id,
tenant_id=virtual_uuid,
inventory_product_id=forecast.inventory_product_id, # Keep product reference
product_name=forecast.product_name,
location=forecast.location,
forecast_date=adjusted_forecast_date,
created_at=session_time,
predicted_demand=forecast.predicted_demand,
confidence_lower=forecast.confidence_lower,
confidence_upper=forecast.confidence_upper,
confidence_level=forecast.confidence_level,
model_id=forecast.model_id,
model_version=forecast.model_version,
algorithm=forecast.algorithm,
business_type=forecast.business_type,
day_of_week=forecast.day_of_week,
is_holiday=forecast.is_holiday,
is_weekend=forecast.is_weekend,
weather_temperature=forecast.weather_temperature,
weather_precipitation=forecast.weather_precipitation,
weather_description=forecast.weather_description,
traffic_volume=forecast.traffic_volume,
processing_time_ms=forecast.processing_time_ms,
features_used=forecast.features_used
inventory_product_id=inventory_product_id,
product_name=forecast_data.get('product_name'),
location=forecast_data.get('location'),
forecast_date=forecast_data.get('forecast_date'),
created_at=forecast_data.get('created_at', session_time),
predicted_demand=predicted_demand,
confidence_lower=forecast_data.get('confidence_lower'),
confidence_upper=forecast_data.get('confidence_upper'),
confidence_level=forecast_data.get('confidence_level', 0.8),
model_id=forecast_data.get('model_id'),
model_version=forecast_data.get('model_version'),
algorithm=forecast_data.get('algorithm', 'prophet'),
business_type=forecast_data.get('business_type', 'individual'),
day_of_week=forecast_data.get('day_of_week'),
is_holiday=forecast_data.get('is_holiday', False),
is_weekend=forecast_data.get('is_weekend', False),
weather_temperature=forecast_data.get('weather_temperature'),
weather_precipitation=forecast_data.get('weather_precipitation'),
weather_description=forecast_data.get('weather_description'),
traffic_volume=forecast_data.get('traffic_volume'),
processing_time_ms=forecast_data.get('processing_time_ms'),
features_used=forecast_data.get('features_used')
)
db.add(new_forecast)
stats["forecasts"] += 1
# Clone Prediction Batches
result = await db.execute(
select(PredictionBatch).where(PredictionBatch.tenant_id == base_uuid)
)
base_batches = result.scalars().all()
logger.info(
"Found prediction batches to clone",
count=len(base_batches),
base_tenant=str(base_uuid)
)
for batch in base_batches:
adjusted_requested_at = adjust_date_for_demo(
batch.requested_at,
session_time,
BASE_REFERENCE_DATE
) if batch.requested_at else None
adjusted_completed_at = adjust_date_for_demo(
batch.completed_at,
session_time,
BASE_REFERENCE_DATE
) if batch.completed_at else None
# Transform and insert prediction batches
for batch_data in seed_data.get('prediction_batches', []):
# Transform ID using XOR
from shared.utils.demo_id_transformer import transform_id
try:
batch_uuid = uuid.UUID(batch_data['id'])
tenant_uuid = uuid.UUID(virtual_tenant_id)
transformed_id = transform_id(batch_data['id'], tenant_uuid)
except ValueError as e:
logger.error("Failed to parse UUIDs for ID transformation",
batch_id=batch_data['id'],
virtual_tenant_id=virtual_tenant_id,
error=str(e))
raise HTTPException(
status_code=400,
detail=f"Invalid UUID format in batch data: {str(e)}"
)
# Transform dates
for date_field in ['requested_at', 'completed_at']:
if date_field in batch_data:
try:
date_value = batch_data[date_field]
if isinstance(date_value, str):
original_date = datetime.fromisoformat(date_value)
elif hasattr(date_value, 'isoformat'):
original_date = date_value
else:
logger.warning("Skipping invalid date format",
date_field=date_field,
date_value=date_value)
continue
adjusted_batch_date = adjust_date_for_demo(
original_date,
session_time,
BASE_REFERENCE_DATE
)
batch_data[date_field] = adjusted_batch_date
except (ValueError, AttributeError) as e:
logger.warning("Failed to parse date, skipping",
date_field=date_field,
date_value=batch_data[date_field],
error=str(e))
batch_data.pop(date_field, None)
# Create prediction batch
new_batch = PredictionBatch(
id=uuid.uuid4(),
id=transformed_id,
tenant_id=virtual_uuid,
batch_name=batch.batch_name,
requested_at=adjusted_requested_at,
completed_at=adjusted_completed_at,
status=batch.status,
total_products=batch.total_products,
completed_products=batch.completed_products,
failed_products=batch.failed_products,
forecast_days=batch.forecast_days,
business_type=batch.business_type,
error_message=batch.error_message,
processing_time_ms=batch.processing_time_ms,
cancelled_by=batch.cancelled_by
batch_name=batch_data.get('batch_name'),
requested_at=batch_data.get('requested_at'),
completed_at=batch_data.get('completed_at'),
status=batch_data.get('status'),
total_products=batch_data.get('total_products'),
completed_products=batch_data.get('completed_products'),
failed_products=batch_data.get('failed_products'),
forecast_days=batch_data.get('forecast_days'),
business_type=batch_data.get('business_type'),
error_message=batch_data.get('error_message'),
processing_time_ms=batch_data.get('processing_time_ms'),
cancelled_by=batch_data.get('cancelled_by')
)
db.add(new_batch)
stats["prediction_batches"] += 1
@@ -198,11 +301,12 @@ async def clone_demo_data(
duration_ms = int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
logger.info(
"Forecasting data cloning completed",
virtual_tenant_id=virtual_tenant_id,
total_records=total_records,
stats=stats,
duration_ms=duration_ms
"Forecasting data cloned successfully",
virtual_tenant_id=str(virtual_uuid),
records_cloned=total_records,
duration_ms=duration_ms,
forecasts_cloned=stats["forecasts"],
batches_cloned=stats["prediction_batches"]
)
return {
@@ -210,11 +314,15 @@ async def clone_demo_data(
"status": "completed",
"records_cloned": total_records,
"duration_ms": duration_ms,
"details": stats
"details": {
"forecasts": stats["forecasts"],
"prediction_batches": stats["prediction_batches"],
"virtual_tenant_id": str(virtual_uuid)
}
}
except ValueError as e:
logger.error("Invalid UUID format", error=str(e))
logger.error("Invalid UUID format", error=str(e), virtual_tenant_id=virtual_tenant_id)
raise HTTPException(status_code=400, detail=f"Invalid UUID: {str(e)}")
except Exception as e:
@@ -248,3 +356,73 @@ async def clone_health_check(_: bool = Depends(verify_internal_api_key)):
"clone_endpoint": "available",
"version": "2.0.0"
}
@router.delete("/tenant/{virtual_tenant_id}")
async def delete_demo_tenant_data(
virtual_tenant_id: uuid.UUID,
db: AsyncSession = Depends(get_db),
_: bool = Depends(verify_internal_api_key)
):
"""
Delete all demo data for a virtual tenant.
This endpoint is idempotent: it is safe to call multiple times.
"""
from sqlalchemy import delete
start_time = datetime.now(timezone.utc)
records_deleted = {
"forecasts": 0,
"prediction_batches": 0,
"total": 0
}
try:
# Delete in reverse dependency order
# 1. Delete prediction batches
result = await db.execute(
delete(PredictionBatch)
.where(PredictionBatch.tenant_id == virtual_tenant_id)
)
records_deleted["prediction_batches"] = result.rowcount
# 2. Delete forecasts
result = await db.execute(
delete(Forecast)
.where(Forecast.tenant_id == virtual_tenant_id)
)
records_deleted["forecasts"] = result.rowcount
records_deleted["total"] = sum(records_deleted.values())
await db.commit()
logger.info(
"demo_data_deleted",
service="forecasting",
virtual_tenant_id=str(virtual_tenant_id),
records_deleted=records_deleted
)
return {
"service": "forecasting",
"status": "deleted",
"virtual_tenant_id": str(virtual_tenant_id),
"records_deleted": records_deleted,
"duration_ms": int((datetime.now(timezone.utc) - start_time).total_seconds() * 1000)
}
except Exception as e:
await db.rollback()
logger.error(
"demo_data_deletion_failed",
service="forecasting",
virtual_tenant_id=str(virtual_tenant_id),
error=str(e)
)
raise HTTPException(
status_code=500,
detail=f"Failed to delete demo data: {str(e)}"
)
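# Hypothetical smoke test for the endpoints above (paths taken from the
# decorators, remaining query params omitted; header name inferred from
# verify_internal_api_key's x_internal_api_key parameter; note the delete
# route carries no /internal/demo prefix after this change):
#     curl -X POST "$SVC/internal/demo/clone?base_tenant_id=<uuid>&virtual_tenant_id=<uuid>" \
#          -H "X-Internal-Api-Key: $KEY"
#     curl -X DELETE "$SVC/tenant/<virtual_tenant_id>" \
#          -H "X-Internal-Api-Key: $KEY"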

View File

@@ -14,7 +14,7 @@ from app.services.forecasting_alert_service import ForecastingAlertService
from shared.service_base import StandardFastAPIService
# Import API routers
from app.api import forecasts, forecasting_operations, analytics, scenario_operations, internal_demo, audit, ml_insights, validation, historical_validation, webhooks, performance_monitoring, retraining, enterprise_forecasting
from app.api import forecasts, forecasting_operations, analytics, scenario_operations, audit, ml_insights, validation, historical_validation, webhooks, performance_monitoring, retraining, enterprise_forecasting, internal_demo
class ForecastingService(StandardFastAPIService):
@@ -188,7 +188,7 @@ service.add_router(forecasts.router)
service.add_router(forecasting_operations.router)
service.add_router(analytics.router)
service.add_router(scenario_operations.router)
service.add_router(internal_demo.router)
service.add_router(internal_demo.router, tags=["internal-demo"])
service.add_router(ml_insights.router) # ML insights endpoint
service.add_router(validation.router) # Validation endpoint
service.add_router(historical_validation.router) # Historical validation endpoint

View File

@@ -1,506 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Forecasting Seeding Script for Forecasting Service
Creates demand forecasts and prediction batches for demo template tenants
This script runs as a Kubernetes init job inside the forecasting-service container.
"""
import asyncio
import uuid
import sys
import os
import json
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path
# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog
from app.models.forecasts import Forecast, PredictionBatch
# Add shared path for demo utilities
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import BASE_REFERENCE_DATE
# Configure logging
logger = structlog.get_logger()
DEMO_TENANT_PROFESSIONAL = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6") # Individual bakery
# Day of week mapping
DAYS_OF_WEEK = {
0: "lunes",
1: "martes",
2: "miercoles",
3: "jueves",
4: "viernes",
5: "sabado",
6: "domingo"
}
def load_forecasting_config():
"""Load forecasting configuration from JSON file"""
config_file = Path(__file__).parent / "previsiones_config_es.json"
if not config_file.exists():
raise FileNotFoundError(f"Forecasting config file not found: {config_file}")
with open(config_file, 'r', encoding='utf-8') as f:
return json.load(f)
def calculate_datetime_from_offset(offset_days: int) -> datetime:
"""Calculate a datetime based on offset from BASE_REFERENCE_DATE"""
return BASE_REFERENCE_DATE + timedelta(days=offset_days)
def weighted_choice(choices: list) -> dict:
"""Make a weighted random choice from list of dicts with 'peso' key"""
total_weight = sum(c.get("peso", 1.0) for c in choices)
r = random.uniform(0, total_weight)
cumulative = 0
for choice in choices:
cumulative += choice.get("peso", 1.0)
if r <= cumulative:
return choice
return choices[-1]
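# Worked example with illustrative entries: choices = [{"algoritmo": "prophet",
# "peso": 3.0}, {"algoritmo": "arima", "peso": 1.0}] gives total_weight = 4.0,
# so "prophet" is returned whenever r falls in (0, 3], i.e. about 75% of draws.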
def calculate_demand(
product: dict,
day_of_week: int,
is_weekend: bool,
weather_temp: float,
weather_precip: float,
traffic_volume: int,
config: dict
) -> float:
"""Calculate predicted demand based on various factors"""
# Base demand
base_demand = product["demanda_base_diaria"]
# Weekly trend factor
day_name = DAYS_OF_WEEK[day_of_week]
weekly_factor = product["tendencia_semanal"][day_name]
# Apply seasonality (simple growth factor for "creciente")
seasonality_factor = 1.0
if product["estacionalidad"] == "creciente":
seasonality_factor = 1.05
# Weather impact (simple model)
weather_factor = 1.0
temp_impact = config["configuracion_previsiones"]["factores_externos"]["temperatura"]["impacto_demanda"]
precip_impact = config["configuracion_previsiones"]["factores_externos"]["precipitacion"]["impacto_demanda"]
if weather_temp > 22.0:
weather_factor += temp_impact * (weather_temp - 22.0) / 10.0
if weather_precip > 0:
weather_factor += precip_impact
# Traffic correlation
traffic_correlation = config["configuracion_previsiones"]["factores_externos"]["volumen_trafico"]["correlacion_demanda"]
traffic_factor = 1.0 + (traffic_volume / 1000.0 - 1.0) * traffic_correlation
# Calculate predicted demand
predicted = base_demand * weekly_factor * seasonality_factor * weather_factor * traffic_factor
# Add randomness based on variability
variability = product["variabilidad"]
predicted = predicted * random.uniform(1.0 - variability, 1.0 + variability)
return max(0.0, predicted)
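# Worked example (illustrative numbers, not taken from the config): base
# demand 100, weekly factor 1.2, "creciente" seasonality 1.05, 27C with a
# temperature impacto_demanda of 0.1 gives weather_factor 1 + 0.1*(27-22)/10
# = 1.05, and traffic 1500 with correlacion_demanda 0.2 gives traffic_factor
# 1 + 0.5*0.2 = 1.10, so predicted ~ 100*1.2*1.05*1.05*1.10 ~= 145.5 before
# the variability noise is applied.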
async def generate_forecasts_for_tenant(
db: AsyncSession,
tenant_id: uuid.UUID,
tenant_name: str,
business_type: str,
config: dict
):
"""Generate forecasts for a specific tenant"""
logger.info(f"Generating forecasts for: {tenant_name}", tenant_id=str(tenant_id))
# Check if forecasts already exist
result = await db.execute(
select(Forecast).where(Forecast.tenant_id == tenant_id).limit(1)
)
existing = result.scalar_one_or_none()
if existing:
logger.info(f"Forecasts already exist for {tenant_name}, skipping seed")
return {"tenant_id": str(tenant_id), "forecasts_created": 0, "batches_created": 0, "skipped": True}
forecast_config = config["configuracion_previsiones"]
batches_config = config["lotes_prediccion"]
# Get location for this business type
location = forecast_config["ubicaciones"][business_type]
# Get multiplier for central bakery
multiplier = forecast_config["multiplicador_central_bakery"] if business_type == "central_bakery" else 1.0
forecasts_created = 0
batches_created = 0
# Generate prediction batches first
num_batches = batches_config["lotes_por_tenant"]
for batch_idx in range(num_batches):
# Select batch status
status_rand = random.random()
cumulative = 0
batch_status = "completed"
for status, weight in batches_config["distribucion_estados"].items():
cumulative += weight
if status_rand <= cumulative:
batch_status = status
break
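# Same cumulative-weight pattern as weighted_choice above: with a hypothetical
# distribucion_estados of {"completed": 0.8, "processing": 0.1, "failed": 0.1},
# status_rand <= 0.8 selects "completed", (0.8, 0.9] selects "processing",
# and anything above falls through to "failed".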
# Select forecast days
forecast_days = random.choice(batches_config["dias_prevision_lotes"])
# Create batch at different times in the past
requested_offset = -(batch_idx + 1) * 10 # Batches every 10 days in the past
requested_at = calculate_datetime_from_offset(requested_offset)
completed_at = None
processing_time = None
if batch_status == "completed":
processing_time = random.randint(5000, 25000) # 5-25 seconds
completed_at = requested_at + timedelta(milliseconds=processing_time)
batch = PredictionBatch(
id=uuid.uuid4(),
tenant_id=tenant_id,
batch_name=f"Previsión {forecast_days} días - {requested_at.strftime('%Y%m%d')}",
requested_at=requested_at,
completed_at=completed_at,
status=batch_status,
total_products=forecast_config["productos_por_tenant"],
completed_products=forecast_config["productos_por_tenant"] if batch_status == "completed" else 0,
failed_products=0 if batch_status != "failed" else random.randint(1, 3),
forecast_days=forecast_days,
business_type=business_type,
error_message="Error de conexión con servicio de clima" if batch_status == "failed" else None,
processing_time_ms=processing_time
)
db.add(batch)
batches_created += 1
await db.flush()
# Generate historical forecasts (past 30 days)
dias_historico = forecast_config["dias_historico"]
for product in forecast_config["productos_demo"]:
product_id = uuid.UUID(product["id"])
product_name = product["nombre"]
for day_offset in range(-dias_historico, 0):
forecast_date = calculate_datetime_from_offset(day_offset)
day_of_week = forecast_date.weekday()
is_weekend = day_of_week >= 5
# Generate weather data
weather_temp = random.uniform(
forecast_config["factores_externos"]["temperatura"]["min"],
forecast_config["factores_externos"]["temperatura"]["max"]
)
weather_precip = 0.0
if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])
weather_descriptions = ["Despejado", "Parcialmente nublado", "Nublado", "Lluvia ligera", "Lluvia"]
weather_desc = random.choice(weather_descriptions)
# Traffic volume
traffic_volume = random.randint(
forecast_config["factores_externos"]["volumen_trafico"]["min"],
forecast_config["factores_externos"]["volumen_trafico"]["max"]
)
# Calculate demand
predicted_demand = calculate_demand(
product, day_of_week, is_weekend,
weather_temp, weather_precip, traffic_volume, config
)
# Apply multiplier for central bakery
predicted_demand *= multiplier
# Calculate confidence intervals
lower_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] / 100.0
upper_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] / 100.0
confidence_lower = predicted_demand * (1.0 - lower_pct)
confidence_upper = predicted_demand * (1.0 + upper_pct)
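# Illustrative values: with inferior = 10 and superior = 10 (percent) and a
# predicted demand of 120, the interval is [120*0.90, 120*1.10] = [108, 132].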
# Select algorithm
algorithm_choice = weighted_choice(forecast_config["algoritmos"])
algorithm = algorithm_choice["algoritmo"]
# Processing time
processing_time = random.randint(
forecast_config["tiempo_procesamiento_ms"]["min"],
forecast_config["tiempo_procesamiento_ms"]["max"]
)
# Model info
model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
model_id = f"{algorithm}_{business_type}_{model_version}"
# Create forecast
forecast = Forecast(
id=uuid.uuid4(),
tenant_id=tenant_id,
inventory_product_id=product_id,
product_name=product_name,
location=location,
forecast_date=forecast_date,
created_at=forecast_date - timedelta(days=1), # Created day before
predicted_demand=predicted_demand,
confidence_lower=confidence_lower,
confidence_upper=confidence_upper,
confidence_level=forecast_config["nivel_confianza"],
model_id=model_id,
model_version=model_version,
algorithm=algorithm,
business_type=business_type,
day_of_week=day_of_week,
is_holiday=False, # Could add holiday logic
is_weekend=is_weekend,
weather_temperature=weather_temp,
weather_precipitation=weather_precip,
weather_description=weather_desc,
traffic_volume=traffic_volume,
processing_time_ms=processing_time,
features_used={
"day_of_week": True,
"weather": True,
"traffic": True,
"historical_demand": True,
"seasonality": True
}
)
db.add(forecast)
forecasts_created += 1
# Generate future forecasts (next 14 days)
dias_futuro = forecast_config["dias_prevision_futuro"]
for product in forecast_config["productos_demo"]:
product_id = uuid.UUID(product["id"])
product_name = product["nombre"]
for day_offset in range(1, dias_futuro + 1):
forecast_date = calculate_datetime_from_offset(day_offset)
day_of_week = forecast_date.weekday()
is_weekend = day_of_week >= 5
# Generate weather forecast data (slightly less certain)
weather_temp = random.uniform(
forecast_config["factores_externos"]["temperatura"]["min"],
forecast_config["factores_externos"]["temperatura"]["max"]
)
weather_precip = 0.0
if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])
weather_desc = random.choice(["Despejado", "Parcialmente nublado", "Nublado"])
traffic_volume = random.randint(
forecast_config["factores_externos"]["volumen_trafico"]["min"],
forecast_config["factores_externos"]["volumen_trafico"]["max"]
)
# Calculate demand
predicted_demand = calculate_demand(
product, day_of_week, is_weekend,
weather_temp, weather_precip, traffic_volume, config
)
predicted_demand *= multiplier
# Wider confidence intervals for future predictions
lower_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] + 5.0) / 100.0
upper_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] + 5.0) / 100.0
confidence_lower = predicted_demand * (1.0 - lower_pct)
confidence_upper = predicted_demand * (1.0 + upper_pct)
algorithm_choice = weighted_choice(forecast_config["algoritmos"])
algorithm = algorithm_choice["algoritmo"]
processing_time = random.randint(
forecast_config["tiempo_procesamiento_ms"]["min"],
forecast_config["tiempo_procesamiento_ms"]["max"]
)
model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
model_id = f"{algorithm}_{business_type}_{model_version}"
forecast = Forecast(
id=uuid.uuid4(),
tenant_id=tenant_id,
inventory_product_id=product_id,
product_name=product_name,
location=location,
forecast_date=forecast_date,
created_at=BASE_REFERENCE_DATE, # Created "today" in demo time (the base reference date)
predicted_demand=predicted_demand,
confidence_lower=confidence_lower,
confidence_upper=confidence_upper,
confidence_level=forecast_config["nivel_confianza"],
model_id=model_id,
model_version=model_version,
algorithm=algorithm,
business_type=business_type,
day_of_week=day_of_week,
is_holiday=False,
is_weekend=is_weekend,
weather_temperature=weather_temp,
weather_precipitation=weather_precip,
weather_description=weather_desc,
traffic_volume=traffic_volume,
processing_time_ms=processing_time,
features_used={
"day_of_week": True,
"weather": True,
"traffic": True,
"historical_demand": True,
"seasonality": True
}
)
db.add(forecast)
forecasts_created += 1
await db.commit()
logger.info(f"Successfully created {forecasts_created} forecasts and {batches_created} batches for {tenant_name}")
return {
"tenant_id": str(tenant_id),
"forecasts_created": forecasts_created,
"batches_created": batches_created,
"skipped": False
}
async def seed_all(db: AsyncSession):
"""Seed all demo tenants with forecasting data"""
logger.info("Starting demo forecasting seed process")
# Load configuration
config = load_forecasting_config()
results = []
# Seed San Pablo (Individual Bakery)
# Seed Professional Bakery (merged from San Pablo + La Espiga)
result_professional = await generate_forecasts_for_tenant(
db,
DEMO_TENANT_PROFESSIONAL,
"Professional Bakery",
"individual_bakery",
config
)
results.append(result_professional)
total_forecasts = sum(r["forecasts_created"] for r in results)
total_batches = sum(r["batches_created"] for r in results)
return {
"results": results,
"total_forecasts_created": total_forecasts,
"total_batches_created": total_batches,
"status": "completed"
}
def validate_base_reference_date():
"""Ensure BASE_REFERENCE_DATE hasn't changed since last seed"""
expected_date = datetime(2025, 1, 8, 6, 0, 0, tzinfo=timezone.utc)
if BASE_REFERENCE_DATE != expected_date:
logger.warning(
"BASE_REFERENCE_DATE has changed! This may cause date inconsistencies.",
current=BASE_REFERENCE_DATE.isoformat(),
expected=expected_date.isoformat()
)
# Don't fail - just warn. Allow intentional changes.
logger.info("BASE_REFERENCE_DATE validation", date=BASE_REFERENCE_DATE.isoformat())
async def main():
"""Main execution function"""
validate_base_reference_date()
# Get database URL from environment
database_url = os.getenv("FORECASTING_DATABASE_URL")
if not database_url:
logger.error("FORECASTING_DATABASE_URL environment variable must be set")
return 1
# Ensure asyncpg driver
if database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
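# e.g. "postgresql://user:pass@host:5432/forecasting"
#  ->  "postgresql+asyncpg://user:pass@host:5432/forecasting"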
# Create async engine
engine = create_async_engine(database_url, echo=False)
async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
try:
async with async_session() as session:
result = await seed_all(session)
logger.info(
"Forecasting seed completed successfully!",
total_forecasts=result["total_forecasts_created"],
total_batches=result["total_batches_created"],
status=result["status"]
)
# Print summary
print("\n" + "="*60)
print("DEMO FORECASTING SEED SUMMARY")
print("="*60)
for tenant_result in result["results"]:
tenant_id = tenant_result["tenant_id"]
forecasts = tenant_result["forecasts_created"]
batches = tenant_result["batches_created"]
skipped = tenant_result.get("skipped", False)
status = "SKIPPED (already exists)" if skipped else f"CREATED {forecasts} forecasts, {batches} batches"
print(f"Tenant {tenant_id}: {status}")
print(f"\nTotal Forecasts: {result['total_forecasts_created']}")
print(f"Total Batches: {result['total_batches_created']}")
print("="*60 + "\n")
return 0
except Exception as e:
logger.error(f"Forecasting seed failed: {str(e)}", exc_info=True)
return 1
finally:
await engine.dispose()
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)

View File

@@ -1,167 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Retail Forecasting Seeding Script for Forecasting Service
Creates store-level demand forecasts for child retail outlets
This script populates child retail tenants with AI-generated demand forecasts.
Usage:
python /app/scripts/demo/seed_demo_forecasts_retail.py
Environment Variables Required:
FORECASTING_DATABASE_URL - PostgreSQL connection string
DEMO_MODE - Set to 'production' for production seeding
"""
import asyncio
import uuid
import sys
import os
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path
from decimal import Decimal
# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
# Add shared to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent))
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog
from shared.utils.demo_dates import BASE_REFERENCE_DATE
from app.models import Forecast, PredictionBatch
structlog.configure(
processors=[
structlog.stdlib.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.dev.ConsoleRenderer()
]
)
logger = structlog.get_logger()
# Fixed Demo Tenant IDs
DEMO_TENANT_CHILD_1 = uuid.UUID("d4e5f6a7-b8c9-40d1-e2f3-a4b5c6d7e8f9") # Madrid Centro
DEMO_TENANT_CHILD_2 = uuid.UUID("e5f6a7b8-c9d0-41e2-f3a4-b5c6d7e8f9a0") # Barcelona Gràcia
DEMO_TENANT_CHILD_3 = uuid.UUID("f6a7b8c9-d0e1-42f3-a4b5-c6d7e8f9a0b1") # Valencia Ruzafa
# Product IDs
PRODUCT_IDS = {
"PRO-BAG-001": "20000000-0000-0000-0000-000000000001",
"PRO-CRO-001": "20000000-0000-0000-0000-000000000002",
"PRO-PUE-001": "20000000-0000-0000-0000-000000000003",
"PRO-NAP-001": "20000000-0000-0000-0000-000000000004",
}
# Retail forecasting patterns
RETAIL_FORECASTS = [
(DEMO_TENANT_CHILD_1, "Madrid Centro", {"PRO-BAG-001": 120, "PRO-CRO-001": 80, "PRO-PUE-001": 35, "PRO-NAP-001": 60}),
(DEMO_TENANT_CHILD_2, "Barcelona Gràcia", {"PRO-BAG-001": 90, "PRO-CRO-001": 60, "PRO-PUE-001": 25, "PRO-NAP-001": 45}),
(DEMO_TENANT_CHILD_3, "Valencia Ruzafa", {"PRO-BAG-001": 70, "PRO-CRO-001": 45, "PRO-PUE-001": 20, "PRO-NAP-001": 35})
]
async def seed_forecasts_for_retail_tenant(db: AsyncSession, tenant_id: uuid.UUID, tenant_name: str, base_forecasts: dict):
"""Seed forecasts for a retail tenant"""
logger.info(f"Seeding forecasts for: {tenant_name}", tenant_id=str(tenant_id))
created = 0
# Create 7 days of forecasts
for days_ahead in range(1, 8):
forecast_date = BASE_REFERENCE_DATE + timedelta(days=days_ahead)
for sku, base_qty in base_forecasts.items():
base_product_id = uuid.UUID(PRODUCT_IDS[sku])
tenant_int = int(tenant_id.hex, 16)
product_id = uuid.UUID(int=tenant_int ^ int(base_product_id.hex, 16))
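# Inline XOR transform, presumably equivalent to the transform_id helper
# from shared.utils.demo_id_transformer used by the clone endpoint:
# uuid.UUID(int=tenant_id.int ^ base_product_id.int). XOR is its own inverse,
# so XOR-ing with tenant_int again recovers the base product ID, keeping
# per-tenant IDs deterministic and reversible.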
# Weekend boost
is_weekend = forecast_date.weekday() in [5, 6]
day_of_week = forecast_date.weekday()
multiplier = random.uniform(1.3, 1.5) if is_weekend else random.uniform(0.9, 1.1)
forecasted_quantity = int(base_qty * multiplier)
forecast = Forecast(
id=uuid.uuid4(),
tenant_id=tenant_id,
inventory_product_id=product_id,
product_name=sku,
location=tenant_name,
forecast_date=forecast_date,
created_at=BASE_REFERENCE_DATE,
predicted_demand=float(forecasted_quantity),
confidence_lower=float(int(forecasted_quantity * 0.85)),
confidence_upper=float(int(forecasted_quantity * 1.15)),
confidence_level=0.90,
model_id="retail_forecast_model",
model_version="retail_v1.0",
algorithm="prophet_retail",
business_type="retail_outlet",
day_of_week=day_of_week,
is_holiday=False,
is_weekend=is_weekend,
weather_temperature=random.uniform(10.0, 25.0),
weather_precipitation=random.uniform(0.0, 5.0) if random.random() < 0.3 else 0.0,
weather_description="Clear" if random.random() > 0.3 else "Rainy",
traffic_volume=random.randint(50, 200) if is_weekend else random.randint(30, 120),
processing_time_ms=random.randint(50, 200),
features_used={"historical_sales": True, "weather": True, "day_of_week": True}
)
db.add(forecast)
created += 1
await db.commit()
logger.info(f"Created {created} forecasts for {tenant_name}")
return {"tenant_id": str(tenant_id), "forecasts_created": created}
async def seed_all(db: AsyncSession):
"""Seed all retail forecasts"""
logger.info("=" * 80)
logger.info("📈 Starting Demo Retail Forecasting Seeding")
logger.info("=" * 80)
results = []
for tenant_id, tenant_name, base_forecasts in RETAIL_FORECASTS:
result = await seed_forecasts_for_retail_tenant(db, tenant_id, f"{tenant_name} (Retail)", base_forecasts)
results.append(result)
total = sum(r["forecasts_created"] for r in results)
logger.info(f"✅ Total forecasts created: {total}")
return {"total_forecasts": total, "results": results}
async def main():
database_url = os.getenv("FORECASTING_DATABASE_URL") or os.getenv("DATABASE_URL")
if not database_url:
logger.error("❌ DATABASE_URL not set")
return 1
if database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
engine = create_async_engine(database_url, echo=False, pool_pre_ping=True)
async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
try:
async with async_session() as session:
await seed_all(session)
logger.info("🎉 Retail forecasting seed completed!")
return 0
except Exception as e:
logger.error(f"❌ Seed failed: {e}", exc_info=True)
return 1
finally:
await engine.dispose()
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)