demo seed change

This commit is contained in:
Urtzi Alfaro
2025-12-13 23:57:54 +01:00
parent f3688dfb04
commit ff830a3415
299 changed files with 20328 additions and 19485 deletions

View File

@@ -1,506 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Forecasting Seeding Script for Forecasting Service
Creates demand forecasts and prediction batches for demo template tenants
This script runs as a Kubernetes init job inside the forecasting-service container.
"""
import asyncio
import uuid
import sys
import os
import json
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path
# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog
from app.models.forecasts import Forecast, PredictionBatch
# Add shared path for demo utilities
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.utils.demo_dates import BASE_REFERENCE_DATE
# Configure logging
logger = structlog.get_logger()
DEMO_TENANT_PROFESSIONAL = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6") # Individual bakery
# Day of week mapping
DAYS_OF_WEEK = {
0: "lunes",
1: "martes",
2: "miercoles",
3: "jueves",
4: "viernes",
5: "sabado",
6: "domingo"
}
def load_forecasting_config():
"""Load forecasting configuration from JSON file"""
config_file = Path(__file__).parent / "previsiones_config_es.json"
if not config_file.exists():
raise FileNotFoundError(f"Forecasting config file not found: {config_file}")
with open(config_file, 'r', encoding='utf-8') as f:
return json.load(f)
def calculate_datetime_from_offset(offset_days: int) -> datetime:
"""Calculate a datetime based on offset from BASE_REFERENCE_DATE"""
return BASE_REFERENCE_DATE + timedelta(days=offset_days)
def weighted_choice(choices: list) -> dict:
"""Make a weighted random choice from list of dicts with 'peso' key"""
total_weight = sum(c.get("peso", 1.0) for c in choices)
r = random.uniform(0, total_weight)
cumulative = 0
for choice in choices:
cumulative += choice.get("peso", 1.0)
if r <= cumulative:
return choice
return choices[-1]
def calculate_demand(
product: dict,
day_of_week: int,
is_weekend: bool,
weather_temp: float,
weather_precip: float,
traffic_volume: int,
config: dict
) -> float:
"""Calculate predicted demand based on various factors"""
# Base demand
base_demand = product["demanda_base_diaria"]
# Weekly trend factor
day_name = DAYS_OF_WEEK[day_of_week]
weekly_factor = product["tendencia_semanal"][day_name]
# Apply seasonality (simple growth factor for "creciente")
seasonality_factor = 1.0
if product["estacionalidad"] == "creciente":
seasonality_factor = 1.05
# Weather impact (simple model)
weather_factor = 1.0
temp_impact = config["configuracion_previsiones"]["factores_externos"]["temperatura"]["impacto_demanda"]
precip_impact = config["configuracion_previsiones"]["factores_externos"]["precipitacion"]["impacto_demanda"]
if weather_temp > 22.0:
weather_factor += temp_impact * (weather_temp - 22.0) / 10.0
if weather_precip > 0:
weather_factor += precip_impact
# Traffic correlation
traffic_correlation = config["configuracion_previsiones"]["factores_externos"]["volumen_trafico"]["correlacion_demanda"]
traffic_factor = 1.0 + (traffic_volume / 1000.0 - 1.0) * traffic_correlation
# Calculate predicted demand
predicted = base_demand * weekly_factor * seasonality_factor * weather_factor * traffic_factor
# Add randomness based on variability
variability = product["variabilidad"]
predicted = predicted * random.uniform(1.0 - variability, 1.0 + variability)
return max(0.0, predicted)
async def generate_forecasts_for_tenant(
db: AsyncSession,
tenant_id: uuid.UUID,
tenant_name: str,
business_type: str,
config: dict
):
"""Generate forecasts for a specific tenant"""
logger.info(f"Generating forecasts for: {tenant_name}", tenant_id=str(tenant_id))
# Check if forecasts already exist
result = await db.execute(
select(Forecast).where(Forecast.tenant_id == tenant_id).limit(1)
)
existing = result.scalar_one_or_none()
if existing:
logger.info(f"Forecasts already exist for {tenant_name}, skipping seed")
return {"tenant_id": str(tenant_id), "forecasts_created": 0, "batches_created": 0, "skipped": True}
forecast_config = config["configuracion_previsiones"]
batches_config = config["lotes_prediccion"]
# Get location for this business type
location = forecast_config["ubicaciones"][business_type]
# Get multiplier for central bakery
multiplier = forecast_config["multiplicador_central_bakery"] if business_type == "central_bakery" else 1.0
forecasts_created = 0
batches_created = 0
# Generate prediction batches first
num_batches = batches_config["lotes_por_tenant"]
for batch_idx in range(num_batches):
# Select batch status
status_rand = random.random()
cumulative = 0
batch_status = "completed"
for status, weight in batches_config["distribucion_estados"].items():
cumulative += weight
if status_rand <= cumulative:
batch_status = status
break
# Select forecast days
forecast_days = random.choice(batches_config["dias_prevision_lotes"])
# Create batch at different times in the past
requested_offset = -(batch_idx + 1) * 10 # Batches every 10 days in the past
requested_at = calculate_datetime_from_offset(requested_offset)
completed_at = None
processing_time = None
if batch_status == "completed":
processing_time = random.randint(5000, 25000) # 5-25 seconds
completed_at = requested_at + timedelta(milliseconds=processing_time)
batch = PredictionBatch(
id=uuid.uuid4(),
tenant_id=tenant_id,
batch_name=f"Previsión {forecast_days} días - {requested_at.strftime('%Y%m%d')}",
requested_at=requested_at,
completed_at=completed_at,
status=batch_status,
total_products=forecast_config["productos_por_tenant"],
completed_products=forecast_config["productos_por_tenant"] if batch_status == "completed" else 0,
failed_products=0 if batch_status != "failed" else random.randint(1, 3),
forecast_days=forecast_days,
business_type=business_type,
error_message="Error de conexión con servicio de clima" if batch_status == "failed" else None,
processing_time_ms=processing_time
)
db.add(batch)
batches_created += 1
await db.flush()
# Generate historical forecasts (past 30 days)
dias_historico = forecast_config["dias_historico"]
for product in forecast_config["productos_demo"]:
product_id = uuid.UUID(product["id"])
product_name = product["nombre"]
for day_offset in range(-dias_historico, 0):
forecast_date = calculate_datetime_from_offset(day_offset)
day_of_week = forecast_date.weekday()
is_weekend = day_of_week >= 5
# Generate weather data
weather_temp = random.uniform(
forecast_config["factores_externos"]["temperatura"]["min"],
forecast_config["factores_externos"]["temperatura"]["max"]
)
weather_precip = 0.0
if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])
weather_descriptions = ["Despejado", "Parcialmente nublado", "Nublado", "Lluvia ligera", "Lluvia"]
weather_desc = random.choice(weather_descriptions)
# Traffic volume
traffic_volume = random.randint(
forecast_config["factores_externos"]["volumen_trafico"]["min"],
forecast_config["factores_externos"]["volumen_trafico"]["max"]
)
# Calculate demand
predicted_demand = calculate_demand(
product, day_of_week, is_weekend,
weather_temp, weather_precip, traffic_volume, config
)
# Apply multiplier for central bakery
predicted_demand *= multiplier
# Calculate confidence intervals
lower_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] / 100.0
upper_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] / 100.0
confidence_lower = predicted_demand * (1.0 - lower_pct)
confidence_upper = predicted_demand * (1.0 + upper_pct)
# Select algorithm
algorithm_choice = weighted_choice(forecast_config["algoritmos"])
algorithm = algorithm_choice["algoritmo"]
# Processing time
processing_time = random.randint(
forecast_config["tiempo_procesamiento_ms"]["min"],
forecast_config["tiempo_procesamiento_ms"]["max"]
)
# Model info
model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
model_id = f"{algorithm}_{business_type}_{model_version}"
# Create forecast
forecast = Forecast(
id=uuid.uuid4(),
tenant_id=tenant_id,
inventory_product_id=product_id,
product_name=product_name,
location=location,
forecast_date=forecast_date,
created_at=forecast_date - timedelta(days=1), # Created day before
predicted_demand=predicted_demand,
confidence_lower=confidence_lower,
confidence_upper=confidence_upper,
confidence_level=forecast_config["nivel_confianza"],
model_id=model_id,
model_version=model_version,
algorithm=algorithm,
business_type=business_type,
day_of_week=day_of_week,
is_holiday=False, # Could add holiday logic
is_weekend=is_weekend,
weather_temperature=weather_temp,
weather_precipitation=weather_precip,
weather_description=weather_desc,
traffic_volume=traffic_volume,
processing_time_ms=processing_time,
features_used={
"day_of_week": True,
"weather": True,
"traffic": True,
"historical_demand": True,
"seasonality": True
}
)
db.add(forecast)
forecasts_created += 1
# Generate future forecasts (next 14 days)
dias_futuro = forecast_config["dias_prevision_futuro"]
for product in forecast_config["productos_demo"]:
product_id = uuid.UUID(product["id"])
product_name = product["nombre"]
for day_offset in range(1, dias_futuro + 1):
forecast_date = calculate_datetime_from_offset(day_offset)
day_of_week = forecast_date.weekday()
is_weekend = day_of_week >= 5
# Generate weather forecast data (slightly less certain)
weather_temp = random.uniform(
forecast_config["factores_externos"]["temperatura"]["min"],
forecast_config["factores_externos"]["temperatura"]["max"]
)
weather_precip = 0.0
if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])
weather_desc = random.choice(["Despejado", "Parcialmente nublado", "Nublado"])
traffic_volume = random.randint(
forecast_config["factores_externos"]["volumen_trafico"]["min"],
forecast_config["factores_externos"]["volumen_trafico"]["max"]
)
# Calculate demand
predicted_demand = calculate_demand(
product, day_of_week, is_weekend,
weather_temp, weather_precip, traffic_volume, config
)
predicted_demand *= multiplier
# Wider confidence intervals for future predictions
lower_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] + 5.0) / 100.0
upper_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] + 5.0) / 100.0
confidence_lower = predicted_demand * (1.0 - lower_pct)
confidence_upper = predicted_demand * (1.0 + upper_pct)
algorithm_choice = weighted_choice(forecast_config["algoritmos"])
algorithm = algorithm_choice["algoritmo"]
processing_time = random.randint(
forecast_config["tiempo_procesamiento_ms"]["min"],
forecast_config["tiempo_procesamiento_ms"]["max"]
)
model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
model_id = f"{algorithm}_{business_type}_{model_version}"
forecast = Forecast(
id=uuid.uuid4(),
tenant_id=tenant_id,
inventory_product_id=product_id,
product_name=product_name,
location=location,
forecast_date=forecast_date,
created_at=BASE_REFERENCE_DATE, # Created today
predicted_demand=predicted_demand,
confidence_lower=confidence_lower,
confidence_upper=confidence_upper,
confidence_level=forecast_config["nivel_confianza"],
model_id=model_id,
model_version=model_version,
algorithm=algorithm,
business_type=business_type,
day_of_week=day_of_week,
is_holiday=False,
is_weekend=is_weekend,
weather_temperature=weather_temp,
weather_precipitation=weather_precip,
weather_description=weather_desc,
traffic_volume=traffic_volume,
processing_time_ms=processing_time,
features_used={
"day_of_week": True,
"weather": True,
"traffic": True,
"historical_demand": True,
"seasonality": True
}
)
db.add(forecast)
forecasts_created += 1
await db.commit()
logger.info(f"Successfully created {forecasts_created} forecasts and {batches_created} batches for {tenant_name}")
return {
"tenant_id": str(tenant_id),
"forecasts_created": forecasts_created,
"batches_created": batches_created,
"skipped": False
}
async def seed_all(db: AsyncSession):
"""Seed all demo tenants with forecasting data"""
logger.info("Starting demo forecasting seed process")
# Load configuration
config = load_forecasting_config()
results = []
# Seed San Pablo (Individual Bakery)
# Seed Professional Bakery (merged from San Pablo + La Espiga)
result_professional = await generate_forecasts_for_tenant(
db,
DEMO_TENANT_PROFESSIONAL,
"Professional Bakery",
"individual_bakery",
config
)
results.append(result_professional)
total_forecasts = sum(r["forecasts_created"] for r in results)
total_batches = sum(r["batches_created"] for r in results)
return {
"results": results,
"total_forecasts_created": total_forecasts,
"total_batches_created": total_batches,
"status": "completed"
}
def validate_base_reference_date():
"""Ensure BASE_REFERENCE_DATE hasn't changed since last seed"""
expected_date = datetime(2025, 1, 8, 6, 0, 0, tzinfo=timezone.utc)
if BASE_REFERENCE_DATE != expected_date:
logger.warning(
"BASE_REFERENCE_DATE has changed! This may cause date inconsistencies.",
current=BASE_REFERENCE_DATE.isoformat(),
expected=expected_date.isoformat()
)
# Don't fail - just warn. Allow intentional changes.
logger.info("BASE_REFERENCE_DATE validation", date=BASE_REFERENCE_DATE.isoformat())
async def main():
"""Main execution function"""
validate_base_reference_date() # Add this line
# Get database URL from environment
database_url = os.getenv("FORECASTING_DATABASE_URL")
if not database_url:
logger.error("FORECASTING_DATABASE_URL environment variable must be set")
return 1
# Ensure asyncpg driver
if database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
# Create async engine
engine = create_async_engine(database_url, echo=False)
async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
try:
async with async_session() as session:
result = await seed_all(session)
logger.info(
"Forecasting seed completed successfully!",
total_forecasts=result["total_forecasts_created"],
total_batches=result["total_batches_created"],
status=result["status"]
)
# Print summary
print("\n" + "="*60)
print("DEMO FORECASTING SEED SUMMARY")
print("="*60)
for tenant_result in result["results"]:
tenant_id = tenant_result["tenant_id"]
forecasts = tenant_result["forecasts_created"]
batches = tenant_result["batches_created"]
skipped = tenant_result.get("skipped", False)
status = "SKIPPED (already exists)" if skipped else f"CREATED {forecasts} forecasts, {batches} batches"
print(f"Tenant {tenant_id}: {status}")
print(f"\nTotal Forecasts: {result['total_forecasts_created']}")
print(f"Total Batches: {result['total_batches_created']}")
print("="*60 + "\n")
return 0
except Exception as e:
logger.error(f"Forecasting seed failed: {str(e)}", exc_info=True)
return 1
finally:
await engine.dispose()
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)

View File

@@ -1,167 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Retail Forecasting Seeding Script for Forecasting Service
Creates store-level demand forecasts for child retail outlets
This script populates child retail tenants with AI-generated demand forecasts.
Usage:
python /app/scripts/demo/seed_demo_forecasts_retail.py
Environment Variables Required:
FORECASTING_DATABASE_URL - PostgreSQL connection string
DEMO_MODE - Set to 'production' for production seeding
"""
import asyncio
import uuid
import sys
import os
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path
from decimal import Decimal
# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
# Add shared to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent))
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog
from shared.utils.demo_dates import BASE_REFERENCE_DATE
from app.models import Forecast, PredictionBatch
structlog.configure(
processors=[
structlog.stdlib.add_log_level,
structlog.processors.TimeStamper(fmt="iso"),
structlog.dev.ConsoleRenderer()
]
)
logger = structlog.get_logger()
# Fixed Demo Tenant IDs
DEMO_TENANT_CHILD_1 = uuid.UUID("d4e5f6a7-b8c9-40d1-e2f3-a4b5c6d7e8f9") # Madrid Centro
DEMO_TENANT_CHILD_2 = uuid.UUID("e5f6a7b8-c9d0-41e2-f3a4-b5c6d7e8f9a0") # Barcelona Gràcia
DEMO_TENANT_CHILD_3 = uuid.UUID("f6a7b8c9-d0e1-42f3-a4b5-c6d7e8f9a0b1") # Valencia Ruzafa
# Product IDs
PRODUCT_IDS = {
"PRO-BAG-001": "20000000-0000-0000-0000-000000000001",
"PRO-CRO-001": "20000000-0000-0000-0000-000000000002",
"PRO-PUE-001": "20000000-0000-0000-0000-000000000003",
"PRO-NAP-001": "20000000-0000-0000-0000-000000000004",
}
# Retail forecasting patterns
RETAIL_FORECASTS = [
(DEMO_TENANT_CHILD_1, "Madrid Centro", {"PRO-BAG-001": 120, "PRO-CRO-001": 80, "PRO-PUE-001": 35, "PRO-NAP-001": 60}),
(DEMO_TENANT_CHILD_2, "Barcelona Gràcia", {"PRO-BAG-001": 90, "PRO-CRO-001": 60, "PRO-PUE-001": 25, "PRO-NAP-001": 45}),
(DEMO_TENANT_CHILD_3, "Valencia Ruzafa", {"PRO-BAG-001": 70, "PRO-CRO-001": 45, "PRO-PUE-001": 20, "PRO-NAP-001": 35})
]
async def seed_forecasts_for_retail_tenant(db: AsyncSession, tenant_id: uuid.UUID, tenant_name: str, base_forecasts: dict):
"""Seed forecasts for a retail tenant"""
logger.info(f"Seeding forecasts for: {tenant_name}", tenant_id=str(tenant_id))
created = 0
# Create 7 days of forecasts
for days_ahead in range(1, 8):
forecast_date = BASE_REFERENCE_DATE + timedelta(days=days_ahead)
for sku, base_qty in base_forecasts.items():
base_product_id = uuid.UUID(PRODUCT_IDS[sku])
tenant_int = int(tenant_id.hex, 16)
product_id = uuid.UUID(int=tenant_int ^ int(base_product_id.hex, 16))
# Weekend boost
is_weekend = forecast_date.weekday() in [5, 6]
day_of_week = forecast_date.weekday()
multiplier = random.uniform(1.3, 1.5) if is_weekend else random.uniform(0.9, 1.1)
forecasted_quantity = int(base_qty * multiplier)
forecast = Forecast(
id=uuid.uuid4(),
tenant_id=tenant_id,
inventory_product_id=product_id,
product_name=sku,
location=tenant_name,
forecast_date=forecast_date,
created_at=BASE_REFERENCE_DATE,
predicted_demand=float(forecasted_quantity),
confidence_lower=float(int(forecasted_quantity * 0.85)),
confidence_upper=float(int(forecasted_quantity * 1.15)),
confidence_level=0.90,
model_id="retail_forecast_model",
model_version="retail_v1.0",
algorithm="prophet_retail",
business_type="retail_outlet",
day_of_week=day_of_week,
is_holiday=False,
is_weekend=is_weekend,
weather_temperature=random.uniform(10.0, 25.0),
weather_precipitation=random.uniform(0.0, 5.0) if random.random() < 0.3 else 0.0,
weather_description="Clear" if random.random() > 0.3 else "Rainy",
traffic_volume=random.randint(50, 200) if is_weekend else random.randint(30, 120),
processing_time_ms=random.randint(50, 200),
features_used={"historical_sales": True, "weather": True, "day_of_week": True}
)
db.add(forecast)
created += 1
await db.commit()
logger.info(f"Created {created} forecasts for {tenant_name}")
return {"tenant_id": str(tenant_id), "forecasts_created": created}
async def seed_all(db: AsyncSession):
"""Seed all retail forecasts"""
logger.info("=" * 80)
logger.info("📈 Starting Demo Retail Forecasting Seeding")
logger.info("=" * 80)
results = []
for tenant_id, tenant_name, base_forecasts in RETAIL_FORECASTS:
result = await seed_forecasts_for_retail_tenant(db, tenant_id, f"{tenant_name} (Retail)", base_forecasts)
results.append(result)
total = sum(r["forecasts_created"] for r in results)
logger.info(f"✅ Total forecasts created: {total}")
return {"total_forecasts": total, "results": results}
async def main():
database_url = os.getenv("FORECASTING_DATABASE_URL") or os.getenv("DATABASE_URL")
if not database_url:
logger.error("❌ DATABASE_URL not set")
return 1
if database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
engine = create_async_engine(database_url, echo=False, pool_pre_ping=True)
async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
try:
async with async_session() as session:
await seed_all(session)
logger.info("🎉 Retail forecasting seed completed!")
return 0
except Exception as e:
logger.error(f"❌ Seed failed: {e}", exc_info=True)
return 1
finally:
await engine.dispose()
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)