Improve demo seed
498
services/forecasting/scripts/demo/seed_demo_forecasts.py
Executable file
@@ -0,0 +1,498 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Forecasting Seeding Script for Forecasting Service
Creates demand forecasts and prediction batches for demo template tenants

This script runs as a Kubernetes init job inside the forecasting-service container.
"""

import asyncio
import uuid
import sys
import os
import json
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path

# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog

from app.models.forecasts import Forecast, PredictionBatch

# Configure logging
logger = structlog.get_logger()

# Base demo tenant IDs
DEMO_TENANT_SAN_PABLO = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6")  # Individual bakery
DEMO_TENANT_LA_ESPIGA = uuid.UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7")  # Central bakery

# Base reference date for date calculations
BASE_REFERENCE_DATE = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)

# Day of week mapping
DAYS_OF_WEEK = {
    0: "lunes",
    1: "martes",
    2: "miercoles",
    3: "jueves",
    4: "viernes",
    5: "sabado",
    6: "domingo"
}


def load_forecasting_config():
    """Load forecasting configuration from JSON file"""
    config_file = Path(__file__).parent / "previsiones_config_es.json"
    if not config_file.exists():
        raise FileNotFoundError(f"Forecasting config file not found: {config_file}")

    with open(config_file, 'r', encoding='utf-8') as f:
        return json.load(f)
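
# The JSON is expected to expose at least the "configuracion_previsiones" and
# "lotes_prediccion" top-level keys; nested keys such as "productos_demo",
# "factores_externos" and "distribucion_estados" are assumed from how they are
# read further down in this script.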


def calculate_datetime_from_offset(offset_days: int) -> datetime:
    """Calculate a datetime based on offset from BASE_REFERENCE_DATE"""
    return BASE_REFERENCE_DATE + timedelta(days=offset_days)


def weighted_choice(choices: list) -> dict:
    """Make a weighted random choice from list of dicts with 'peso' key"""
    total_weight = sum(c.get("peso", 1.0) for c in choices)
    r = random.uniform(0, total_weight)

    cumulative = 0
    for choice in choices:
        cumulative += choice.get("peso", 1.0)
        if r <= cumulative:
            return choice

    return choices[-1]
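
# Illustrative call (hypothetical entries; the real ones come from the
# "algoritmos" list in previsiones_config_es.json):
#   weighted_choice([{"algoritmo": "modelo_a", "peso": 0.7},
#                    {"algoritmo": "modelo_b", "peso": 0.3}])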


def calculate_demand(
    product: dict,
    day_of_week: int,
    is_weekend: bool,
    weather_temp: float,
    weather_precip: float,
    traffic_volume: int,
    config: dict
) -> float:
    """Calculate predicted demand based on various factors"""

    # Base demand
    base_demand = product["demanda_base_diaria"]

    # Weekly trend factor
    day_name = DAYS_OF_WEEK[day_of_week]
    weekly_factor = product["tendencia_semanal"][day_name]

    # Apply seasonality (simple growth factor for "creciente")
    seasonality_factor = 1.0
    if product["estacionalidad"] == "creciente":
        seasonality_factor = 1.05

    # Weather impact (simple model)
    weather_factor = 1.0
    temp_impact = config["configuracion_previsiones"]["factores_externos"]["temperatura"]["impacto_demanda"]
    precip_impact = config["configuracion_previsiones"]["factores_externos"]["precipitacion"]["impacto_demanda"]

    if weather_temp > 22.0:
        weather_factor += temp_impact * (weather_temp - 22.0) / 10.0
    if weather_precip > 0:
        weather_factor += precip_impact

    # Traffic correlation
    traffic_correlation = config["configuracion_previsiones"]["factores_externos"]["volumen_trafico"]["correlacion_demanda"]
    traffic_factor = 1.0 + (traffic_volume / 1000.0 - 1.0) * traffic_correlation

    # Calculate predicted demand
    predicted = base_demand * weekly_factor * seasonality_factor * weather_factor * traffic_factor

    # Add randomness based on variability
    variability = product["variabilidad"]
    predicted = predicted * random.uniform(1.0 - variability, 1.0 + variability)

    return max(0.0, predicted)
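
# Net effect of the factors above (as implemented):
#   demand = demanda_base_diaria * weekly * seasonality * weather * traffic
#            * uniform(1 - variabilidad, 1 + variabilidad), floored at 0.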


async def generate_forecasts_for_tenant(
    db: AsyncSession,
    tenant_id: uuid.UUID,
    tenant_name: str,
    business_type: str,
    config: dict
):
    """Generate forecasts for a specific tenant"""
    logger.info(f"Generating forecasts for: {tenant_name}", tenant_id=str(tenant_id))

    # Check if forecasts already exist
    result = await db.execute(
        select(Forecast).where(Forecast.tenant_id == tenant_id).limit(1)
    )
    existing = result.scalar_one_or_none()

    if existing:
        logger.info(f"Forecasts already exist for {tenant_name}, skipping seed")
        return {"tenant_id": str(tenant_id), "forecasts_created": 0, "batches_created": 0, "skipped": True}

    forecast_config = config["configuracion_previsiones"]
    batches_config = config["lotes_prediccion"]

    # Get location for this business type
    location = forecast_config["ubicaciones"][business_type]

    # Get multiplier for central bakery
    multiplier = forecast_config["multiplicador_central_bakery"] if business_type == "central_bakery" else 1.0

    forecasts_created = 0
    batches_created = 0

    # Generate prediction batches first
    num_batches = batches_config["lotes_por_tenant"]

    for batch_idx in range(num_batches):
        # Select batch status
        status_rand = random.random()
        cumulative = 0
        batch_status = "completed"
        for status, weight in batches_config["distribucion_estados"].items():
            cumulative += weight
            if status_rand <= cumulative:
                batch_status = status
                break

        # Select forecast days
        forecast_days = random.choice(batches_config["dias_prevision_lotes"])

        # Create batch at different times in the past
        requested_offset = -(batch_idx + 1) * 10  # Batches every 10 days in the past
        requested_at = calculate_datetime_from_offset(requested_offset)

        completed_at = None
        processing_time = None
        if batch_status == "completed":
            processing_time = random.randint(5000, 25000)  # 5-25 seconds
            completed_at = requested_at + timedelta(milliseconds=processing_time)

        batch = PredictionBatch(
            id=uuid.uuid4(),
            tenant_id=tenant_id,
            batch_name=f"Previsión {forecast_days} días - {requested_at.strftime('%Y%m%d')}",
            requested_at=requested_at,
            completed_at=completed_at,
            status=batch_status,
            total_products=forecast_config["productos_por_tenant"],
            completed_products=forecast_config["productos_por_tenant"] if batch_status == "completed" else 0,
            failed_products=0 if batch_status != "failed" else random.randint(1, 3),
            forecast_days=forecast_days,
            business_type=business_type,
            error_message="Error de conexión con servicio de clima" if batch_status == "failed" else None,
            processing_time_ms=processing_time
        )

        db.add(batch)
        batches_created += 1

    await db.flush()
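    # Note: the forecasts generated below are not linked to these batch rows; the
    # flush above simply sends the PredictionBatch INSERTs to the database before
    # the much larger loop of Forecast inserts begins.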

    # Generate historical forecasts (past 30 days)
    dias_historico = forecast_config["dias_historico"]

    for product in forecast_config["productos_demo"]:
        product_id = uuid.UUID(product["id"])
        product_name = product["nombre"]

        for day_offset in range(-dias_historico, 0):
            forecast_date = calculate_datetime_from_offset(day_offset)
            day_of_week = forecast_date.weekday()
            is_weekend = day_of_week >= 5

            # Generate weather data
            weather_temp = random.uniform(
                forecast_config["factores_externos"]["temperatura"]["min"],
                forecast_config["factores_externos"]["temperatura"]["max"]
            )
            weather_precip = 0.0
            if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
                weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])

            weather_descriptions = ["Despejado", "Parcialmente nublado", "Nublado", "Lluvia ligera", "Lluvia"]
            weather_desc = random.choice(weather_descriptions)

            # Traffic volume
            traffic_volume = random.randint(
                forecast_config["factores_externos"]["volumen_trafico"]["min"],
                forecast_config["factores_externos"]["volumen_trafico"]["max"]
            )

            # Calculate demand
            predicted_demand = calculate_demand(
                product, day_of_week, is_weekend,
                weather_temp, weather_precip, traffic_volume, config
            )

            # Apply multiplier for central bakery
            predicted_demand *= multiplier

            # Calculate confidence intervals
            lower_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] / 100.0
            upper_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] / 100.0

            confidence_lower = predicted_demand * (1.0 - lower_pct)
            confidence_upper = predicted_demand * (1.0 + upper_pct)

            # Select algorithm
            algorithm_choice = weighted_choice(forecast_config["algoritmos"])
            algorithm = algorithm_choice["algoritmo"]

            # Processing time
            processing_time = random.randint(
                forecast_config["tiempo_procesamiento_ms"]["min"],
                forecast_config["tiempo_procesamiento_ms"]["max"]
            )

            # Model info
            model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
            model_id = f"{algorithm}_{business_type}_{model_version}"

            # Create forecast
            forecast = Forecast(
                id=uuid.uuid4(),
                tenant_id=tenant_id,
                inventory_product_id=product_id,
                product_name=product_name,
                location=location,
                forecast_date=forecast_date,
                created_at=forecast_date - timedelta(days=1),  # Created day before
                predicted_demand=predicted_demand,
                confidence_lower=confidence_lower,
                confidence_upper=confidence_upper,
                confidence_level=forecast_config["nivel_confianza"],
                model_id=model_id,
                model_version=model_version,
                algorithm=algorithm,
                business_type=business_type,
                day_of_week=day_of_week,
                is_holiday=False,  # Could add holiday logic
                is_weekend=is_weekend,
                weather_temperature=weather_temp,
                weather_precipitation=weather_precip,
                weather_description=weather_desc,
                traffic_volume=traffic_volume,
                processing_time_ms=processing_time,
                features_used={
                    "day_of_week": True,
                    "weather": True,
                    "traffic": True,
                    "historical_demand": True,
                    "seasonality": True
                }
            )

            db.add(forecast)
            forecasts_created += 1

    # Generate future forecasts (next 14 days)
    dias_futuro = forecast_config["dias_prevision_futuro"]

    for product in forecast_config["productos_demo"]:
        product_id = uuid.UUID(product["id"])
        product_name = product["nombre"]

        for day_offset in range(1, dias_futuro + 1):
            forecast_date = calculate_datetime_from_offset(day_offset)
            day_of_week = forecast_date.weekday()
            is_weekend = day_of_week >= 5

            # Generate weather forecast data (slightly less certain)
            weather_temp = random.uniform(
                forecast_config["factores_externos"]["temperatura"]["min"],
                forecast_config["factores_externos"]["temperatura"]["max"]
            )
            weather_precip = 0.0
            if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
                weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])

            weather_desc = random.choice(["Despejado", "Parcialmente nublado", "Nublado"])

            traffic_volume = random.randint(
                forecast_config["factores_externos"]["volumen_trafico"]["min"],
                forecast_config["factores_externos"]["volumen_trafico"]["max"]
            )

            # Calculate demand
            predicted_demand = calculate_demand(
                product, day_of_week, is_weekend,
                weather_temp, weather_precip, traffic_volume, config
            )

            predicted_demand *= multiplier

            # Wider confidence intervals for future predictions
            lower_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] + 5.0) / 100.0
            upper_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] + 5.0) / 100.0

            confidence_lower = predicted_demand * (1.0 - lower_pct)
            confidence_upper = predicted_demand * (1.0 + upper_pct)

            algorithm_choice = weighted_choice(forecast_config["algoritmos"])
            algorithm = algorithm_choice["algoritmo"]

            processing_time = random.randint(
                forecast_config["tiempo_procesamiento_ms"]["min"],
                forecast_config["tiempo_procesamiento_ms"]["max"]
            )

            model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
            model_id = f"{algorithm}_{business_type}_{model_version}"

            forecast = Forecast(
                id=uuid.uuid4(),
                tenant_id=tenant_id,
                inventory_product_id=product_id,
                product_name=product_name,
                location=location,
                forecast_date=forecast_date,
                created_at=BASE_REFERENCE_DATE,  # Created today
                predicted_demand=predicted_demand,
                confidence_lower=confidence_lower,
                confidence_upper=confidence_upper,
                confidence_level=forecast_config["nivel_confianza"],
                model_id=model_id,
                model_version=model_version,
                algorithm=algorithm,
                business_type=business_type,
                day_of_week=day_of_week,
                is_holiday=False,
                is_weekend=is_weekend,
                weather_temperature=weather_temp,
                weather_precipitation=weather_precip,
                weather_description=weather_desc,
                traffic_volume=traffic_volume,
                processing_time_ms=processing_time,
                features_used={
                    "day_of_week": True,
                    "weather": True,
                    "traffic": True,
                    "historical_demand": True,
                    "seasonality": True
                }
            )

            db.add(forecast)
            forecasts_created += 1

    await db.commit()
    logger.info(f"Successfully created {forecasts_created} forecasts and {batches_created} batches for {tenant_name}")

    return {
        "tenant_id": str(tenant_id),
        "forecasts_created": forecasts_created,
        "batches_created": batches_created,
        "skipped": False
    }


async def seed_all(db: AsyncSession):
    """Seed all demo tenants with forecasting data"""
    logger.info("Starting demo forecasting seed process")

    # Load configuration
    config = load_forecasting_config()

    results = []

    # Seed San Pablo (Individual Bakery)
    result_san_pablo = await generate_forecasts_for_tenant(
        db,
        DEMO_TENANT_SAN_PABLO,
        "San Pablo - Individual Bakery",
        "individual_bakery",
        config
    )
    results.append(result_san_pablo)

    # Seed La Espiga (Central Bakery)
    result_la_espiga = await generate_forecasts_for_tenant(
        db,
        DEMO_TENANT_LA_ESPIGA,
        "La Espiga - Central Bakery",
        "central_bakery",
        config
    )
    results.append(result_la_espiga)

    total_forecasts = sum(r["forecasts_created"] for r in results)
    total_batches = sum(r["batches_created"] for r in results)

    return {
        "results": results,
        "total_forecasts_created": total_forecasts,
        "total_batches_created": total_batches,
        "status": "completed"
    }


async def main():
    """Main execution function"""
    # Get database URL from environment
    database_url = os.getenv("FORECASTING_DATABASE_URL")
    if not database_url:
        logger.error("FORECASTING_DATABASE_URL environment variable must be set")
        return 1

    # Ensure asyncpg driver
    if database_url.startswith("postgresql://"):
        database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
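    # Example rewrite (illustrative URL): "postgresql://user:pass@db:5432/forecasting"
    # becomes "postgresql+asyncpg://user:pass@db:5432/forecasting".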

    # Create async engine
    engine = create_async_engine(database_url, echo=False)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    try:
        async with async_session() as session:
            result = await seed_all(session)

            logger.info(
                "Forecasting seed completed successfully!",
                total_forecasts=result["total_forecasts_created"],
                total_batches=result["total_batches_created"],
                status=result["status"]
            )

            # Print summary
            print("\n" + "="*60)
            print("DEMO FORECASTING SEED SUMMARY")
            print("="*60)
            for tenant_result in result["results"]:
                tenant_id = tenant_result["tenant_id"]
                forecasts = tenant_result["forecasts_created"]
                batches = tenant_result["batches_created"]
                skipped = tenant_result.get("skipped", False)
                status = "SKIPPED (already exists)" if skipped else f"CREATED {forecasts} forecasts, {batches} batches"
                print(f"Tenant {tenant_id}: {status}")
            print(f"\nTotal Forecasts: {result['total_forecasts_created']}")
            print(f"Total Batches: {result['total_batches_created']}")
            print("="*60 + "\n")

            return 0

    except Exception as e:
        logger.error(f"Forecasting seed failed: {str(e)}", exc_info=True)
        return 1
    finally:
        await engine.dispose()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)
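
# Typical invocation (an assumption -- adapt to the actual init-job spec):
#   FORECASTING_DATABASE_URL=postgresql://... python scripts/demo/seed_demo_forecasts.py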