#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Forecasting Seeding Script for Forecasting Service

Creates demand forecasts and prediction batches for demo template tenants.

This script runs as a Kubernetes init job inside the forecasting-service container.
"""

import asyncio
import uuid
import sys
import os
import json
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path

# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog

from app.models.forecasts import Forecast, PredictionBatch

# Configure logging
logger = structlog.get_logger()

# Base demo tenant IDs
DEMO_TENANT_SAN_PABLO = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6")  # Individual bakery
DEMO_TENANT_LA_ESPIGA = uuid.UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7")  # Central bakery

# Base reference date from which all seeded timestamps are offset
BASE_REFERENCE_DATE = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)

# Day-of-week mapping: datetime.weekday() index -> Spanish day name used in the config
DAYS_OF_WEEK = {
    0: "lunes",
    1: "martes",
    2: "miercoles",
    3: "jueves",
    4: "viernes",
    5: "sabado",
    6: "domingo"
}

def load_forecasting_config():
    """Load forecasting configuration from JSON file"""
    config_file = Path(__file__).parent / "previsiones_config_es.json"
    if not config_file.exists():
        raise FileNotFoundError(f"Forecasting config file not found: {config_file}")

    with open(config_file, 'r', encoding='utf-8') as f:
        return json.load(f)

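# The loader above assumes previsiones_config_es.json exposes the keys read later in
# this script. The sketch below is for orientation only: the key names come from the
# code, but the example values are invented and the real demo config may contain more
# fields.
#
# {
#   "configuracion_previsiones": {
#     "ubicaciones": {"individual_bakery": "...", "central_bakery": "..."},
#     "multiplicador_central_bakery": 3.0,
#     "productos_por_tenant": 5,
#     "dias_historico": 30,
#     "dias_prevision_futuro": 14,
#     "nivel_confianza": 0.90,
#     "algoritmos": [{"algoritmo": "prophet", "peso": 0.7}, {"algoritmo": "arima", "peso": 0.3}],
#     "tiempo_procesamiento_ms": {"min": 100, "max": 900},
#     "precision_modelo": {"intervalo_confianza_porcentaje": {"inferior": 10, "superior": 10}},
#     "factores_externos": {
#       "temperatura": {"min": 5.0, "max": 32.0, "impacto_demanda": 0.1},
#       "precipitacion": {"probabilidad_lluvia": 0.3, "mm_promedio": 4.0, "impacto_demanda": -0.15},
#       "volumen_trafico": {"min": 500, "max": 2000, "correlacion_demanda": 0.3}
#     },
#     "productos_demo": [
#       {"id": "<uuid>", "nombre": "...", "demanda_base_diaria": 120.0,
#        "tendencia_semanal": {"lunes": 0.9, "...": 1.0, "domingo": 1.3},
#        "estacionalidad": "creciente", "variabilidad": 0.15}
#     ]
#   },
#   "lotes_prediccion": {
#     "lotes_por_tenant": 3,
#     "distribucion_estados": {"completed": 0.8, "failed": 0.1, "pending": 0.1},
#     "dias_prevision_lotes": [7, 14, 30]
#   }
# }
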
def calculate_datetime_from_offset(offset_days: int) -> datetime:
    """Calculate a datetime based on offset from BASE_REFERENCE_DATE"""
    return BASE_REFERENCE_DATE + timedelta(days=offset_days)

def weighted_choice(choices: list) -> dict:
    """Make a weighted random choice from a list of dicts with a 'peso' (weight) key"""
    total_weight = sum(c.get("peso", 1.0) for c in choices)
    r = random.uniform(0, total_weight)

    cumulative = 0
    for choice in choices:
        cumulative += choice.get("peso", 1.0)
        if r <= cumulative:
            return choice

    return choices[-1]

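# Illustrative call to weighted_choice(); the entries below are invented and only show
# the expected shape (the real entries come from the "algoritmos" list in the config):
#
#   weighted_choice([{"algoritmo": "prophet", "peso": 0.7},
#                    {"algoritmo": "arima", "peso": 0.3}])
#   # -> returns one of the two dicts, with probability proportional to its "peso"
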
def calculate_demand(
    product: dict,
    day_of_week: int,
    is_weekend: bool,
    weather_temp: float,
    weather_precip: float,
    traffic_volume: int,
    config: dict
) -> float:
    """Calculate predicted demand based on various factors"""

    # Base demand
    base_demand = product["demanda_base_diaria"]

    # Weekly trend factor
    day_name = DAYS_OF_WEEK[day_of_week]
    weekly_factor = product["tendencia_semanal"][day_name]

    # Apply seasonality (simple growth factor for "creciente")
    seasonality_factor = 1.0
    if product["estacionalidad"] == "creciente":
        seasonality_factor = 1.05

    # Weather impact (simple model)
    weather_factor = 1.0
    temp_impact = config["configuracion_previsiones"]["factores_externos"]["temperatura"]["impacto_demanda"]
    precip_impact = config["configuracion_previsiones"]["factores_externos"]["precipitacion"]["impacto_demanda"]

    if weather_temp > 22.0:
        weather_factor += temp_impact * (weather_temp - 22.0) / 10.0
    if weather_precip > 0:
        weather_factor += precip_impact

    # Traffic correlation
    traffic_correlation = config["configuracion_previsiones"]["factores_externos"]["volumen_trafico"]["correlacion_demanda"]
    traffic_factor = 1.0 + (traffic_volume / 1000.0 - 1.0) * traffic_correlation

    # Calculate predicted demand
    predicted = base_demand * weekly_factor * seasonality_factor * weather_factor * traffic_factor

    # Add randomness based on variability
    variability = product["variabilidad"]
    predicted = predicted * random.uniform(1.0 - variability, 1.0 + variability)

    return max(0.0, predicted)

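# For reference, calculate_demand() above boils down to:
#
#   demand = demanda_base_diaria
#            * tendencia_semanal[day]
#            * seasonality      (1.05 if estacionalidad == "creciente", else 1.0)
#            * weather_factor   (1.0, increased by impacto_demanda per 10 °C above 22 °C,
#                                plus the precipitation impact when it rains)
#            * traffic_factor   (1.0 + (traffic / 1000 - 1.0) * correlacion_demanda)
#
# then jittered by a uniform factor in [1 - variabilidad, 1 + variabilidad] and clamped
# at zero. Note that is_weekend is accepted but not used by this simple model; weekend
# effects only enter through the per-day weekly trend.
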
async def generate_forecasts_for_tenant(
    db: AsyncSession,
    tenant_id: uuid.UUID,
    tenant_name: str,
    business_type: str,
    config: dict
):
    """
    Generate forecasts for a specific tenant.

    Seeds the configured number of prediction batches plus one forecast per demo
    product per day, covering both the historical window and the future window
    relative to BASE_REFERENCE_DATE.
    """
    logger.info(f"Generating forecasts for: {tenant_name}", tenant_id=str(tenant_id))

    # Check if forecasts already exist
    result = await db.execute(
        select(Forecast).where(Forecast.tenant_id == tenant_id).limit(1)
    )
    existing = result.scalar_one_or_none()

    if existing:
        logger.info(f"Forecasts already exist for {tenant_name}, skipping seed")
        return {"tenant_id": str(tenant_id), "forecasts_created": 0, "batches_created": 0, "skipped": True}

    forecast_config = config["configuracion_previsiones"]
    batches_config = config["lotes_prediccion"]

    # Get location for this business type
    location = forecast_config["ubicaciones"][business_type]

    # Get multiplier for central bakery
    multiplier = forecast_config["multiplicador_central_bakery"] if business_type == "central_bakery" else 1.0

    forecasts_created = 0
    batches_created = 0

    # Generate prediction batches first
    num_batches = batches_config["lotes_por_tenant"]

    for batch_idx in range(num_batches):
        # Select batch status
        status_rand = random.random()
        cumulative = 0
        batch_status = "completed"
        for status, weight in batches_config["distribucion_estados"].items():
            cumulative += weight
            if status_rand <= cumulative:
                batch_status = status
                break

        # Select forecast days
        forecast_days = random.choice(batches_config["dias_prevision_lotes"])

        # Create batch at different times in the past
        requested_offset = -(batch_idx + 1) * 10  # Batches every 10 days in the past
        requested_at = calculate_datetime_from_offset(requested_offset)

        completed_at = None
        processing_time = None
        if batch_status == "completed":
            processing_time = random.randint(5000, 25000)  # 5-25 seconds
            completed_at = requested_at + timedelta(milliseconds=processing_time)

        batch = PredictionBatch(
            id=uuid.uuid4(),
            tenant_id=tenant_id,
            batch_name=f"Previsión {forecast_days} días - {requested_at.strftime('%Y%m%d')}",
            requested_at=requested_at,
            completed_at=completed_at,
            status=batch_status,
            total_products=forecast_config["productos_por_tenant"],
            completed_products=forecast_config["productos_por_tenant"] if batch_status == "completed" else 0,
            failed_products=0 if batch_status != "failed" else random.randint(1, 3),
            forecast_days=forecast_days,
            business_type=business_type,
            error_message="Error de conexión con servicio de clima" if batch_status == "failed" else None,
            processing_time_ms=processing_time
        )

        db.add(batch)
        batches_created += 1

    await db.flush()

    # Generate historical forecasts (past dias_historico days)
    dias_historico = forecast_config["dias_historico"]

    for product in forecast_config["productos_demo"]:
        product_id = uuid.UUID(product["id"])
        product_name = product["nombre"]

        for day_offset in range(-dias_historico, 0):
            forecast_date = calculate_datetime_from_offset(day_offset)
            day_of_week = forecast_date.weekday()
            is_weekend = day_of_week >= 5

            # Generate weather data
            weather_temp = random.uniform(
                forecast_config["factores_externos"]["temperatura"]["min"],
                forecast_config["factores_externos"]["temperatura"]["max"]
            )
            weather_precip = 0.0
            if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
                weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])

            weather_descriptions = ["Despejado", "Parcialmente nublado", "Nublado", "Lluvia ligera", "Lluvia"]
            weather_desc = random.choice(weather_descriptions)

            # Traffic volume
            traffic_volume = random.randint(
                forecast_config["factores_externos"]["volumen_trafico"]["min"],
                forecast_config["factores_externos"]["volumen_trafico"]["max"]
            )

            # Calculate demand
            predicted_demand = calculate_demand(
                product, day_of_week, is_weekend,
                weather_temp, weather_precip, traffic_volume, config
            )

            # Apply multiplier for central bakery
            predicted_demand *= multiplier

            # Calculate confidence intervals
            lower_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] / 100.0
            upper_pct = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] / 100.0

            confidence_lower = predicted_demand * (1.0 - lower_pct)
            confidence_upper = predicted_demand * (1.0 + upper_pct)

            # Select algorithm
            algorithm_choice = weighted_choice(forecast_config["algoritmos"])
            algorithm = algorithm_choice["algoritmo"]

            # Processing time
            processing_time = random.randint(
                forecast_config["tiempo_procesamiento_ms"]["min"],
                forecast_config["tiempo_procesamiento_ms"]["max"]
            )

            # Model info
            model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
            model_id = f"{algorithm}_{business_type}_{model_version}"

            # Create forecast
            forecast = Forecast(
                id=uuid.uuid4(),
                tenant_id=tenant_id,
                inventory_product_id=product_id,
                product_name=product_name,
                location=location,
                forecast_date=forecast_date,
                created_at=forecast_date - timedelta(days=1),  # Created the day before
                predicted_demand=predicted_demand,
                confidence_lower=confidence_lower,
                confidence_upper=confidence_upper,
                confidence_level=forecast_config["nivel_confianza"],
                model_id=model_id,
                model_version=model_version,
                algorithm=algorithm,
                business_type=business_type,
                day_of_week=day_of_week,
                is_holiday=False,  # Could add holiday logic
                is_weekend=is_weekend,
                weather_temperature=weather_temp,
                weather_precipitation=weather_precip,
                weather_description=weather_desc,
                traffic_volume=traffic_volume,
                processing_time_ms=processing_time,
                features_used={
                    "day_of_week": True,
                    "weather": True,
                    "traffic": True,
                    "historical_demand": True,
                    "seasonality": True
                }
            )

            db.add(forecast)
            forecasts_created += 1

    # Generate future forecasts (next dias_prevision_futuro days)
    dias_futuro = forecast_config["dias_prevision_futuro"]

    for product in forecast_config["productos_demo"]:
        product_id = uuid.UUID(product["id"])
        product_name = product["nombre"]

        for day_offset in range(1, dias_futuro + 1):
            forecast_date = calculate_datetime_from_offset(day_offset)
            day_of_week = forecast_date.weekday()
            is_weekend = day_of_week >= 5

            # Generate weather forecast data (slightly less certain)
            weather_temp = random.uniform(
                forecast_config["factores_externos"]["temperatura"]["min"],
                forecast_config["factores_externos"]["temperatura"]["max"]
            )
            weather_precip = 0.0
            if random.random() < forecast_config["factores_externos"]["precipitacion"]["probabilidad_lluvia"]:
                weather_precip = random.uniform(0.5, forecast_config["factores_externos"]["precipitacion"]["mm_promedio"])

            weather_desc = random.choice(["Despejado", "Parcialmente nublado", "Nublado"])

            traffic_volume = random.randint(
                forecast_config["factores_externos"]["volumen_trafico"]["min"],
                forecast_config["factores_externos"]["volumen_trafico"]["max"]
            )

            # Calculate demand
            predicted_demand = calculate_demand(
                product, day_of_week, is_weekend,
                weather_temp, weather_precip, traffic_volume, config
            )

            predicted_demand *= multiplier

            # Wider confidence intervals for future predictions
            lower_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["inferior"] + 5.0) / 100.0
            upper_pct = (forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]["superior"] + 5.0) / 100.0

            confidence_lower = predicted_demand * (1.0 - lower_pct)
            confidence_upper = predicted_demand * (1.0 + upper_pct)

            algorithm_choice = weighted_choice(forecast_config["algoritmos"])
            algorithm = algorithm_choice["algoritmo"]

            processing_time = random.randint(
                forecast_config["tiempo_procesamiento_ms"]["min"],
                forecast_config["tiempo_procesamiento_ms"]["max"]
            )

            model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
            model_id = f"{algorithm}_{business_type}_{model_version}"

            forecast = Forecast(
                id=uuid.uuid4(),
                tenant_id=tenant_id,
                inventory_product_id=product_id,
                product_name=product_name,
                location=location,
                forecast_date=forecast_date,
                created_at=BASE_REFERENCE_DATE,  # Created "today", i.e. at the reference date
                predicted_demand=predicted_demand,
                confidence_lower=confidence_lower,
                confidence_upper=confidence_upper,
                confidence_level=forecast_config["nivel_confianza"],
                model_id=model_id,
                model_version=model_version,
                algorithm=algorithm,
                business_type=business_type,
                day_of_week=day_of_week,
                is_holiday=False,
                is_weekend=is_weekend,
                weather_temperature=weather_temp,
                weather_precipitation=weather_precip,
                weather_description=weather_desc,
                traffic_volume=traffic_volume,
                processing_time_ms=processing_time,
                features_used={
                    "day_of_week": True,
                    "weather": True,
                    "traffic": True,
                    "historical_demand": True,
                    "seasonality": True
                }
            )

            db.add(forecast)
            forecasts_created += 1

    await db.commit()
    logger.info(f"Successfully created {forecasts_created} forecasts and {batches_created} batches for {tenant_name}")

    return {
        "tenant_id": str(tenant_id),
        "forecasts_created": forecasts_created,
        "batches_created": batches_created,
        "skipped": False
    }

async def seed_all(db: AsyncSession):
    """Seed all demo tenants with forecasting data"""
    logger.info("Starting demo forecasting seed process")

    # Load configuration
    config = load_forecasting_config()

    results = []

    # Seed San Pablo (Individual Bakery)
    result_san_pablo = await generate_forecasts_for_tenant(
        db,
        DEMO_TENANT_SAN_PABLO,
        "San Pablo - Individual Bakery",
        "individual_bakery",
        config
    )
    results.append(result_san_pablo)

    # Seed La Espiga (Central Bakery)
    result_la_espiga = await generate_forecasts_for_tenant(
        db,
        DEMO_TENANT_LA_ESPIGA,
        "La Espiga - Central Bakery",
        "central_bakery",
        config
    )
    results.append(result_la_espiga)

    total_forecasts = sum(r["forecasts_created"] for r in results)
    total_batches = sum(r["batches_created"] for r in results)

    return {
        "results": results,
        "total_forecasts_created": total_forecasts,
        "total_batches_created": total_batches,
        "status": "completed"
    }

async def main():
    """Main execution function"""
    # Get database URL from environment
    database_url = os.getenv("FORECASTING_DATABASE_URL")
    if not database_url:
        logger.error("FORECASTING_DATABASE_URL environment variable must be set")
        return 1

    # Ensure asyncpg driver
    if database_url.startswith("postgresql://"):
        database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)

    # Create async engine
    engine = create_async_engine(database_url, echo=False)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    try:
        async with async_session() as session:
            result = await seed_all(session)

            logger.info(
                "Forecasting seed completed successfully!",
                total_forecasts=result["total_forecasts_created"],
                total_batches=result["total_batches_created"],
                status=result["status"]
            )

            # Print summary
            print("\n" + "="*60)
            print("DEMO FORECASTING SEED SUMMARY")
            print("="*60)
            for tenant_result in result["results"]:
                tenant_id = tenant_result["tenant_id"]
                forecasts = tenant_result["forecasts_created"]
                batches = tenant_result["batches_created"]
                skipped = tenant_result.get("skipped", False)
                status = "SKIPPED (already exists)" if skipped else f"CREATED {forecasts} forecasts, {batches} batches"
                print(f"Tenant {tenant_id}: {status}")
            print(f"\nTotal Forecasts: {result['total_forecasts_created']}")
            print(f"Total Batches: {result['total_batches_created']}")
            print("="*60 + "\n")

            return 0

    except Exception as e:
        logger.error(f"Forecasting seed failed: {str(e)}", exc_info=True)
        return 1
    finally:
        await engine.dispose()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)
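# Example invocation outside the cluster (illustrative only; the DSN is an assumption,
# and in production this script is run by the forecasting-service init job with
# FORECASTING_DATABASE_URL injected by Kubernetes):
#
#   export FORECASTING_DATABASE_URL="postgresql://user:pass@localhost:5432/forecasting"
#   python path/to/this_script.py   # actual filename/path depends on the repo layout
#
# A plain "postgresql://" URL is rewritten in main() to "postgresql+asyncpg://" so the
# async engine always uses the asyncpg driver.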