#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Forecasting Seeding Script for Forecasting Service
Creates demand forecasts and prediction batches for demo template tenants

This script runs as a Kubernetes init job inside the forecasting-service container.
"""

import asyncio
import uuid
import sys
import os
import json
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path

# Add app to path (must happen before the `app.*` import below)
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog

from app.models.forecasts import Forecast, PredictionBatch

# Configure logging
logger = structlog.get_logger()

# Base demo tenant IDs
DEMO_TENANT_SAN_PABLO = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6")  # Individual bakery
DEMO_TENANT_LA_ESPIGA = uuid.UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7")  # Central bakery

# Base reference date for date calculations
BASE_REFERENCE_DATE = datetime(2025, 1, 15, 12, 0, 0, tzinfo=timezone.utc)

# Day of week mapping (datetime.weekday() index -> key used in "tendencia_semanal")
DAYS_OF_WEEK = {
    0: "lunes",
    1: "martes",
    2: "miercoles",
    3: "jueves",
    4: "viernes",
    5: "sabado",
    6: "domingo"
}

# Weather descriptions a forecast row may carry. Historical rows draw from the
# full set; future rows only from the non-rain subset.
_HISTORICAL_WEATHER_DESCRIPTIONS = (
    "Despejado",
    "Parcialmente nublado",
    "Nublado",
    "Lluvia ligera",
    "Lluvia",
)
_FUTURE_WEATHER_DESCRIPTIONS = ("Despejado", "Parcialmente nublado", "Nublado")

# Percentage points added to both confidence-interval bounds for future forecasts.
_FUTURE_INTERVAL_EXTRA_PCT = 5.0


def load_forecasting_config() -> dict:
    """Load forecasting configuration from the JSON file next to this script.

    Returns:
        The parsed content of ``previsiones_config_es.json``.

    Raises:
        FileNotFoundError: If the config file does not exist.
    """
    config_file = Path(__file__).parent / "previsiones_config_es.json"
    if not config_file.exists():
        raise FileNotFoundError(f"Forecasting config file not found: {config_file}")

    with open(config_file, 'r', encoding='utf-8') as f:
        return json.load(f)


def calculate_datetime_from_offset(offset_days: int) -> datetime:
    """Return BASE_REFERENCE_DATE shifted by ``offset_days`` (negative = past)."""
    return BASE_REFERENCE_DATE + timedelta(days=offset_days)


def weighted_choice(choices: list) -> dict:
    """Make a weighted random choice from a list of dicts with a 'peso' key.

    Entries without a 'peso' key count with weight 1.0.

    Args:
        choices: Non-empty list of candidate dicts.

    Returns:
        One of the dicts from ``choices``.

    Raises:
        ValueError: If ``choices`` is empty.
    """
    if not choices:
        raise ValueError("weighted_choice requires at least one choice")

    total_weight = sum(c.get("peso", 1.0) for c in choices)
    r = random.uniform(0, total_weight)

    cumulative = 0
    for choice in choices:
        cumulative += choice.get("peso", 1.0)
        if r <= cumulative:
            return choice

    # Only reachable through floating-point rounding of the cumulative sum.
    return choices[-1]


def calculate_demand(
    product: dict,
    day_of_week: int,
    is_weekend: bool,
    weather_temp: float,
    weather_precip: float,
    traffic_volume: int,
    config: dict
) -> float:
    """Calculate predicted demand based on various factors.

    The result is the product's base daily demand multiplied by weekly,
    seasonality, weather and traffic factors, then perturbed by a random
    factor drawn from ``[1 - variabilidad, 1 + variabilidad]``.

    Args:
        product: Product entry providing "demanda_base_diaria",
            "tendencia_semanal", "estacionalidad" and "variabilidad".
        day_of_week: Index into DAYS_OF_WEEK (0 = "lunes").
        is_weekend: Accepted for interface compatibility; not used by the
            calculation (the weekly trend already depends on the day).
        weather_temp: Temperature; only values above 22.0 affect demand.
        weather_precip: Precipitation; any value above 0 applies the
            configured precipitation impact.
        traffic_volume: Traffic volume; 1000 is the neutral point.
        config: Full configuration dict (reads
            ``configuracion_previsiones.factores_externos``).

    Returns:
        The predicted demand, never below 0.0.
    """
    external = config["configuracion_previsiones"]["factores_externos"]

    # Base demand
    base_demand = product["demanda_base_diaria"]

    # Weekly trend factor
    day_name = DAYS_OF_WEEK[day_of_week]
    weekly_factor = product["tendencia_semanal"][day_name]

    # Apply seasonality (simple growth factor for "creciente")
    seasonality_factor = 1.0
    if product["estacionalidad"] == "creciente":
        seasonality_factor = 1.05

    # Weather impact (simple model)
    weather_factor = 1.0
    temp_impact = external["temperatura"]["impacto_demanda"]
    precip_impact = external["precipitacion"]["impacto_demanda"]

    if weather_temp > 22.0:
        weather_factor += temp_impact * (weather_temp - 22.0) / 10.0
    if weather_precip > 0:
        weather_factor += precip_impact

    # Traffic correlation
    traffic_correlation = external["volumen_trafico"]["correlacion_demanda"]
    traffic_factor = 1.0 + (traffic_volume / 1000.0 - 1.0) * traffic_correlation

    # Calculate predicted demand
    predicted = base_demand * weekly_factor * seasonality_factor * weather_factor * traffic_factor

    # Add randomness based on variability
    variability = product["variabilidad"]
    predicted = predicted * random.uniform(1.0 - variability, 1.0 + variability)

    return max(0.0, predicted)


def _pick_batch_status(distribution: dict) -> str:
    """Pick a batch status from a ``{status: weight}`` mapping.

    Draws a number in [0, 1) and walks the cumulative weights in mapping
    order. Falls back to "completed" when the draw exceeds the total weight.
    """
    status_rand = random.random()
    cumulative = 0
    for status, weight in distribution.items():
        cumulative += weight
        if status_rand <= cumulative:
            return status
    return "completed"


def _build_prediction_batch(
    tenant_id: uuid.UUID,
    business_type: str,
    batch_idx: int,
    forecast_config: dict,
    batches_config: dict
) -> PredictionBatch:
    """Build the ``batch_idx``-th demo PredictionBatch for a tenant (not yet added to a session)."""
    batch_status = _pick_batch_status(batches_config["distribucion_estados"])

    # Select forecast days
    forecast_days = random.choice(batches_config["dias_prevision_lotes"])

    # Create batch at different times in the past: one batch every 10 days
    requested_at = calculate_datetime_from_offset(-(batch_idx + 1) * 10)

    # Only completed batches carry a processing time and completion timestamp
    completed_at = None
    processing_time = None
    if batch_status == "completed":
        processing_time = random.randint(5000, 25000)  # 5-25 seconds
        completed_at = requested_at + timedelta(milliseconds=processing_time)

    total_products = forecast_config["productos_por_tenant"]

    return PredictionBatch(
        id=uuid.uuid4(),
        tenant_id=tenant_id,
        batch_name=f"Previsión {forecast_days} días - {requested_at.strftime('%Y%m%d')}",
        requested_at=requested_at,
        completed_at=completed_at,
        status=batch_status,
        total_products=total_products,
        completed_products=total_products if batch_status == "completed" else 0,
        failed_products=0 if batch_status != "failed" else random.randint(1, 3),
        forecast_days=forecast_days,
        business_type=business_type,
        error_message="Error de conexión con servicio de clima" if batch_status == "failed" else None,
        processing_time_ms=processing_time
    )


def _build_forecast(
    tenant_id: uuid.UUID,
    business_type: str,
    location,
    multiplier: float,
    product: dict,
    product_id: uuid.UUID,
    forecast_date: datetime,
    created_at: datetime,
    weather_descriptions,
    interval_extra_pct: float,
    config: dict
) -> Forecast:
    """Build one demo Forecast row with randomized weather, traffic and model data.

    Args:
        tenant_id: Tenant the forecast belongs to.
        business_type: Business type stored on the row and used in the model id.
        location: Location value stored on the row.
        multiplier: Factor applied to the computed demand.
        product: Product config entry (passed to calculate_demand; "nombre" is stored).
        product_id: Parsed UUID of the product.
        forecast_date: Date the forecast is for.
        created_at: Creation timestamp stored on the row.
        weather_descriptions: Sequence to draw the weather description from.
        interval_extra_pct: Percentage points added to both configured
            confidence-interval bounds (0.0 leaves them unchanged).
        config: Full configuration dict.

    Returns:
        A new Forecast instance (not yet added to a session).
    """
    forecast_config = config["configuracion_previsiones"]
    external = forecast_config["factores_externos"]

    day_of_week = forecast_date.weekday()
    is_weekend = day_of_week >= 5

    # Generate weather data
    weather_temp = random.uniform(
        external["temperatura"]["min"],
        external["temperatura"]["max"]
    )
    weather_precip = 0.0
    if random.random() < external["precipitacion"]["probabilidad_lluvia"]:
        weather_precip = random.uniform(0.5, external["precipitacion"]["mm_promedio"])

    weather_desc = random.choice(weather_descriptions)

    # Traffic volume
    traffic_volume = random.randint(
        external["volumen_trafico"]["min"],
        external["volumen_trafico"]["max"]
    )

    # Calculate demand, then scale it (central bakery multiplier or 1.0)
    predicted_demand = calculate_demand(
        product, day_of_week, is_weekend,
        weather_temp, weather_precip, traffic_volume, config
    )
    predicted_demand *= multiplier

    # Calculate confidence intervals
    interval = forecast_config["precision_modelo"]["intervalo_confianza_porcentaje"]
    lower_pct = (interval["inferior"] + interval_extra_pct) / 100.0
    upper_pct = (interval["superior"] + interval_extra_pct) / 100.0
    confidence_lower = predicted_demand * (1.0 - lower_pct)
    confidence_upper = predicted_demand * (1.0 + upper_pct)

    # Select algorithm
    algorithm = weighted_choice(forecast_config["algoritmos"])["algoritmo"]

    # Processing time
    processing_time = random.randint(
        forecast_config["tiempo_procesamiento_ms"]["min"],
        forecast_config["tiempo_procesamiento_ms"]["max"]
    )

    # Model info
    model_version = f"v{random.randint(1, 3)}.{random.randint(0, 9)}"
    model_id = f"{algorithm}_{business_type}_{model_version}"

    return Forecast(
        id=uuid.uuid4(),
        tenant_id=tenant_id,
        inventory_product_id=product_id,
        product_name=product["nombre"],
        location=location,
        forecast_date=forecast_date,
        created_at=created_at,
        predicted_demand=predicted_demand,
        confidence_lower=confidence_lower,
        confidence_upper=confidence_upper,
        confidence_level=forecast_config["nivel_confianza"],
        model_id=model_id,
        model_version=model_version,
        algorithm=algorithm,
        business_type=business_type,
        day_of_week=day_of_week,
        is_holiday=False,  # Could add holiday logic
        is_weekend=is_weekend,
        weather_temperature=weather_temp,
        weather_precipitation=weather_precip,
        weather_description=weather_desc,
        traffic_volume=traffic_volume,
        processing_time_ms=processing_time,
        features_used={
            "day_of_week": True,
            "weather": True,
            "traffic": True,
            "historical_demand": True,
            "seasonality": True
        }
    )


async def generate_forecasts_for_tenant(
    db: AsyncSession,
    tenant_id: uuid.UUID,
    tenant_name: str,
    business_type: str,
    config: dict
) -> dict:
    """Generate prediction batches and forecasts for a specific tenant.

    Does nothing when the tenant already has at least one Forecast row.
    Otherwise adds the prediction batches, then historical forecasts
    (``dias_historico`` days before BASE_REFERENCE_DATE) and future forecasts
    (``dias_prevision_futuro`` days after it) for every demo product, and
    commits.

    Args:
        db: Async session used for the existence check, inserts and commit.
        tenant_id: Tenant to seed.
        tenant_name: Human-readable name, used only in log messages.
        business_type: Key into ``ubicaciones``; "central_bakery" also
            enables the configured demand multiplier.
        config: Full configuration dict.

    Returns:
        Dict with "tenant_id", "forecasts_created", "batches_created" and
        "skipped".
    """
    logger.info(f"Generating forecasts for: {tenant_name}", tenant_id=str(tenant_id))

    # Check if forecasts already exist
    result = await db.execute(
        select(Forecast).where(Forecast.tenant_id == tenant_id).limit(1)
    )
    existing = result.scalar_one_or_none()

    if existing is not None:
        logger.info(f"Forecasts already exist for {tenant_name}, skipping seed")
        return {"tenant_id": str(tenant_id), "forecasts_created": 0, "batches_created": 0, "skipped": True}

    forecast_config = config["configuracion_previsiones"]
    batches_config = config["lotes_prediccion"]

    # Get location for this business type
    location = forecast_config["ubicaciones"][business_type]

    # Get multiplier for central bakery
    multiplier = forecast_config["multiplicador_central_bakery"] if business_type == "central_bakery" else 1.0

    forecasts_created = 0
    batches_created = 0

    # Generate prediction batches first
    for batch_idx in range(batches_config["lotes_por_tenant"]):
        db.add(_build_prediction_batch(
            tenant_id, business_type, batch_idx, forecast_config, batches_config
        ))
        batches_created += 1

    await db.flush()

    # Generate historical forecasts (days before the reference date)
    dias_historico = forecast_config["dias_historico"]

    for product in forecast_config["productos_demo"]:
        product_id = uuid.UUID(product["id"])

        for day_offset in range(-dias_historico, 0):
            forecast_date = calculate_datetime_from_offset(day_offset)
            db.add(_build_forecast(
                tenant_id, business_type, location, multiplier,
                product, product_id, forecast_date,
                forecast_date - timedelta(days=1),  # Created day before
                _HISTORICAL_WEATHER_DESCRIPTIONS,
                0.0,
                config
            ))
            forecasts_created += 1

    # Generate future forecasts (days after the reference date)
    dias_futuro = forecast_config["dias_prevision_futuro"]

    for product in forecast_config["productos_demo"]:
        product_id = uuid.UUID(product["id"])

        for day_offset in range(1, dias_futuro + 1):
            forecast_date = calculate_datetime_from_offset(day_offset)
            db.add(_build_forecast(
                tenant_id, business_type, location, multiplier,
                product, product_id, forecast_date,
                BASE_REFERENCE_DATE,  # Created today
                _FUTURE_WEATHER_DESCRIPTIONS,
                _FUTURE_INTERVAL_EXTRA_PCT,  # Wider confidence intervals for future predictions
                config
            ))
            forecasts_created += 1

    await db.commit()
    logger.info(f"Successfully created {forecasts_created} forecasts and {batches_created} batches for {tenant_name}")

    return {
        "tenant_id": str(tenant_id),
        "forecasts_created": forecasts_created,
        "batches_created": batches_created,
        "skipped": False
    }


async def seed_all(db: AsyncSession) -> dict:
    """Seed all demo tenants with forecasting data.

    Loads the configuration once and seeds the San Pablo tenant followed by
    the La Espiga tenant.

    Returns:
        Dict with the per-tenant "results", the summed
        "total_forecasts_created" and "total_batches_created", and a
        "status" of "completed".
    """
    logger.info("Starting demo forecasting seed process")

    # Load configuration
    config = load_forecasting_config()

    results = []

    # Seed San Pablo (Individual Bakery)
    result_san_pablo = await generate_forecasts_for_tenant(
        db,
        DEMO_TENANT_SAN_PABLO,
        "San Pablo - Individual Bakery",
        "individual_bakery",
        config
    )
    results.append(result_san_pablo)

    # Seed La Espiga (Central Bakery)
    result_la_espiga = await generate_forecasts_for_tenant(
        db,
        DEMO_TENANT_LA_ESPIGA,
        "La Espiga - Central Bakery",
        "central_bakery",
        config
    )
    results.append(result_la_espiga)

    total_forecasts = sum(r["forecasts_created"] for r in results)
    total_batches = sum(r["batches_created"] for r in results)

    return {
        "results": results,
        "total_forecasts_created": total_forecasts,
        "total_batches_created": total_batches,
        "status": "completed"
    }


async def main() -> int:
    """Main execution function.

    Reads the database URL from ``FORECASTING_DATABASE_URL``, runs the seed
    and prints a summary.

    Returns:
        0 on success; 1 when the environment variable is missing or the
        seed raised an exception.
    """
    # Get database URL from environment
    database_url = os.getenv("FORECASTING_DATABASE_URL")
    if not database_url:
        logger.error("FORECASTING_DATABASE_URL environment variable must be set")
        return 1

    # Ensure asyncpg driver
    if database_url.startswith("postgresql://"):
        database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)

    # Create async engine
    engine = create_async_engine(database_url, echo=False)
    async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

    try:
        async with async_session() as session:
            result = await seed_all(session)

            logger.info(
                "Forecasting seed completed successfully!",
                total_forecasts=result["total_forecasts_created"],
                total_batches=result["total_batches_created"],
                status=result["status"]
            )

            # Print summary
            print("\n" + "="*60)
            print("DEMO FORECASTING SEED SUMMARY")
            print("="*60)
            for tenant_result in result["results"]:
                tenant_id = tenant_result["tenant_id"]
                forecasts = tenant_result["forecasts_created"]
                batches = tenant_result["batches_created"]
                skipped = tenant_result.get("skipped", False)
                status = "SKIPPED (already exists)" if skipped else f"CREATED {forecasts} forecasts, {batches} batches"
                print(f"Tenant {tenant_id}: {status}")
            print(f"\nTotal Forecasts: {result['total_forecasts_created']}")
            print(f"Total Batches: {result['total_batches_created']}")
            print("="*60 + "\n")

            return 0

    except Exception as e:
        # Top-level boundary of the init job: log with traceback and signal failure
        logger.error(f"Forecasting seed failed: {str(e)}", exc_info=True)
        return 1
    finally:
        await engine.dispose()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)