Files
bakery-ia/services/training/scripts/demo/seed_demo_ai_models.py
2025-10-12 18:47:33 +02:00

272 lines
9.9 KiB
Python

"""
Demo AI Models Seed Script
Creates fake AI models for demo tenants to populate the models list
without having actual trained model files.
This script uses hardcoded tenant and product IDs to avoid cross-database dependencies.
"""
import asyncio
import sys
import os
from uuid import UUID
from datetime import datetime, timezone, timedelta
from decimal import Decimal
# Add the directory two levels above this script's folder (the training service root) to the import path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from sqlalchemy import select
from shared.database.base import create_database_manager
import structlog
# Import models - these paths work both locally and in container
try:
# Container environment (training-service image)
from app.models.training import TrainedModel
except ImportError:
# Local environment
from services.training.app.models.training import TrainedModel
logger = structlog.get_logger()
# ============================================================================
# HARDCODED DEMO DATA (from seed scripts)
# ============================================================================
# Demo Tenant IDs (from seed_demo_tenants.py)
DEMO_TENANT_SAN_PABLO = UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6")
DEMO_TENANT_LA_ESPIGA = UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7")

# Sample products per tenant, written as (product UUID, display name) rows.
# The IDs are example values that are meant to line up with the finished
# products created by the inventory seed; in production they would be real
# product IDs taken from inventory.
_DEMO_PRODUCT_ROWS = (
    (
        DEMO_TENANT_SAN_PABLO,
        (
            ("10000000-0000-0000-0000-000000000001", "Barra de Pan"),
            ("10000000-0000-0000-0000-000000000002", "Croissant"),
            ("10000000-0000-0000-0000-000000000003", "Magdalenas"),
            ("10000000-0000-0000-0000-000000000004", "Empanada"),
            ("10000000-0000-0000-0000-000000000005", "Pan Integral"),
        ),
    ),
    (
        DEMO_TENANT_LA_ESPIGA,
        (
            ("20000000-0000-0000-0000-000000000001", "Pan de Molde"),
            ("20000000-0000-0000-0000-000000000002", "Bollo Suizo"),
            ("20000000-0000-0000-0000-000000000003", "Palmera de Chocolate"),
            ("20000000-0000-0000-0000-000000000004", "Napolitana"),
            ("20000000-0000-0000-0000-000000000005", "Pan Rústico"),
        ),
    ),
)

# Maps each demo tenant UUID to its list of {"id": UUID, "name": str} products.
DEMO_PRODUCTS = {
    tenant_id: [
        {"id": UUID(product_id), "name": label} for product_id, label in rows
    ]
    for tenant_id, rows in _DEMO_PRODUCT_ROWS
}
class DemoAIModelSeeder:
    """Seed fake AI model records for the demo tenants.

    The seeder inserts ``TrainedModel`` rows whose file paths, metrics and
    hyperparameters are fabricated, so the models list has entries without
    any trained model files existing on disk.
    """

    def __init__(self):
        """Resolve the training database URL and create the database manager.

        The URL is read from ``TRAINING_DATABASE_URL``, falling back to
        ``DATABASE_URL``; a plain PostgreSQL URL is rewritten to use the
        asyncpg driver.

        Raises:
            ValueError: If neither environment variable holds a value.
        """
        db_url = os.getenv("TRAINING_DATABASE_URL") or os.getenv("DATABASE_URL")
        if not db_url:
            raise ValueError("Missing TRAINING_DATABASE_URL or DATABASE_URL")
        self.training_db_url = self._to_async_url(db_url)
        self.training_db = create_database_manager(self.training_db_url, "demo-ai-seed")

    @staticmethod
    def _to_async_url(url: str) -> str:
        """Return *url* with a driverless PostgreSQL scheme switched to asyncpg.

        Both ``postgresql://`` and the legacy ``postgres://`` alias are
        rewritten to ``postgresql+asyncpg://``. Only the leading scheme is
        touched; URLs that already name a driver, or that target another
        database, are returned unchanged.
        """
        for scheme in ("postgresql://", "postgres://"):
            if url.startswith(scheme):
                return "postgresql+asyncpg://" + url[len(scheme):]
        return url

    async def create_fake_model(self, session, tenant_id: UUID, product_info: dict):
        """Build a fake ``TrainedModel`` for one product and add it to *session*.

        Args:
            session: Database session the new row is added to. The row is
                only added here; it is neither flushed nor committed.
            tenant_id: Tenant that owns the model.
            product_info: Mapping with the product's ``"id"`` and ``"name"``.

        Returns:
            The newly created ``TrainedModel`` instance.
        """
        now = datetime.now(timezone.utc)
        # Pretend the training window ran from 90 days ago until a week ago.
        training_start = now - timedelta(days=90)
        training_end = now - timedelta(days=7)
        product_id = product_info["id"]

        fake_model = TrainedModel(
            tenant_id=tenant_id,
            inventory_product_id=product_id,
            model_type="prophet_optimized",
            model_version="1.0-demo",
            job_id=f"demo-job-{tenant_id}-{product_id}",
            # Fake file paths (the files do not actually exist)
            model_path=f"/fake/models/{tenant_id}/{product_id}/model.pkl",
            metadata_path=f"/fake/models/{tenant_id}/{product_id}/metadata.json",
            # Fake but realistic-looking metrics
            mape=Decimal("12.5"),  # Mean Absolute Percentage Error
            mae=Decimal("2.3"),  # Mean Absolute Error
            rmse=Decimal("3.1"),  # Root Mean Squared Error
            r2_score=Decimal("0.85"),  # R-squared
            training_samples=60,  # Fabricated sample count
            # Fake hyperparameters
            hyperparameters={
                "changepoint_prior_scale": 0.05,
                "seasonality_prior_scale": 10.0,
                "holidays_prior_scale": 10.0,
                "seasonality_mode": "multiplicative",
            },
            # Features used
            features_used=["weekday", "month", "is_holiday", "temperature", "precipitation"],
            # Normalization params (fake)
            normalization_params={
                "temperature": {"mean": 15.0, "std": 5.0},
                "precipitation": {"mean": 2.0, "std": 1.5},
            },
            # Model status
            is_active=True,
            is_production=False,  # Demo models are not production-ready
            # Training data info
            training_start_date=training_start,
            training_end_date=training_end,
            data_quality_score=Decimal("0.75"),  # Good but not excellent
            # Metadata
            notes=f"Demo model for {product_info['name']} - No actual trained file exists. For demonstration purposes only.",
            created_by="demo-seed-script",
            created_at=now,
            updated_at=now,
            last_used_at=None,
        )
        session.add(fake_model)
        return fake_model

    async def seed_models_for_tenant(self, tenant_id: UUID, tenant_name: str, products: list):
        """Create fake AI models for every product of one demo tenant.

        A product is skipped when a ``TrainedModel`` row already exists for
        the same tenant and product, so re-running the seed does not create
        duplicates. All new rows are committed together after the loop.

        Args:
            tenant_id: Tenant to seed.
            tenant_name: Human-readable tenant name, used for logging only.
            products: Product mappings, each with ``"id"`` and ``"name"``.

        Returns:
            int: Number of models created during this call.

        Raises:
            Exception: Any error raised while seeding is logged and re-raised.
        """
        logger.info(
            "Creating fake AI models for demo tenant",
            tenant_id=str(tenant_id),
            tenant_name=tenant_name,
            product_count=len(products),
        )
        try:
            async with self.training_db.get_session() as session:
                models_created = 0
                for product in products:
                    # Check if a model already exists for this tenant/product
                    result = await session.execute(
                        select(TrainedModel).where(
                            TrainedModel.tenant_id == tenant_id,
                            TrainedModel.inventory_product_id == product["id"],
                        )
                    )
                    if result.scalars().first() is not None:
                        logger.info(
                            "Model already exists, skipping",
                            tenant_id=str(tenant_id),
                            product_name=product["name"],
                            product_id=str(product["id"]),
                        )
                        continue

                    model = await self.create_fake_model(session, tenant_id, product)
                    # Flush so attributes filled in at INSERT time are available
                    # below. NOTE(review): if TrainedModel.id comes from a column
                    # default it would otherwise still be None and be logged as
                    # "None" - confirm against the model definition.
                    await session.flush()
                    models_created += 1
                    logger.info(
                        "Created fake AI model",
                        tenant_id=str(tenant_id),
                        product_name=product["name"],
                        product_id=str(product["id"]),
                        model_id=str(model.id),
                    )

                await session.commit()
                logger.info(
                    "✅ Successfully created fake AI models for tenant",
                    tenant_id=str(tenant_id),
                    tenant_name=tenant_name,
                    models_created=models_created,
                )
                return models_created
        except Exception as e:
            logger.error(
                "❌ Error creating fake AI models for tenant",
                tenant_id=str(tenant_id),
                tenant_name=tenant_name,
                error=str(e),
                exc_info=True,
            )
            raise

    async def seed_all_demo_models(self):
        """Seed fake AI models for all demo tenants, one tenant after another.

        Raises:
            Exception: The first error raised while seeding a tenant is
                logged and re-raised; later tenants are not processed.
        """
        # (tenant id, display name) for every tenant that gets demo models.
        demo_tenants = (
            (DEMO_TENANT_SAN_PABLO, "Panadería San Pablo"),
            (DEMO_TENANT_LA_ESPIGA, "Panadería La Espiga"),
        )
        banner = "=" * 80

        logger.info(banner)
        logger.info("🤖 Starting Demo AI Models Seeding")
        logger.info(banner)

        total_models_created = 0
        try:
            for tenant_id, tenant_name in demo_tenants:
                total_models_created += await self.seed_models_for_tenant(
                    tenant_id=tenant_id,
                    tenant_name=tenant_name,
                    products=DEMO_PRODUCTS[tenant_id],
                )
            logger.info(banner)
            logger.info(
                "✅ Demo AI Models Seeding Completed",
                total_models_created=total_models_created,
                tenants_processed=len(demo_tenants),
            )
            logger.info(banner)
        except Exception as e:
            logger.error(banner)
            # The error text is attached as a structured field, matching the
            # key-value logging used by the other methods of this class.
            logger.error("❌ Demo AI Models Seeding Failed", error=str(e))
            logger.error(banner)
            raise
async def main():
    """Run the demo AI model seeding and report the outcome as an exit code.

    Returns:
        int: 0 when seeding completed, 1 when seeding raised an exception
        (the exception is logged, not propagated).
    """
    logger.info("Demo AI Models Seed Script Starting")
    # Values are passed as key-value fields, matching the structured logging
    # style used elsewhere in this file, instead of %-style positional args.
    logger.info("Mode", mode=os.getenv("DEMO_MODE", "development"))
    logger.info("Log Level", log_level=os.getenv("LOG_LEVEL", "INFO"))
    try:
        seeder = DemoAIModelSeeder()
        await seeder.seed_all_demo_models()
    except Exception as e:
        # Top-level boundary: log the failure and turn it into an exit code.
        logger.error("Demo AI models seed failed", error=str(e), exc_info=True)
        return 1
    else:
        logger.info("")
        logger.info("🎉 Success! Demo AI models are ready.")
        logger.info("")
        logger.info("Note: These are fake models for demo purposes only.")
        logger.info(" No actual model files exist on disk.")
        logger.info("")
        return 0
if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    sys.exit(asyncio.run(main()))