"""
Demo AI Models Seed Script
Creates fake AI models for demo tenants to populate the models list
without having actual trained model files.

This script uses hardcoded tenant and product IDs to avoid cross-database dependencies.
"""

import asyncio
import sys
import os
from uuid import UUID
from datetime import datetime, timezone, timedelta
from decimal import Decimal

# Add project root to path. This must run before the project-local imports
# below so they can be resolved when the script is executed directly.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))

from sqlalchemy import select
from shared.database.base import create_database_manager
import structlog

# Import models - these paths work both locally and in container
try:
    # Container environment (training-service image)
    from app.models.training import TrainedModel
except ImportError:
    # Local environment
    from services.training.app.models.training import TrainedModel

# Module-level structured logger shared by everything in this script.
logger = structlog.get_logger()

# ============================================================================
# HARDCODED DEMO DATA (from seed scripts)
# ============================================================================

# Demo Tenant IDs (from seed_demo_tenants.py)
DEMO_TENANT_SAN_PABLO = UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6")
DEMO_TENANT_LA_ESPIGA = UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7")

# Sample Product IDs for each tenant (these should match finished products from inventory seed)
# Note: These are example UUIDs - in production, these would be actual product IDs from inventory
# Shape: tenant UUID -> list of {"id": product UUID, "name": display name}.
DEMO_PRODUCTS = {
    DEMO_TENANT_SAN_PABLO: [
        {"id": UUID("10000000-0000-0000-0000-000000000001"), "name": "Barra de Pan"},
        {"id": UUID("10000000-0000-0000-0000-000000000002"), "name": "Croissant"},
        {"id": UUID("10000000-0000-0000-0000-000000000003"), "name": "Magdalenas"},
        {"id": UUID("10000000-0000-0000-0000-000000000004"), "name": "Empanada"},
        {"id": UUID("10000000-0000-0000-0000-000000000005"), "name": "Pan Integral"},
    ],
    DEMO_TENANT_LA_ESPIGA: [
        {"id": UUID("20000000-0000-0000-0000-000000000001"), "name": "Pan de Molde"},
        {"id": UUID("20000000-0000-0000-0000-000000000002"), "name": "Bollo Suizo"},
        {"id": UUID("20000000-0000-0000-0000-000000000003"), "name": "Palmera de Chocolate"},
        {"id": UUID("20000000-0000-0000-0000-000000000004"), "name": "Napolitana"},
        {"id": UUID("20000000-0000-0000-0000-000000000005"), "name": "Pan Rústico"},
    ],
}

class DemoAIModelSeeder:
    """Seed fake AI models for demo tenants.

    Inserts ``TrainedModel`` rows whose file paths point at artifacts that do
    not exist, so the models list can be populated without any training run.
    """

    def __init__(self):
        """Resolve the training database URL and create the database manager.

        ``TRAINING_DATABASE_URL`` takes precedence over ``DATABASE_URL``.

        Raises:
            ValueError: If neither environment variable is set to a
                non-empty value.
        """
        db_url = os.getenv("TRAINING_DATABASE_URL") or os.getenv("DATABASE_URL")
        if not db_url:
            raise ValueError("Missing TRAINING_DATABASE_URL or DATABASE_URL")

        # Convert to async URL if needed (only the scheme prefix is rewritten).
        if db_url.startswith("postgresql://"):
            db_url = db_url.replace("postgresql://", "postgresql+asyncpg://", 1)

        self.training_db_url = db_url
        self.training_db = create_database_manager(db_url, "demo-ai-seed")

    async def create_fake_model(self, session, tenant_id: UUID, product_info: dict):
        """Create a fake AI model entry for a product.

        Builds a ``TrainedModel`` with fixed demo metrics and adds it to
        ``session``. Nothing is flushed or committed here.

        Args:
            session: Database session the new row is added to.
            tenant_id: Tenant that owns the model.
            product_info: Mapping with the product ``"id"`` and ``"name"``.

        Returns:
            The ``TrainedModel`` instance that was added to the session.
        """
        now = datetime.now(timezone.utc)
        product_id = product_info["id"]
        # Fake artifact location; both paths below share it.
        fake_dir = f"/fake/models/{tenant_id}/{product_id}"

        fake_model = TrainedModel(
            tenant_id=tenant_id,
            inventory_product_id=product_id,
            model_type="prophet_optimized",
            model_version="1.0-demo",
            job_id=f"demo-job-{tenant_id}-{product_id}",

            # Fake file paths (files don't actually exist)
            model_path=f"{fake_dir}/model.pkl",
            metadata_path=f"{fake_dir}/metadata.json",

            # Fake but realistic metrics
            mape=Decimal("12.5"),  # Mean Absolute Percentage Error
            mae=Decimal("2.3"),  # Mean Absolute Error
            rmse=Decimal("3.1"),  # Root Mean Squared Error
            r2_score=Decimal("0.85"),  # R-squared
            training_samples=60,  # 60 days of training data

            # Fake hyperparameters
            hyperparameters={
                "changepoint_prior_scale": 0.05,
                "seasonality_prior_scale": 10.0,
                "holidays_prior_scale": 10.0,
                "seasonality_mode": "multiplicative",
            },

            # Features used
            features_used=["weekday", "month", "is_holiday", "temperature", "precipitation"],

            # Normalization params (fake)
            normalization_params={
                "temperature": {"mean": 15.0, "std": 5.0},
                "precipitation": {"mean": 2.0, "std": 1.5},
            },

            # Model status
            is_active=True,
            is_production=False,  # Demo models are not production-ready

            # Training data info: window from 90 days ago to 7 days ago
            training_start_date=now - timedelta(days=90),
            training_end_date=now - timedelta(days=7),
            data_quality_score=Decimal("0.75"),  # Good but not excellent

            # Metadata
            notes=(
                f"Demo model for {product_info['name']} - No actual trained file exists. "
                "For demonstration purposes only."
            ),
            created_by="demo-seed-script",
            created_at=now,
            updated_at=now,
            last_used_at=None,
        )

        session.add(fake_model)
        return fake_model

    async def seed_models_for_tenant(self, tenant_id: UUID, tenant_name: str, products: list):
        """Create fake AI models for a demo tenant.

        Products that already have a ``TrainedModel`` row for this tenant are
        skipped. All new rows are committed together at the end.

        Args:
            tenant_id: Tenant to seed.
            tenant_name: Human-readable tenant name, used for logging only.
            products: Product mappings with ``"id"`` and ``"name"`` keys.

        Returns:
            Number of models created for the tenant.

        Raises:
            Exception: Any error is logged and re-raised unchanged.
        """
        logger.info(
            "Creating fake AI models for demo tenant",
            tenant_id=str(tenant_id),
            tenant_name=tenant_name,
            product_count=len(products),
        )

        try:
            async with self.training_db.get_session() as session:
                models_created = 0

                for product in products:
                    # Check if model already exists
                    result = await session.execute(
                        select(TrainedModel).where(
                            TrainedModel.tenant_id == tenant_id,
                            TrainedModel.inventory_product_id == product["id"],
                        )
                    )
                    existing_model = result.scalars().first()

                    if existing_model is not None:
                        logger.info(
                            "Model already exists, skipping",
                            tenant_id=str(tenant_id),
                            product_name=product["name"],
                            product_id=str(product["id"]),
                        )
                        continue

                    # Create fake model
                    model = await self.create_fake_model(session, tenant_id, product)

                    # Flush so the INSERT is emitted before model.id is logged.
                    # NOTE(review): presumably TrainedModel's primary key is
                    # filled by a column default at flush time, in which case
                    # model.id would still be None here without this flush -
                    # confirm against the model definition.
                    await session.flush()
                    models_created += 1

                    logger.info(
                        "Created fake AI model",
                        tenant_id=str(tenant_id),
                        product_name=product["name"],
                        product_id=str(product["id"]),
                        model_id=str(model.id),
                    )

                await session.commit()

                logger.info(
                    "✅ Successfully created fake AI models for tenant",
                    tenant_id=str(tenant_id),
                    tenant_name=tenant_name,
                    models_created=models_created,
                )

                return models_created

        except Exception as e:
            logger.error(
                "❌ Error creating fake AI models for tenant",
                tenant_id=str(tenant_id),
                tenant_name=tenant_name,
                error=str(e),
                exc_info=True,
            )
            raise

    async def seed_all_demo_models(self):
        """Seed fake AI models for all demo tenants.

        Tenants are processed sequentially, in a fixed order.

        Raises:
            Exception: Any error from seeding a tenant is logged and re-raised.
        """
        logger.info("=" * 80)
        logger.info("🤖 Starting Demo AI Models Seeding")
        logger.info("=" * 80)

        # (tenant id, display name) pairs; products come from DEMO_PRODUCTS.
        demo_tenants = (
            (DEMO_TENANT_SAN_PABLO, "Panadería San Pablo"),
            (DEMO_TENANT_LA_ESPIGA, "Panadería La Espiga"),
        )

        total_models_created = 0

        try:
            for tenant_id, tenant_name in demo_tenants:
                total_models_created += await self.seed_models_for_tenant(
                    tenant_id=tenant_id,
                    tenant_name=tenant_name,
                    products=DEMO_PRODUCTS[tenant_id],
                )

            logger.info("=" * 80)
            logger.info(
                "✅ Demo AI Models Seeding Completed",
                total_models_created=total_models_created,
                # Derived from the tenant list so it cannot drift from it.
                tenants_processed=len(demo_tenants),
            )
            logger.info("=" * 80)

        except Exception as e:
            logger.error("=" * 80)
            logger.error("❌ Demo AI Models Seeding Failed")
            logger.error("=" * 80)
            logger.error("Error: %s", str(e))
            raise

async def main() -> int:
    """Main entry point.

    Builds a ``DemoAIModelSeeder`` and seeds all demo tenants.

    Returns:
        ``0`` when seeding completes, ``1`` when any exception is raised
        (the exception is logged, not propagated).
    """
    logger.info("Demo AI Models Seed Script Starting")
    logger.info("Mode: %s", os.getenv("DEMO_MODE", "development"))
    logger.info("Log Level: %s", os.getenv("LOG_LEVEL", "INFO"))

    try:
        seeder = DemoAIModelSeeder()
        await seeder.seed_all_demo_models()

        logger.info("")
        logger.info("🎉 Success! Demo AI models are ready.")
        logger.info("")
        logger.info("Note: These are fake models for demo purposes only.")
        logger.info(" No actual model files exist on disk.")
        logger.info("")

        return 0

    except Exception as e:
        # Top-level boundary: turn any failure into a non-zero exit code.
        logger.error("Demo AI models seed failed", error=str(e), exc_info=True)
        return 1

if __name__ == "__main__":
    # Run the async entry point and use its return value as the exit status.
    sys.exit(asyncio.run(main()))