Files
bakery-ia/scripts/demo/seed_demo_ai_models.py
2025-10-03 14:09:34 +02:00

279 lines
10 KiB
Python

"""
Demo AI Models Seed Script
Creates fake AI models for demo tenants to populate the models list
without having actual trained model files.
"""
import asyncio
import sys
import os
from uuid import UUID
from datetime import datetime, timezone, timedelta
from decimal import Decimal
# Add project root to path
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
from sqlalchemy import select
from shared.database.base import create_database_manager
import structlog
# Import models - these paths work both locally and in container
try:
    # Container environment (training-service image): the model module is
    # imported through the top-level ``app`` package.
    from app.models.training import TrainedModel
except ImportError:
    # Local environment: fall back to the repository-relative package path,
    # resolved through the project root inserted into sys.path above.
    from services.training.app.models.training import TrainedModel
# Tenant model: prefer the tenant service's own model. When that import fails
# (e.g. when running in the training-service container), define a minimal
# mapping that exposes only the columns this script reads.
try:
    from services.tenant.app.models.tenants import Tenant
except ImportError:
    from sqlalchemy import Column, String, Boolean
    from sqlalchemy.dialects.postgresql import UUID as PGUUID
    # ``declarative_base`` lives in ``sqlalchemy.orm`` since SQLAlchemy 1.4;
    # the old ``sqlalchemy.ext.declarative`` location is deprecated.
    from sqlalchemy.orm import declarative_base

    Base = declarative_base()

    class Tenant(Base):
        """Minimal mapping of the ``tenants`` table used for demo lookups."""

        __tablename__ = "tenants"

        id = Column(PGUUID(as_uuid=True), primary_key=True)
        name = Column(String)
        is_demo = Column(Boolean)
        is_demo_template = Column(Boolean)
# Module-level structlog logger shared by DemoAIModelSeeder and main().
logger = structlog.get_logger()
class DemoAIModelSeeder:
    """Seed fake AI models for demo tenants.

    Reads tenants from the tenant database, looks up each tenant's finished
    products in the inventory database, and inserts one placeholder
    ``TrainedModel`` row per product into the training database. No model
    files are written; the stored paths are placeholders.
    """

    # Lazily-built inventory helpers, shared across get_tenant_products()
    # calls so the database manager and the ORM mapping are created once.
    _inventory_db = None
    _inventory_models = None

    def __init__(self):
        """Create the training and tenant database managers.

        Raises:
            ValueError: If ``TRAINING_DATABASE_URL`` or ``TENANT_DATABASE_URL``
                is unset or empty.
        """
        self.training_db_url = os.getenv("TRAINING_DATABASE_URL")
        self.tenant_db_url = os.getenv("TENANT_DATABASE_URL")
        if not self.training_db_url or not self.tenant_db_url:
            raise ValueError("Missing required database URLs")
        self.training_db = create_database_manager(self.training_db_url, "demo-ai-seed")
        self.tenant_db = create_database_manager(self.tenant_db_url, "demo-tenant-seed")

    async def get_demo_tenants(self):
        """Return the tenants flagged as both demo and demo template."""
        async with self.tenant_db.get_session() as session:
            result = await session.execute(
                select(Tenant).where(
                    Tenant.is_demo.is_(True),
                    Tenant.is_demo_template.is_(True),
                )
            )
            return result.scalars().all()

    @staticmethod
    def _build_inventory_models():
        """Build the minimal ORM mapping for the inventory ``ingredients`` table.

        Returns:
            tuple: ``(Ingredient, IngredientType)`` - the mapped class and the
            enum used by its ``ingredient_type`` column.
        """
        import enum

        from sqlalchemy import Column, String, Enum as SQLEnum
        from sqlalchemy.dialects.postgresql import UUID as PGUUID
        from sqlalchemy.orm import declarative_base

        InventoryBase = declarative_base()

        class IngredientType(str, enum.Enum):
            INGREDIENT = "INGREDIENT"
            FINISHED_PRODUCT = "FINISHED_PRODUCT"

        class Ingredient(InventoryBase):
            __tablename__ = "ingredients"

            id = Column(PGUUID(as_uuid=True), primary_key=True)
            tenant_id = Column(PGUUID(as_uuid=True))
            name = Column(String)
            ingredient_type = Column(SQLEnum(IngredientType, name="ingredienttype"))

        return Ingredient, IngredientType

    async def get_tenant_products(self, tenant_id: UUID) -> list:
        """Get finished products for a tenant from the inventory database.

        The inventory is queried directly so that the returned ids are the
        real product UUIDs.

        Args:
            tenant_id: Tenant whose finished products are looked up.

        Returns:
            A list of ``{"id": ..., "name": ...}`` dicts (at most 10). An empty
            list is returned when ``INVENTORY_DATABASE_URL`` is not set or when
            any error occurs; errors are logged, not raised.
        """
        try:
            inventory_db_url = os.getenv("INVENTORY_DATABASE_URL")
            if not inventory_db_url:
                logger.warning("INVENTORY_DATABASE_URL not set, cannot get products")
                return []

            # Reuse one manager and one mapping for every tenant instead of
            # rebuilding them on each call.
            if self._inventory_db is None:
                self._inventory_db = create_database_manager(
                    inventory_db_url, "demo-inventory-check"
                )
            if self._inventory_models is None:
                self._inventory_models = self._build_inventory_models()
            Ingredient, IngredientType = self._inventory_models

            async with self._inventory_db.get_session() as session:
                result = await session.execute(
                    select(Ingredient)
                    .where(
                        Ingredient.tenant_id == tenant_id,
                        Ingredient.ingredient_type == IngredientType.FINISHED_PRODUCT,
                    )
                    .limit(10)  # Get up to 10 finished products
                )
                # Copy the needed attributes while the session is still open.
                product_list = [
                    {"id": product.id, "name": product.name}
                    for product in result.scalars().all()
                ]

            logger.info(f"Found {len(product_list)} finished products for tenant",
                        tenant_id=str(tenant_id))
            return product_list
        except Exception as e:
            # Best effort: a failed lookup means "no products" for the caller.
            logger.error("Error fetching tenant products", error=str(e), tenant_id=str(tenant_id))
            return []

    async def create_fake_model(self, session, tenant_id: UUID, product_info: dict):
        """Create a fake AI model entry for a product and add it to the session.

        Args:
            session: Database session the new row is added to. It is neither
                flushed nor committed here.
            tenant_id: Owning tenant.
            product_info: Dict with at least an ``"id"`` key (the product id).

        Returns:
            The newly constructed ``TrainedModel`` instance.
        """
        now = datetime.now(timezone.utc)
        training_start = now - timedelta(days=90)
        training_end = now - timedelta(days=7)
        product_id = product_info["id"]

        fake_model = TrainedModel(
            tenant_id=tenant_id,
            inventory_product_id=product_id,
            model_type="prophet_optimized",
            model_version="1.0-demo",
            job_id=f"demo-job-{tenant_id}-{product_id}",
            # Fake file paths (files don't actually exist)
            model_path=f"/fake/models/{tenant_id}/{product_id}/model.pkl",
            metadata_path=f"/fake/models/{tenant_id}/{product_id}/metadata.json",
            # Fake but realistic metrics
            mape=Decimal("12.5"),  # Mean Absolute Percentage Error
            mae=Decimal("2.3"),  # Mean Absolute Error
            rmse=Decimal("3.1"),  # Root Mean Squared Error
            r2_score=Decimal("0.85"),  # R-squared
            # Nominal sample count; not derived from the date window below
            training_samples=60,
            # Fake hyperparameters
            hyperparameters={
                "changepoint_prior_scale": 0.05,
                "seasonality_prior_scale": 10.0,
                "holidays_prior_scale": 10.0,
                "seasonality_mode": "multiplicative",
            },
            # Features used
            features_used=["weekday", "month", "is_holiday", "temperature", "precipitation"],
            # Normalization params (fake)
            normalization_params={
                "temperature": {"mean": 15.0, "std": 5.0},
                "precipitation": {"mean": 2.0, "std": 1.5},
            },
            # Model status
            is_active=True,
            is_production=False,  # Demo models are not production-ready
            # Training data info
            training_start_date=training_start,
            training_end_date=training_end,
            data_quality_score=Decimal("0.75"),  # Good but not excellent
            # Metadata
            notes="Demo model - No actual trained file exists. For demonstration purposes only.",
            created_by="demo-seed-script",
            created_at=now,
            updated_at=now,
            last_used_at=None,
        )
        session.add(fake_model)
        return fake_model

    async def seed_models_for_tenant(self, tenant: Tenant):
        """Create fake AI models for a demo tenant.

        One model is created per finished product that does not already have
        a ``TrainedModel`` row for this tenant; all inserts are committed
        together.

        Args:
            tenant: Tenant to seed; its ``id`` and ``name`` are read.

        Raises:
            Exception: Any error is logged and re-raised.
        """
        logger.info("Creating fake AI models for demo tenant",
                    tenant_id=str(tenant.id),
                    tenant_name=tenant.name)
        try:
            # Get products for this tenant
            products = await self.get_tenant_products(tenant.id)

            async with self.training_db.get_session() as session:
                models_created = 0
                for product in products:
                    # Check if model already exists
                    result = await session.execute(
                        select(TrainedModel).where(
                            TrainedModel.tenant_id == tenant.id,
                            TrainedModel.inventory_product_id == product["id"],
                        )
                    )
                    if result.scalars().first() is not None:
                        logger.info("Model already exists, skipping",
                                    tenant_id=str(tenant.id),
                                    product_id=str(product["id"]))
                        continue

                    # Create fake model
                    model = await self.create_fake_model(session, tenant.id, product)
                    # Flush so that an insert-time primary-key default (if the
                    # model uses one) is populated before the id is logged.
                    await session.flush()
                    models_created += 1
                    logger.info("Created fake AI model",
                                tenant_id=str(tenant.id),
                                product_id=str(product["id"]),
                                model_id=str(model.id))

                await session.commit()

            logger.info("Successfully created fake AI models for tenant",
                        tenant_id=str(tenant.id),
                        models_created=models_created)
        except Exception as e:
            logger.error("Error creating fake AI models for tenant",
                         tenant_id=str(tenant.id),
                         error=str(e))
            raise

    async def seed_all_demo_models(self):
        """Seed fake AI models for every tenant returned by get_demo_tenants().

        Returns early with a warning when no tenant is found.

        Raises:
            Exception: Any error is logged and re-raised; the first failing
                tenant stops the run.
        """
        logger.info("Starting demo AI models seeding")
        try:
            # Get all demo tenants
            demo_tenants = await self.get_demo_tenants()
            if not demo_tenants:
                logger.warning("No demo tenants found")
                return

            logger.info(f"Found {len(demo_tenants)} demo tenants")

            # Seed models for each tenant
            for tenant in demo_tenants:
                await self.seed_models_for_tenant(tenant)

            logger.info("✅ Demo AI models seeding completed successfully",
                        tenants_processed=len(demo_tenants))
        except Exception as e:
            logger.error("❌ Demo AI models seeding failed", error=str(e))
            raise
async def main():
    """Run the demo seeding end to end; exit with status 1 on any failure."""
    logger.info("Demo AI Models Seed Script started")
    try:
        # Construction stays inside the try so a configuration error from the
        # seeder's constructor is logged like any other failure.
        await DemoAIModelSeeder().seed_all_demo_models()
        logger.info("Demo AI models seed completed successfully")
    except Exception as exc:
        logger.error("Demo AI models seed failed", error=str(exc))
        sys.exit(1)
# Run the async entry point only when executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())