#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Production Batches Seeding Script for Production Service
Creates production batches for demo template tenants
This script runs as a Kubernetes init job inside the production-service container.
"""
import asyncio
import uuid
import sys
import os
import json
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Optional
# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog
from app.models.production import ProductionBatch, ProductionStatus, ProductionPriority, ProcessStage
# Import reasoning helper functions for i18n support
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.schemas.reasoning_types import create_batch_reasoning_forecast_demand, create_batch_reasoning_regular_schedule
# Configure logging
logger = structlog.get_logger()
# Base demo tenant IDs
DEMO_TENANT_SAN_PABLO = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6") # Individual bakery
DEMO_TENANT_LA_ESPIGA = uuid.UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7") # Central bakery
# Base reference date for date calculations
# MUST match shared/utils/demo_dates.py for proper demo session cloning
# This fixed date allows demo sessions to adjust all dates relative to session creation time
# IMPORTANT: Must match the actual dates in seed data (production batches start Jan 8, 2025)
BASE_REFERENCE_DATE = datetime(2025, 1, 8, 6, 0, 0, tzinfo=timezone.utc)
def load_batches_data():
"""Load production batches data from JSON file"""
data_file = Path(__file__).parent / "lotes_produccion_es.json"
if not data_file.exists():
raise FileNotFoundError(f"Production batches data file not found: {data_file}")
with open(data_file, 'r', encoding='utf-8') as f:
return json.load(f)
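# Illustrative shape of the JSON file, inferred from the keys this script reads
# below; the real lotes_produccion_es.json ships next to this script and holds
# the authoritative values:
#   {
#     "lotes_produccion": [
#       {
#         "id": "<uuid>",
#         "batch_number": "LOTE-2025-001",
#         "product_id": "<uuid>",
#         "product_name": "Pan de Masa Madre",
#         "recipe_id": null,
#         "planned_start_offset_days": 0,
#         "planned_start_hour": 6,
#         "planned_start_minute": 30,
#         "planned_duration_minutes": 180,
#         "planned_quantity": 50,
#         "status": "COMPLETED",
#         "priority": "MEDIUM",
#         "current_process_stage": "baking",
#         "is_ai_assisted": false
#       }
#     ]
#   }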
def calculate_datetime_from_offset(offset_days: int, hour: int, minute: int) -> datetime:
"""Calculate a datetime based on offset from BASE_REFERENCE_DATE"""
base_date = BASE_REFERENCE_DATE.replace(hour=hour, minute=minute, second=0, microsecond=0)
return base_date + timedelta(days=offset_days)
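# Worked example: offset_days=2, hour=14, minute=30 resolves to
# 2025-01-10 14:30:00 UTC (BASE_REFERENCE_DATE keeps its date, takes the new
# time-of-day, then shifts forward two days).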
def map_status(status_str: str) -> ProductionStatus:
"""Map status string to enum"""
mapping = {
"PENDING": ProductionStatus.PENDING,
"IN_PROGRESS": ProductionStatus.IN_PROGRESS,
"COMPLETED": ProductionStatus.COMPLETED,
"CANCELLED": ProductionStatus.CANCELLED,
"ON_HOLD": ProductionStatus.ON_HOLD,
"QUALITY_CHECK": ProductionStatus.QUALITY_CHECK,
"FAILED": ProductionStatus.FAILED
}
return mapping.get(status_str, ProductionStatus.PENDING)
def map_priority(priority_str: str) -> ProductionPriority:
"""Map priority string to enum"""
mapping = {
"LOW": ProductionPriority.LOW,
"MEDIUM": ProductionPriority.MEDIUM,
"HIGH": ProductionPriority.HIGH,
"URGENT": ProductionPriority.URGENT
}
return mapping.get(priority_str, ProductionPriority.MEDIUM)
def map_process_stage(stage_str: Optional[str]) -> Optional[ProcessStage]:
    """Map process stage string to enum, or None when no stage is set"""
    if not stage_str:
        return None
    mapping = {
        "mixing": ProcessStage.MIXING,
        "proofing": ProcessStage.PROOFING,
        "shaping": ProcessStage.SHAPING,
        "baking": ProcessStage.BAKING,
        "cooling": ProcessStage.COOLING,
        "packaging": ProcessStage.PACKAGING,
        "finishing": ProcessStage.FINISHING
    }
    return mapping.get(stage_str)
async def seed_batches_for_tenant(
db: AsyncSession,
tenant_id: uuid.UUID,
tenant_name: str,
batches_list: list
):
"""Seed production batches for a specific tenant"""
logger.info(f"Seeding production batches for: {tenant_name}", tenant_id=str(tenant_id))
# Check if batches already exist
result = await db.execute(
select(ProductionBatch).where(ProductionBatch.tenant_id == tenant_id).limit(1)
)
existing = result.scalar_one_or_none()
if existing:
logger.info(f"Production batches already exist for {tenant_name}, skipping seed")
return {"tenant_id": str(tenant_id), "batches_created": 0, "skipped": True}
count = 0
for batch_data in batches_list:
# Calculate planned start and end times
planned_start = calculate_datetime_from_offset(
batch_data["planned_start_offset_days"],
batch_data["planned_start_hour"],
batch_data["planned_start_minute"]
)
planned_end = planned_start + timedelta(minutes=batch_data["planned_duration_minutes"])
# Calculate actual times for completed batches
actual_start = None
actual_end = None
completed_at = None
actual_duration = None
if batch_data["status"] in ["COMPLETED", "QUALITY_CHECK"]:
actual_start = planned_start # Assume started on time
actual_duration = batch_data["planned_duration_minutes"]
actual_end = actual_start + timedelta(minutes=actual_duration)
completed_at = actual_end
elif batch_data["status"] == "IN_PROGRESS":
            # For IN_PROGRESS batches, set actual_start to a recent time so progress
            # calculations stay valid: if planned_start is in the past, back-date from
            # now by a capped elapsed time; otherwise, start the batch 30 minutes ago
now = datetime.now(timezone.utc)
if planned_start < now:
                # Back-date actual_start so the batch reports at most ~30% progress
elapsed_time_minutes = min(
int(batch_data["planned_duration_minutes"] * 0.3),
int((now - planned_start).total_seconds() / 60)
)
actual_start = now - timedelta(minutes=elapsed_time_minutes)
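                # e.g. a 180-minute batch planned 3 hours ago: min(int(180 * 0.3), 180) = 54,
                # so actual_start = now - 54 minutes and progress reads ~30%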
else:
# If planned_start is in the future, start batch 30 minutes ago
actual_start = now - timedelta(minutes=30)
actual_duration = None
actual_end = None
# For San Pablo, use original IDs. For La Espiga, generate new UUIDs
if tenant_id == DEMO_TENANT_SAN_PABLO:
batch_id = uuid.UUID(batch_data["id"])
else:
            # Generate deterministic UUID for La Espiga based on original ID
            base_uuid = uuid.UUID(batch_data["id"])
            # Add a fixed offset to create a unique but deterministic ID; mask to
            # 128 bits so IDs near the top of the UUID range cannot overflow
            batch_id = uuid.UUID(int=(base_uuid.int + 0x10000000000000000000000000000000) & ((1 << 128) - 1))
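            # e.g. adding the offset (2**124) bumps the leading hex digit, so an ID
            # beginning 3f2a... becomes 4f2a..., a stable, collision-free second ID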
# Map enums
status = map_status(batch_data["status"])
priority = map_priority(batch_data["priority"])
current_stage = map_process_stage(batch_data.get("current_process_stage"))
# Create unique batch number for each tenant
if tenant_id == DEMO_TENANT_SAN_PABLO:
batch_number = batch_data["batch_number"]
else:
# For La Espiga, append tenant suffix to make batch number unique
batch_number = batch_data["batch_number"] + "-LE"
# Generate structured reasoning_data for i18n support
reasoning_data = None
try:
            # Use forecast-demand reasoning for AI-assisted and high/urgent-priority batches
if batch_data.get("is_ai_assisted") or priority in [ProductionPriority.HIGH, ProductionPriority.URGENT]:
reasoning_data = create_batch_reasoning_forecast_demand(
product_name=batch_data["product_name"],
predicted_demand=batch_data["planned_quantity"],
current_stock=int(batch_data["planned_quantity"] * 0.3), # Demo: assume 30% current stock
production_needed=batch_data["planned_quantity"],
target_date=planned_start.date().isoformat(),
confidence_score=0.85 if batch_data.get("is_ai_assisted") else 0.75
)
else:
# Regular schedule reasoning for standard batches
reasoning_data = create_batch_reasoning_regular_schedule(
product_name=batch_data["product_name"],
schedule_frequency="daily",
batch_size=batch_data["planned_quantity"]
)
except Exception as e:
logger.warning(f"Failed to generate reasoning_data for batch {batch_number}: {e}")
# Create production batch
batch = ProductionBatch(
id=batch_id,
tenant_id=tenant_id,
batch_number=batch_number,
product_id=uuid.UUID(batch_data["product_id"]),
product_name=batch_data["product_name"],
recipe_id=uuid.UUID(batch_data["recipe_id"]) if batch_data.get("recipe_id") else None,
planned_start_time=planned_start,
planned_end_time=planned_end,
planned_quantity=batch_data["planned_quantity"],
planned_duration_minutes=batch_data["planned_duration_minutes"],
actual_start_time=actual_start,
actual_end_time=actual_end,
actual_quantity=batch_data.get("actual_quantity"),
actual_duration_minutes=actual_duration,
status=status,
priority=priority,
current_process_stage=current_stage,
yield_percentage=batch_data.get("yield_percentage"),
quality_score=batch_data.get("quality_score"),
waste_quantity=batch_data.get("waste_quantity"),
defect_quantity=batch_data.get("defect_quantity"),
estimated_cost=batch_data.get("estimated_cost"),
actual_cost=batch_data.get("actual_cost"),
labor_cost=batch_data.get("labor_cost"),
material_cost=batch_data.get("material_cost"),
overhead_cost=batch_data.get("overhead_cost"),
equipment_used=batch_data.get("equipment_used"),
station_id=batch_data.get("station_id"),
is_rush_order=batch_data.get("is_rush_order", False),
is_special_recipe=batch_data.get("is_special_recipe", False),
is_ai_assisted=batch_data.get("is_ai_assisted", False),
waste_defect_type=batch_data.get("waste_defect_type"),
production_notes=batch_data.get("production_notes"),
quality_notes=batch_data.get("quality_notes"),
reasoning_data=reasoning_data, # Structured reasoning for i18n support
created_at=BASE_REFERENCE_DATE,
updated_at=BASE_REFERENCE_DATE,
completed_at=completed_at
)
db.add(batch)
count += 1
logger.debug(f"Created production batch: {batch.batch_number}", batch_id=str(batch.id))
await db.commit()
logger.info(f"Successfully created {count} production batches for {tenant_name}")
return {
"tenant_id": str(tenant_id),
"batches_created": count,
"skipped": False
}
async def seed_all(db: AsyncSession):
"""Seed all demo tenants with production batches"""
logger.info("Starting demo production batches seed process")
# Load batches data
data = load_batches_data()
results = []
# Both tenants get the same production batches
result_san_pablo = await seed_batches_for_tenant(
db,
DEMO_TENANT_SAN_PABLO,
"San Pablo - Individual Bakery",
data["lotes_produccion"]
)
results.append(result_san_pablo)
result_la_espiga = await seed_batches_for_tenant(
db,
DEMO_TENANT_LA_ESPIGA,
"La Espiga - Central Bakery",
data["lotes_produccion"]
)
results.append(result_la_espiga)
total_created = sum(r["batches_created"] for r in results)
return {
"results": results,
"total_batches_created": total_created,
"status": "completed"
}
async def main():
"""Main execution function"""
# Get database URL from environment
database_url = os.getenv("PRODUCTION_DATABASE_URL")
if not database_url:
logger.error("PRODUCTION_DATABASE_URL environment variable must be set")
return 1
# Ensure asyncpg driver
if database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
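        # e.g. postgresql://user:pass@db-host:5432/production
        #   -> postgresql+asyncpg://user:pass@db-host:5432/production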
# Create async engine
engine = create_async_engine(database_url, echo=False)
async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
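    # Note: sessionmaker(class_=AsyncSession) works on SQLAlchemy 1.4+;
    # SQLAlchemy 2.0 also provides async_sessionmaker for the same purpose.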
try:
async with async_session() as session:
result = await seed_all(session)
logger.info(
"Production batches seed completed successfully!",
total_batches=result["total_batches_created"],
status=result["status"]
)
# Print summary
print("\n" + "="*60)
print("DEMO PRODUCTION BATCHES SEED SUMMARY")
print("="*60)
for tenant_result in result["results"]:
tenant_id = tenant_result["tenant_id"]
count = tenant_result["batches_created"]
skipped = tenant_result.get("skipped", False)
status = "SKIPPED (already exists)" if skipped else f"CREATED {count} batches"
print(f"Tenant {tenant_id}: {status}")
print(f"\nTotal Batches Created: {result['total_batches_created']}")
print("="*60 + "\n")
return 0
except Exception as e:
logger.error(f"Production batches seed failed: {str(e)}", exc_info=True)
return 1
finally:
await engine.dispose()
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)