#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Production Batches Seeding Script for Production Service
Creates production batches for demo template tenants
This script runs as a Kubernetes init job inside the production-service container.
"""
import asyncio
import uuid
import sys
import os
import json
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Optional
# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog
from app.models.production import ProductionBatch, ProductionStatus, ProductionPriority, ProcessStage
# Import reasoning helper functions for i18n support
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from shared.schemas.reasoning_types import create_batch_reasoning_forecast_demand, create_batch_reasoning_regular_schedule
# Configure logging
logger = structlog.get_logger()
# Base demo tenant IDs
DEMO_TENANT_SAN_PABLO = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6") # Individual bakery
DEMO_TENANT_LA_ESPIGA = uuid.UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7") # Central bakery
# Base reference date for date calculations
# MUST match shared/utils/demo_dates.py for proper demo session cloning
# This fixed date allows demo sessions to adjust all dates relative to session creation time
# IMPORTANT: Must match the actual dates in seed data (production batches start Jan 8, 2025)
BASE_REFERENCE_DATE = datetime(2025, 1, 8, 6, 0, 0, tzinfo=timezone.utc)
def load_batches_data():
"""Load production batches data from JSON file"""
data_file = Path(__file__).parent / "lotes_produccion_es.json"
if not data_file.exists():
raise FileNotFoundError(f"Production batches data file not found: {data_file}")
with open(data_file, 'r', encoding='utf-8') as f:
return json.load(f)
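# Illustrative shape of the JSON file, inferred from the keys this script reads
# below; the real lotes_produccion_es.json ships next to this script and holds
# the authoritative values:
#   {
#     "lotes_produccion": [
#       {
#         "id": "<uuid>",
#         "batch_number": "LOTE-2025-001",
#         "product_id": "<uuid>",
#         "product_name": "Pan de Masa Madre",
#         "recipe_id": null,
#         "planned_start_offset_days": 0,
#         "planned_start_hour": 6,
#         "planned_start_minute": 30,
#         "planned_duration_minutes": 180,
#         "planned_quantity": 50,
#         "status": "COMPLETED",
#         "priority": "MEDIUM",
#         "current_process_stage": "baking",
#         "is_ai_assisted": false
#       }
#     ]
#   }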
def calculate_datetime_from_offset(offset_days: int, hour: int, minute: int) -> datetime:
"""Calculate a datetime based on offset from BASE_REFERENCE_DATE"""
base_date = BASE_REFERENCE_DATE.replace(hour=hour, minute=minute, second=0, microsecond=0)
return base_date + timedelta(days=offset_days)
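# Worked example: offset_days=2, hour=14, minute=30 resolves to
# 2025-01-10 14:30:00 UTC (BASE_REFERENCE_DATE keeps its date, takes the new
# time-of-day, then shifts forward two days).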
def map_status(status_str: str) -> ProductionStatus:
"""Map status string to enum"""
mapping = {
"PENDING": ProductionStatus.PENDING,
"IN_PROGRESS": ProductionStatus.IN_PROGRESS,
"COMPLETED": ProductionStatus.COMPLETED,
"CANCELLED": ProductionStatus.CANCELLED,
"ON_HOLD": ProductionStatus.ON_HOLD,
"QUALITY_CHECK": ProductionStatus.QUALITY_CHECK,
"FAILED": ProductionStatus.FAILED
}
return mapping.get(status_str, ProductionStatus.PENDING)
def map_priority(priority_str: str) -> ProductionPriority:
"""Map priority string to enum"""
mapping = {
"LOW": ProductionPriority.LOW,
"MEDIUM": ProductionPriority.MEDIUM,
"HIGH": ProductionPriority.HIGH,
"URGENT": ProductionPriority.URGENT
}
return mapping.get(priority_str, ProductionPriority.MEDIUM)
def map_process_stage(stage_str: Optional[str]) -> Optional[ProcessStage]:
    """Map process stage string to enum, or None when no stage is set"""
    if not stage_str:
        return None
    mapping = {
        "mixing": ProcessStage.MIXING,
        "proofing": ProcessStage.PROOFING,
        "shaping": ProcessStage.SHAPING,
        "baking": ProcessStage.BAKING,
        "cooling": ProcessStage.COOLING,
        "packaging": ProcessStage.PACKAGING,
        "finishing": ProcessStage.FINISHING
    }
    return mapping.get(stage_str)
async def seed_batches_for_tenant(
db: AsyncSession,
tenant_id: uuid.UUID,
tenant_name: str,
batches_list: list
):
"""Seed production batches for a specific tenant"""
logger.info(f"Seeding production batches for: {tenant_name}", tenant_id=str(tenant_id))
# Check if batches already exist
result = await db.execute(
select(ProductionBatch).where(ProductionBatch.tenant_id == tenant_id).limit(1)
)
existing = result.scalar_one_or_none()
if existing:
logger.info(f"Production batches already exist for {tenant_name}, skipping seed")
return {"tenant_id": str(tenant_id), "batches_created": 0, "skipped": True}
count = 0
for batch_data in batches_list:
# Calculate planned start and end times
planned_start = calculate_datetime_from_offset(
batch_data["planned_start_offset_days"],
batch_data["planned_start_hour"],
batch_data["planned_start_minute"]
)
planned_end = planned_start + timedelta(minutes=batch_data["planned_duration_minutes"])
# Calculate actual times for completed batches
actual_start = None
actual_end = None
completed_at = None
actual_duration = None
if batch_data["status"] in ["COMPLETED", "QUALITY_CHECK"]:
actual_start = planned_start # Assume started on time
actual_duration = batch_data["planned_duration_minutes"]
actual_end = actual_start + timedelta(minutes=actual_duration)
completed_at = actual_end
elif batch_data["status"] == "IN_PROGRESS":
            # For IN_PROGRESS batches, set actual_start to a recent time so progress
            # calculations stay valid: if planned_start is in the past, back-date from
            # now by a capped elapsed time; otherwise, start the batch 30 minutes ago
now = datetime.now(timezone.utc)
if planned_start < now:
                # Back-date actual_start so the batch reports at most ~30% progress
elapsed_time_minutes = min(
int(batch_data["planned_duration_minutes"] * 0.3),
int((now - planned_start).total_seconds() / 60)
)
actual_start = now - timedelta(minutes=elapsed_time_minutes)
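                # e.g. a 180-minute batch planned 3 hours ago: min(int(180 * 0.3), 180) = 54,
                # so actual_start = now - 54 minutes and progress reads ~30%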
else:
# If planned_start is in the future, start batch 30 minutes ago
actual_start = now - timedelta(minutes=30)
actual_duration = None
actual_end = None
# For San Pablo, use original IDs. For La Espiga, generate new UUIDs
if tenant_id == DEMO_TENANT_SAN_PABLO:
batch_id = uuid.UUID(batch_data["id"])
else:
            # Generate deterministic UUID for La Espiga based on original ID
            base_uuid = uuid.UUID(batch_data["id"])
            # Add a fixed offset to create a unique but deterministic ID; mask to
            # 128 bits so IDs near the top of the UUID range cannot overflow
            batch_id = uuid.UUID(int=(base_uuid.int + 0x10000000000000000000000000000000) & ((1 << 128) - 1))
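            # e.g. adding the offset (2**124) bumps the leading hex digit, so an ID
            # beginning 3f2a... becomes 4f2a..., a stable, collision-free second ID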
# Map enums
status = map_status(batch_data["status"])
priority = map_priority(batch_data["priority"])
current_stage = map_process_stage(batch_data.get("current_process_stage"))
# Create unique batch number for each tenant
if tenant_id == DEMO_TENANT_SAN_PABLO:
batch_number = batch_data["batch_number"]
else:
# For La Espiga, append tenant suffix to make batch number unique
batch_number = batch_data["batch_number"] + "-LE"
# Generate structured reasoning_data for i18n support
reasoning_data = None
try:
            # Use forecast-demand reasoning for AI-assisted and high/urgent-priority batches
if batch_data.get("is_ai_assisted") or priority in [ProductionPriority.HIGH, ProductionPriority.URGENT]:
reasoning_data = create_batch_reasoning_forecast_demand(
product_name=batch_data["product_name"],
predicted_demand=batch_data["planned_quantity"],
current_stock=int(batch_data["planned_quantity"] * 0.3), # Demo: assume 30% current stock
production_needed=batch_data["planned_quantity"],
target_date=planned_start.date().isoformat(),
confidence_score=0.85 if batch_data.get("is_ai_assisted") else 0.75
)
else:
# Regular schedule reasoning for standard batches
reasoning_data = create_batch_reasoning_regular_schedule(
product_name=batch_data["product_name"],
schedule_frequency="daily",
batch_size=batch_data["planned_quantity"]
)
except Exception as e:
logger.warning(f"Failed to generate reasoning_data for batch {batch_number}: {e}")
# Create production batch
batch = ProductionBatch(
id=batch_id,
tenant_id=tenant_id,
batch_number=batch_number,
product_id=uuid.UUID(batch_data["product_id"]),
product_name=batch_data["product_name"],
recipe_id=uuid.UUID(batch_data["recipe_id"]) if batch_data.get("recipe_id") else None,
planned_start_time=planned_start,
planned_end_time=planned_end,
planned_quantity=batch_data["planned_quantity"],
planned_duration_minutes=batch_data["planned_duration_minutes"],
actual_start_time=actual_start,
actual_end_time=actual_end,
actual_quantity=batch_data.get("actual_quantity"),
actual_duration_minutes=actual_duration,
status=status,
priority=priority,
current_process_stage=current_stage,
yield_percentage=batch_data.get("yield_percentage"),
quality_score=batch_data.get("quality_score"),
waste_quantity=batch_data.get("waste_quantity"),
defect_quantity=batch_data.get("defect_quantity"),
estimated_cost=batch_data.get("estimated_cost"),
actual_cost=batch_data.get("actual_cost"),
labor_cost=batch_data.get("labor_cost"),
material_cost=batch_data.get("material_cost"),
overhead_cost=batch_data.get("overhead_cost"),
equipment_used=batch_data.get("equipment_used"),
station_id=batch_data.get("station_id"),
is_rush_order=batch_data.get("is_rush_order", False),
is_special_recipe=batch_data.get("is_special_recipe", False),
is_ai_assisted=batch_data.get("is_ai_assisted", False),
waste_defect_type=batch_data.get("waste_defect_type"),
production_notes=batch_data.get("production_notes"),
quality_notes=batch_data.get("quality_notes"),
reasoning_data=reasoning_data, # Structured reasoning for i18n support
created_at=BASE_REFERENCE_DATE,
updated_at=BASE_REFERENCE_DATE,
completed_at=completed_at
)
db.add(batch)
count += 1
logger.debug(f"Created production batch: {batch.batch_number}", batch_id=str(batch.id))
await db.commit()
logger.info(f"Successfully created {count} production batches for {tenant_name}")
return {
"tenant_id": str(tenant_id),
"batches_created": count,
"skipped": False
}
async def seed_all(db: AsyncSession):
"""Seed all demo tenants with production batches"""
logger.info("Starting demo production batches seed process")
# Load batches data
data = load_batches_data()
results = []
# Both tenants get the same production batches
result_san_pablo = await seed_batches_for_tenant(
db,
DEMO_TENANT_SAN_PABLO,
"San Pablo - Individual Bakery",
data["lotes_produccion"]
)
results.append(result_san_pablo)
result_la_espiga = await seed_batches_for_tenant(
db,
DEMO_TENANT_LA_ESPIGA,
"La Espiga - Central Bakery",
data["lotes_produccion"]
)
results.append(result_la_espiga)
total_created = sum(r["batches_created"] for r in results)
return {
"results": results,
"total_batches_created": total_created,
"status": "completed"
}
async def main():
"""Main execution function"""
# Get database URL from environment
database_url = os.getenv("PRODUCTION_DATABASE_URL")
if not database_url:
logger.error("PRODUCTION_DATABASE_URL environment variable must be set")
return 1
# Ensure asyncpg driver
if database_url.startswith("postgresql://"):
database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
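        # e.g. postgresql://user:pass@db-host:5432/production
        #   -> postgresql+asyncpg://user:pass@db-host:5432/production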
# Create async engine
engine = create_async_engine(database_url, echo=False)
async_session = sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
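    # Note: sessionmaker(class_=AsyncSession) works on SQLAlchemy 1.4+;
    # SQLAlchemy 2.0 also provides async_sessionmaker for the same purpose.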
try:
async with async_session() as session:
result = await seed_all(session)
logger.info(
"Production batches seed completed successfully!",
total_batches=result["total_batches_created"],
status=result["status"]
)
# Print summary
print("\n" + "="*60)
print("DEMO PRODUCTION BATCHES SEED SUMMARY")
print("="*60)
for tenant_result in result["results"]:
tenant_id = tenant_result["tenant_id"]
count = tenant_result["batches_created"]
skipped = tenant_result.get("skipped", False)
status = "SKIPPED (already exists)" if skipped else f"CREATED {count} batches"
print(f"Tenant {tenant_id}: {status}")
print(f"\nTotal Batches Created: {result['total_batches_created']}")
print("="*60 + "\n")
return 0
except Exception as e:
logger.error(f"Production batches seed failed: {str(e)}", exc_info=True)
return 1
finally:
await engine.dispose()
if __name__ == "__main__":
exit_code = asyncio.run(main())
sys.exit(exit_code)