#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Sales Seeding Script for Sales Service
Creates realistic historical sales data for demo template tenants

This script runs as a Kubernetes init job inside the sales-service container.
It populates the template tenants with historical sales data.

Usage:
    python /app/scripts/demo/seed_demo_sales.py

Environment Variables:
    SALES_DATABASE_URL - PostgreSQL connection string for sales database
                         (DATABASE_URL is used as a fallback when unset)
    DEMO_MODE - Set to 'production' for production seeding
                (this script only logs the value; default: development)
    LOG_LEVEL - Logging level (this script only logs the value; default: INFO)

Note:
    INVENTORY_DATABASE_URL is NOT read by this script. Product IDs are derived
    from the hard-coded base IDs in PRODUCT_IDS, so no inventory lookup is made.
"""

import asyncio
import uuid
import sys
import os
from datetime import datetime, timezone, timedelta
from pathlib import Path
import random
from decimal import Decimal

# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select, text
import structlog

from app.models.sales import SalesData

# Configure logging
structlog.configure(
    processors=[
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer()
    ]
)

logger = structlog.get_logger()

# Fixed Demo Tenant IDs (must match tenant service)
DEMO_TENANT_SAN_PABLO = uuid.UUID("a1b2c3d4-e5f6-47a8-b9c0-d1e2f3a4b5c6")
DEMO_TENANT_LA_ESPIGA = uuid.UUID("b2c3d4e5-f6a7-48b9-c0d1-e2f3a4b5c6d7")

# Days of history seeded for every demo tenant (kept short for fast demo loading).
# Single source of truth for both the seeding calls and the summary log line.
DEMO_DAYS_OF_HISTORY = 30

# Hardcoded product IDs from ingredientes_es.json (finished products)
PRODUCT_IDS = {
    "PRO-BAG-001": "20000000-0000-0000-0000-000000000001",  # Baguette Tradicional
    "PRO-CRO-001": "20000000-0000-0000-0000-000000000002",  # Croissant de Mantequilla
    "PRO-PUE-001": "20000000-0000-0000-0000-000000000003",  # Pan de Pueblo
    "PRO-NAP-001": "20000000-0000-0000-0000-000000000004",  # Napolitana de Chocolate
}

# Sample product SKUs and their typical sales patterns
SAN_PABLO_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 80, "variance": 15, "price": 1.20},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 50, "variance": 10, "price": 1.50},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 20, "variance": 5, "price": 3.50},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 35, "variance": 8, "price": 1.80},
]

LA_ESPIGA_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 500, "variance": 80, "price": 0.90},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 300, "variance": 50, "price": 1.10},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 100, "variance": 20, "price": 2.80},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 200, "variance": 40, "price": 1.40},
]


def get_product_by_sku(tenant_id: uuid.UUID, sku: str, product_name: str):
    """
    Get tenant-specific product ID using hardcoded base IDs (no database lookup needed)

    The tenant-specific ID is the bitwise XOR of the tenant UUID and the base
    product UUID, so the same (tenant, SKU) pair always yields the same ID.

    Args:
        tenant_id: Tenant UUID
        sku: Product SKU code
        product_name: Product name (returned unchanged)

    Returns:
        Tuple of (product_id, product_name) or (None, None) if the SKU is unknown
    """
    base_id = PRODUCT_IDS.get(sku)
    if base_id is None:
        return None, None

    # Generate tenant-specific product ID (same as inventory seed script)
    product_id = uuid.UUID(int=tenant_id.int ^ uuid.UUID(base_id).int)
    return product_id, product_name


async def seed_sales_for_tenant(
    sales_db: AsyncSession,
    tenant_id: uuid.UUID,
    tenant_name: str,
    product_patterns: list,
    days_of_history: int = 90
) -> dict:
    """
    Seed sales data for a specific tenant

    One record per product per day is generated for the `days_of_history` days
    preceding today. Each day has a 5% chance of being skipped entirely
    (simulated closure). Records that already exist for the same tenant,
    product and date are skipped rather than duplicated.

    Args:
        sales_db: Sales database session
        tenant_id: UUID of the tenant
        tenant_name: Name of the tenant (for logging)
        product_patterns: List of product sales patterns; each item is a dict
            with "sku", "name", "avg_qty", "variance" and "price" keys
        days_of_history: Number of days of historical data to generate

    Returns:
        Dict with seeding statistics
    """
    logger.info("─" * 80)
    logger.info(f"Seeding sales data for: {tenant_name}")
    logger.info(f"Tenant ID: {tenant_id}")
    logger.info(f"Days of history: {days_of_history}")
    logger.info("─" * 80)

    created_sales = 0
    skipped_sales = 0

    # Anchor all generated dates to midnight UTC. Using the raw current time
    # (with microseconds) would give every run different timestamps, so the
    # "already exists" equality check below could never match and each re-run
    # would insert duplicate rows.
    today_utc = datetime.now(timezone.utc).replace(
        hour=0, minute=0, second=0, microsecond=0
    )

    # Generate sales data for each day, oldest first
    for days_ago in range(days_of_history, 0, -1):
        sale_date = today_utc - timedelta(days=days_ago)

        # Skip some random days to simulate closures
        if random.random() < 0.05:  # 5% chance of being closed
            continue

        # Saturday (5) and Sunday (6) sell more; same for every product that day
        is_weekend = sale_date.weekday() >= 5

        # For each product, generate sales
        for product_pattern in product_patterns:
            sku = product_pattern["sku"]

            # Get tenant-specific product ID using hardcoded base IDs
            product_id, _ = get_product_by_sku(tenant_id, sku, product_pattern["name"])

            if not product_id:
                logger.warning(f" ⚠️ Product not found: {sku}")
                continue

            # Check if sales record already exists
            result = await sales_db.execute(
                select(SalesData).where(
                    SalesData.tenant_id == tenant_id,
                    SalesData.inventory_product_id == product_id,
                    SalesData.date == sale_date
                )
            )
            if result.scalars().first():
                skipped_sales += 1
                continue

            # Calculate sales quantity with variance
            avg_qty = product_pattern["avg_qty"]
            variance = product_pattern["variance"]

            # Add weekly patterns (weekends sell more)
            if is_weekend:
                multiplier = random.uniform(1.2, 1.5)
            else:
                multiplier = random.uniform(0.8, 1.2)

            quantity = max(0, int((avg_qty + random.uniform(-variance, variance)) * multiplier))

            # Nothing sold: do not store an empty record
            if quantity == 0:
                continue

            # Calculate revenue; go through str() so the float price becomes
            # an exact decimal (e.g. 1.20, not 1.1999999...)
            unit_price = Decimal(str(product_pattern["price"]))
            revenue = Decimal(quantity) * unit_price

            # Create sales record
            sales_db.add(
                SalesData(
                    id=uuid.uuid4(),
                    tenant_id=tenant_id,
                    inventory_product_id=product_id,
                    date=sale_date,
                    quantity_sold=quantity,
                    revenue=revenue,
                    unit_price=unit_price,
                    sales_channel="in_store",
                    location_id="main",
                    source="demo_seed",
                    is_weekend=is_weekend,
                    created_at=sale_date,
                    updated_at=sale_date
                )
            )
            created_sales += 1

    # Commit all changes for this tenant
    await sales_db.commit()

    logger.info(f" 📊 Created: {created_sales}, Skipped: {skipped_sales}")
    logger.info("")

    return {
        "tenant_id": str(tenant_id),
        "tenant_name": tenant_name,
        "sales_records_created": created_sales,
        "sales_records_skipped": skipped_sales,
        "days_of_history": days_of_history
    }


async def seed_sales(sales_db: AsyncSession):
    """
    Seed sales for all demo template tenants

    Args:
        sales_db: Sales database session

    Returns:
        Dict with overall seeding statistics, including the per-tenant results
    """
    logger.info("=" * 80)
    logger.info("💰 Starting Demo Sales Seeding")
    logger.info("=" * 80)

    results = []

    # Seed for San Pablo (Traditional Bakery) - short history (optimized for fast demo loading)
    logger.info("")
    result_san_pablo = await seed_sales_for_tenant(
        sales_db,
        DEMO_TENANT_SAN_PABLO,
        "Panadería San Pablo (Traditional)",
        SAN_PABLO_PRODUCTS,
        days_of_history=DEMO_DAYS_OF_HISTORY
    )
    results.append(result_san_pablo)

    # Seed for La Espiga (Central Workshop) - short history (optimized for fast demo loading)
    result_la_espiga = await seed_sales_for_tenant(
        sales_db,
        DEMO_TENANT_LA_ESPIGA,
        "Panadería La Espiga (Central Workshop)",
        LA_ESPIGA_PRODUCTS,
        days_of_history=DEMO_DAYS_OF_HISTORY
    )
    results.append(result_la_espiga)

    # Calculate totals
    total_sales = sum(r["sales_records_created"] for r in results)
    total_skipped = sum(r["sales_records_skipped"] for r in results)

    logger.info("=" * 80)
    logger.info("✅ Demo Sales Seeding Completed")
    logger.info("=" * 80)

    return {
        "service": "sales",
        "tenants_seeded": len(results),
        "total_sales_created": total_sales,
        "total_skipped": total_skipped,
        "results": results
    }


async def main():
    """
    Main execution function

    Reads the database URL from the environment, seeds all demo tenants and
    logs a summary.

    Returns:
        Process exit code: 0 on success, 1 when the database URL is missing
        or seeding raised an exception
    """
    # f-strings are used for all messages (as in the rest of this file) so the
    # output does not depend on the logger supporting positional %-arguments.
    logger.info("Demo Sales Seeding Script Starting")
    logger.info(f"Mode: {os.getenv('DEMO_MODE', 'development')}")
    logger.info(f"Log Level: {os.getenv('LOG_LEVEL', 'INFO')}")

    # Get database URL from environment
    sales_database_url = os.getenv("SALES_DATABASE_URL") or os.getenv("DATABASE_URL")
    if not sales_database_url:
        logger.error("❌ SALES_DATABASE_URL or DATABASE_URL environment variable must be set")
        return 1

    # Convert to async URLs if needed
    if sales_database_url.startswith("postgresql://"):
        sales_database_url = sales_database_url.replace("postgresql://", "postgresql+asyncpg://", 1)

    logger.info("Connecting to sales database")

    # Create engine and session
    sales_engine = create_async_engine(
        sales_database_url,
        echo=False,
        pool_pre_ping=True,
        pool_size=5,
        max_overflow=10
    )

    sales_session_maker = sessionmaker(
        sales_engine,
        class_=AsyncSession,
        expire_on_commit=False
    )

    try:
        async with sales_session_maker() as sales_session:
            result = await seed_sales(sales_session)

        logger.info("")
        logger.info("📊 Seeding Summary:")
        logger.info(f" ✅ Tenants seeded: {result['tenants_seeded']}")
        logger.info(f" ✅ Sales records created: {result['total_sales_created']}")
        logger.info(f" ⏭️ Skipped: {result['total_skipped']}")
        logger.info("")

        # Print per-tenant details
        for tenant_result in result['results']:
            logger.info(
                f" {tenant_result['tenant_name']}: "
                f"{tenant_result['sales_records_created']} sales records "
                f"({tenant_result['days_of_history']} days)"
            )

        logger.info("")
        logger.info("🎉 Success! Sales history is ready for cloning.")
        logger.info("")
        logger.info("Sales data includes:")
        logger.info(f" • {DEMO_DAYS_OF_HISTORY} days of historical sales (optimized for demo performance)")
        logger.info(" • 4 product types per tenant")
        logger.info(" • Realistic weekly patterns (higher on weekends)")
        logger.info(" • Random variance and occasional closures")
        logger.info("")
        logger.info("Next steps:")
        logger.info(" 1. Run seed jobs for other services (orders, production, etc.)")
        logger.info(" 2. Verify sales data in database")
        logger.info(" 3. Test demo session creation with sales cloning")
        logger.info("")

        return 0

    except Exception as e:
        # Top-level boundary of the job: log the failure and signal it through
        # the exit code instead of letting the traceback kill the process.
        logger.error("=" * 80)
        logger.error("❌ Demo Sales Seeding Failed")
        logger.error("=" * 80)
        logger.error(f"Error: {e}")
        logger.error("", exc_info=True)
        return 1

    finally:
        # Always release pooled connections, on success and on failure
        await sales_engine.dispose()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)