#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Retail Sales Seeding Script for Sales Service
Creates realistic historical sales data for child retail outlets

This script runs as a Kubernetes init job inside the sales-service container.
It populates child retail tenants with 30 days of sales history.

The script is safe to re-run: records that already exist for a given
(tenant, product, date) are skipped, and simulated closure days are derived
deterministically from the tenant and the date so they stay closed on
subsequent runs.

Usage:
    python /app/scripts/demo/seed_demo_sales_retail.py

Environment Variables Required:
    SALES_DATABASE_URL - PostgreSQL connection string for sales database
                         (DATABASE_URL is used as a fallback)
    DEMO_MODE - Set to 'production' for production seeding
                (only echoed in the startup log by this script)
    LOG_LEVEL - Logging level (default: INFO)
                (only echoed in the startup log by this script)
"""

import asyncio
import uuid
import sys
import os
from datetime import datetime, timezone, timedelta
from pathlib import Path
import random
from decimal import Decimal
from typing import Optional, Tuple

# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
# Add shared to path for demo utilities
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent))

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog

from shared.utils.demo_dates import BASE_REFERENCE_DATE
from app.models.sales import SalesData

# Configure logging
structlog.configure(
    processors=[
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer()
    ]
)

logger = structlog.get_logger()

# Fixed Demo Tenant IDs (must match tenant service)
DEMO_TENANT_CHILD_1 = uuid.UUID("d4e5f6a7-b8c9-40d1-e2f3-a4b5c6d7e8f9")  # Madrid Centro
DEMO_TENANT_CHILD_2 = uuid.UUID("e5f6a7b8-c9d0-41e2-f3a4-b5c6d7e8f9a0")  # Barcelona Gràcia
DEMO_TENANT_CHILD_3 = uuid.UUID("f6a7b8c9-d0e1-42f3-a4b5-c6d7e8f9a0b1")  # Valencia Ruzafa

# Hardcoded product IDs from ingredientes_es.json (finished products)
PRODUCT_IDS = {
    "PRO-BAG-001": "20000000-0000-0000-0000-000000000001",  # Baguette Tradicional
    "PRO-CRO-001": "20000000-0000-0000-0000-000000000002",  # Croissant de Mantequilla
    "PRO-PUE-001": "20000000-0000-0000-0000-000000000003",  # Pan de Pueblo
    "PRO-NAP-001": "20000000-0000-0000-0000-000000000004",  # Napolitana de Chocolate
}

# Default length of the generated sales history, in days.
DEFAULT_DAYS_OF_HISTORY = 30

# Probability that a store is closed on any given day (closures/holidays).
CLOSURE_PROBABILITY = 0.03

# datetime.weekday() values used by the weekly sales pattern.
FRIDAY_WEEKDAY = 4
WEEKEND_WEEKDAYS = (5, 6)  # Saturday, Sunday

# Retail sales patterns for each store
# Madrid Centro - Large urban store, high traffic
MADRID_CENTRO_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 120, "variance": 20, "price": 1.30},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 80, "variance": 15, "price": 1.60},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 35, "variance": 8, "price": 3.80},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 60, "variance": 12, "price": 1.90},
]

# Barcelona Gràcia - Medium neighborhood store
BARCELONA_GRACIA_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 90, "variance": 15, "price": 1.25},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 60, "variance": 12, "price": 1.55},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 25, "variance": 6, "price": 3.70},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 45, "variance": 10, "price": 1.85},
]

# Valencia Ruzafa - Smaller boutique store
VALENCIA_RUZAFA_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 70, "variance": 12, "price": 1.20},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 45, "variance": 10, "price": 1.50},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 20, "variance": 5, "price": 3.60},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 35, "variance": 8, "price": 1.80},
]

# Child tenant configurations
CHILD_TENANTS = [
    (DEMO_TENANT_CHILD_1, "Madrid Centro", MADRID_CENTRO_PRODUCTS),
    (DEMO_TENANT_CHILD_2, "Barcelona Gràcia", BARCELONA_GRACIA_PRODUCTS),
    (DEMO_TENANT_CHILD_3, "Valencia Ruzafa", VALENCIA_RUZAFA_PRODUCTS)
]


def get_product_by_sku(
    tenant_id: uuid.UUID,
    sku: str,
    product_name: str
) -> Tuple[Optional[uuid.UUID], Optional[str]]:
    """
    Get tenant-specific product ID using XOR transformation

    Args:
        tenant_id: Tenant UUID
        sku: Product SKU code
        product_name: Product name (returned unchanged on success)

    Returns:
        Tuple of (product_id, product_name) or (None, None) if the SKU is
        not present in PRODUCT_IDS
    """
    base_id = PRODUCT_IDS.get(sku)
    if base_id is None:
        return None, None

    # Generate tenant-specific product ID using XOR (same as inventory seed script).
    # UUID.int is the same 128-bit integer as int(UUID.hex, 16).
    product_id = uuid.UUID(int=tenant_id.int ^ uuid.UUID(base_id).int)
    return product_id, product_name


def _is_closure_day(tenant_id: uuid.UUID, sale_date) -> bool:
    """Decide whether a store is closed on a day, stably across script runs."""
    # A private RNG seeded from (tenant, day) yields the same answer on every
    # run. With the shared unseeded RNG, a re-run would re-roll the closures and
    # insert sales on days that an earlier run had deliberately left empty.
    seed = f"{tenant_id}:{sale_date.strftime('%Y-%m-%d')}"
    return random.Random(seed).random() < CLOSURE_PROBABILITY


def _weekday_multiplier(weekday: int) -> float:
    """Draw the random demand multiplier for a datetime.weekday() value."""
    if weekday in WEEKEND_WEEKDAYS:
        return random.uniform(1.3, 1.6)  # 30-60% more sales on weekends
    if weekday == FRIDAY_WEEKDAY:
        return random.uniform(1.1, 1.3)  # 10-30% more on Fridays
    return random.uniform(0.85, 1.15)  # Regular weekdays


async def seed_retail_sales_for_tenant(
    db: AsyncSession,
    tenant_id: uuid.UUID,
    tenant_name: str,
    product_patterns: list,
    days_of_history: int = 30
) -> dict:
    """
    Seed retail sales data for a specific child tenant

    One SalesData row is added per (day, product) unless the day is a
    simulated closure day, a matching row already exists, or the generated
    quantity is zero. All rows for the tenant are committed together at the
    end.

    Args:
        db: Database session
        tenant_id: UUID of the child tenant
        tenant_name: Name of the tenant (for logging)
        product_patterns: List of product sales patterns; each entry is a
            dict with "sku", "name", "avg_qty", "variance" and "price"
        days_of_history: Number of days of historical data to generate
            (default: 30)

    Returns:
        Dict with seeding statistics
    """
    logger.info("─" * 80)
    logger.info(f"Seeding retail sales data for: {tenant_name}")
    logger.info(f"Tenant ID: {tenant_id}")
    logger.info(f"Days of history: {days_of_history}")
    logger.info("─" * 80)

    created_sales = 0
    skipped_sales = 0

    # Generate sales data for each day (working backwards from BASE_REFERENCE_DATE)
    for days_ago in range(days_of_history, 0, -1):
        sale_date = BASE_REFERENCE_DATE - timedelta(days=days_ago)

        # Skip some days to simulate closures/holidays (3% chance)
        if _is_closure_day(tenant_id, sale_date):
            continue

        # Day-level facts are the same for every product sold that day.
        weekday = sale_date.weekday()
        is_weekend = weekday in WEEKEND_WEEKDAYS

        # For each product, generate sales
        for product_pattern in product_patterns:
            sku = product_pattern["sku"]

            # Get tenant-specific product ID using XOR transformation
            product_id, product_name = get_product_by_sku(
                tenant_id, sku, product_pattern["name"]
            )

            if product_id is None:
                logger.warning(f" ⚠️ Product not found: {sku}")
                continue

            # Check if sales record already exists
            result = await db.execute(
                select(SalesData).where(
                    SalesData.tenant_id == tenant_id,
                    SalesData.inventory_product_id == product_id,
                    SalesData.date == sale_date
                )
            )
            if result.scalars().first() is not None:
                skipped_sales += 1
                continue

            # Calculate sales quantity with realistic variance, scaled by the
            # weekly pattern (weekends sell more for bakeries)
            avg_qty = product_pattern["avg_qty"]
            variance = product_pattern["variance"]
            multiplier = _weekday_multiplier(weekday)
            quantity = max(
                0,
                int((avg_qty + random.uniform(-variance, variance)) * multiplier)
            )

            if quantity == 0:
                continue

            # Calculate revenue; the float price goes through str() so the
            # Decimal holds the written value (e.g. 1.30), not its binary
            # floating-point approximation.
            unit_price = Decimal(str(product_pattern["price"]))
            revenue = Decimal(quantity) * unit_price

            # Create sales record
            sales_record = SalesData(
                id=uuid.uuid4(),
                tenant_id=tenant_id,
                inventory_product_id=product_id,
                date=sale_date,
                quantity_sold=quantity,
                revenue=revenue,
                unit_price=unit_price,
                sales_channel="in_store",  # Retail outlets primarily use in-store sales
                location_id="main",  # Single location per retail outlet
                source="demo_seed",
                is_weekend=is_weekend,
                created_at=sale_date,
                updated_at=sale_date
            )

            db.add(sales_record)
            created_sales += 1

            logger.debug(
                f" ✅ {sale_date.strftime('%Y-%m-%d')}: {product_name} - "
                f"{quantity} units @ €{unit_price} = €{revenue:.2f}"
            )

    # Commit all changes for this tenant
    await db.commit()

    logger.info(f" 📊 Sales records created: {created_sales}, Skipped: {skipped_sales}")
    logger.info("")

    return {
        "tenant_id": str(tenant_id),
        "tenant_name": tenant_name,
        "sales_created": created_sales,
        "sales_skipped": skipped_sales,
        "days_of_history": days_of_history
    }


async def seed_retail_sales(
    db: AsyncSession,
    days_of_history: int = DEFAULT_DAYS_OF_HISTORY
) -> dict:
    """
    Seed retail sales for all child tenant templates

    Args:
        db: Database session
        days_of_history: Number of days of historical data to generate for
            every tenant (default: 30)

    Returns:
        Dict with overall seeding statistics, including the per-tenant
        results under "results"
    """
    logger.info("=" * 80)
    logger.info("💰 Starting Demo Retail Sales Seeding")
    logger.info("=" * 80)
    logger.info(f"Creating {days_of_history} days of sales history for retail outlets")
    logger.info("")

    results = []

    # Seed for each child retail outlet
    for child_tenant_id, child_tenant_name, product_patterns in CHILD_TENANTS:
        logger.info("")
        result = await seed_retail_sales_for_tenant(
            db,
            child_tenant_id,
            f"{child_tenant_name} (Retail Outlet)",
            product_patterns,
            days_of_history=days_of_history
        )
        results.append(result)

    # Calculate totals
    total_sales = sum(r["sales_created"] for r in results)
    total_skipped = sum(r["sales_skipped"] for r in results)

    logger.info("=" * 80)
    logger.info("✅ Demo Retail Sales Seeding Completed")
    logger.info("=" * 80)

    return {
        "service": "sales_retail",
        "tenants_seeded": len(results),
        "total_sales_created": total_sales,
        "total_skipped": total_skipped,
        "days_of_history": days_of_history,
        "results": results
    }


async def main() -> int:
    """
    Main execution function

    Reads the database URL from the environment, seeds all child tenants and
    logs a summary.

    Returns:
        Process exit code: 0 on success, 1 if the database URL is missing or
        seeding failed
    """
    logger.info("Demo Retail Sales Seeding Script Starting")
    logger.info("Mode: %s", os.getenv("DEMO_MODE", "development"))
    logger.info("Log Level: %s", os.getenv("LOG_LEVEL", "INFO"))

    # Get database URL from environment
    database_url = os.getenv("SALES_DATABASE_URL") or os.getenv("DATABASE_URL")
    if not database_url:
        logger.error("❌ SALES_DATABASE_URL or DATABASE_URL environment variable must be set")
        return 1

    # Convert to async URL if needed
    if database_url.startswith("postgresql://"):
        database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)

    logger.info("Connecting to sales database")

    # The engine is created inside the try block so that an invalid URL is
    # reported through the same failure path as any other seeding error.
    engine = None
    try:
        # Create engine and session
        engine = create_async_engine(
            database_url,
            echo=False,
            pool_pre_ping=True,
            pool_size=5,
            max_overflow=10
        )

        async_session = sessionmaker(
            engine,
            class_=AsyncSession,
            expire_on_commit=False
        )

        async with async_session() as session:
            result = await seed_retail_sales(session)

        logger.info("")
        logger.info("📊 Retail Sales Seeding Summary:")
        logger.info(f" ✅ Retail outlets seeded: {result['tenants_seeded']}")
        logger.info(f" ✅ Total sales records: {result['total_sales_created']}")
        logger.info(f" ⏭️ Total skipped: {result['total_skipped']}")
        logger.info("")

        # Print per-tenant details
        for tenant_result in result['results']:
            logger.info(
                f" {tenant_result['tenant_name']}: "
                f"{tenant_result['sales_created']} sales records"
            )

        logger.info("")
        logger.info("🎉 Success! Retail sales history is ready for cloning.")
        logger.info("")
        logger.info("Sales characteristics:")
        logger.info(f" ✓ {result['days_of_history']} days of historical data")
        logger.info(" ✓ Weekend sales boost (30-60% higher)")
        logger.info(" ✓ Friday pre-weekend surge (10-30% higher)")
        logger.info(" ✓ Realistic variance per product")
        logger.info(" ✓ Store-specific pricing and volumes")
        logger.info("")
        logger.info("Next steps:")
        logger.info(" 1. Seed customer data")
        logger.info(" 2. Seed retail orders (internal transfers from parent)")
        logger.info(" 3. Test forecasting with retail sales data")
        logger.info("")

        return 0

    except Exception as e:
        # Top-level boundary: log the failure and signal it via the exit code.
        logger.error("=" * 80)
        logger.error("❌ Demo Retail Sales Seeding Failed")
        logger.error("=" * 80)
        logger.error("Error: %s", str(e))
        logger.error("", exc_info=True)
        return 1

    finally:
        if engine is not None:
            await engine.dispose()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)