#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Retail Customer Seeding Script for Orders Service
Creates walk-in customers for child retail outlets

This script runs as a Kubernetes init job inside the orders-service container.
It populates child retail tenants with realistic customer profiles.

Usage:
    python /app/scripts/demo/seed_demo_customers_retail.py

Environment Variables:
    ORDERS_DATABASE_URL - PostgreSQL connection string for orders database
                          (DATABASE_URL is used as a fallback when unset)
    DEMO_MODE - Set to 'production' for production seeding
                (this script only logs the value)
    LOG_LEVEL - Logging level (default: INFO; this script only logs the value)
"""

import asyncio
import uuid
import sys
import os
import random
from datetime import datetime, timezone, timedelta
from pathlib import Path

# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
# Add shared to path for demo utilities
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent))

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog

from shared.utils.demo_dates import BASE_REFERENCE_DATE
from app.models.customer import Customer

# Configure logging
structlog.configure(
    processors=[
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer()
    ]
)

logger = structlog.get_logger()

# Fixed Demo Tenant IDs (must match tenant service)
DEMO_TENANT_CHILD_1 = uuid.UUID("d4e5f6a7-b8c9-40d1-e2f3-a4b5c6d7e8f9")  # Madrid Centro
DEMO_TENANT_CHILD_2 = uuid.UUID("e5f6a7b8-c9d0-41e2-f3a4-b5c6d7e8f9a0")  # Barcelona Gràcia
DEMO_TENANT_CHILD_3 = uuid.UUID("f6a7b8c9-d0e1-42f3-a4b5-c6d7e8f9a0b1")  # Valencia Ruzafa

# Spanish first names and surnames for realistic customer generation
FIRST_NAMES = [
    "Carmen", "María", "José", "Antonio", "Ana", "Manuel", "Francisca",
    "David", "Laura", "Daniel", "Marta", "Carlos", "Isabel", "Javier",
    "Lucía", "Miguel", "Sofía", "Francisco", "Elena", "Rafael", "Paula",
    "Pedro", "Cristina", "Luis", "Sara", "Fernando", "Raquel", "Alberto",
    "Beatriz", "Alejandro", "Natalia", "Pablo", "Silvia", "Jorge", "Mónica",
    "Sergio", "Andrea", "Rubén", "Virginia", "Diego", "Pilar", "Iván",
    "Teresa", "Adrián", "Nuria", "Óscar", "Patricia"
]

SURNAMES = [
    "García", "Rodríguez", "González", "Fernández", "López", "Martínez",
    "Sánchez", "Pérez", "Gómez", "Martín", "Jiménez", "Ruiz", "Hernández",
    "Díaz", "Moreno", "Muñoz", "Álvarez", "Romero", "Alonso", "Gutiérrez",
    "Navarro", "Torres", "Domínguez", "Vázquez", "Ramos", "Gil", "Ramírez",
    "Serrano", "Blanco", "Suárez", "Molina", "Castro", "Ortega", "Delgado",
    "Ortiz", "Morales", "Jiménez", "Núñez", "Medina", "Aguilar"
]

# Customer segment distribution for retail
CUSTOMER_SEGMENTS = [
    ("regular", 0.60),     # 60% regular customers
    ("loyal", 0.25),       # 25% loyal customers
    ("occasional", 0.15)   # 15% occasional customers
]

# Maps the accented characters that occur in the (lower-cased) name lists to
# their plain ASCII equivalents so they can be used in an email local part.
_ACCENT_TRANSLATION = str.maketrans("áéíóúñ", "aeioun")


def generate_spanish_name() -> str:
    """Generate a realistic Spanish name (first name plus two surnames)."""
    first_name = random.choice(FIRST_NAMES)
    surname1 = random.choice(SURNAMES)
    surname2 = random.choice(SURNAMES)
    return f"{first_name} {surname1} {surname2}"


def generate_customer_email(name: str, customer_code: str) -> str:
    """Generate a realistic email address.

    Args:
        name: Customer full name; the first two whitespace-separated parts
            become the ``first.second`` local part of the address.
        customer_code: Accepted for interface compatibility; not used when
            building the address.

    Returns:
        An address of the form ``<local><1-99>@<domain>`` with a randomly
        chosen domain.
    """
    # Create email-safe version of name
    parts = name.lower().split()
    if len(parts) >= 2:
        email_name = f"{parts[0]}.{parts[1]}"
    elif parts:
        email_name = parts[0]
    else:
        # Empty or whitespace-only name: use a neutral local part rather
        # than failing with an IndexError.
        email_name = "customer"

    # Remove accents
    email_name = email_name.translate(_ACCENT_TRANSLATION)

    domains = ["gmail.com", "hotmail.es", "yahoo.es", "outlook.es", "protonmail.com"]
    domain = random.choice(domains)

    return f"{email_name}{random.randint(1, 99)}@{domain}"


def generate_spanish_phone() -> str:
    """Generate a realistic Spanish mobile phone number.

    Returns:
        A string formatted as ``+34 PDD DDD DDD`` where ``P`` is 6 or 7.
    """
    # Spanish mobile numbers start with 6 or 7
    prefix = random.choice(['6', '7'])
    number = ''.join([str(random.randint(0, 9)) for _ in range(8)])
    return f"+34 {prefix}{number[0:2]} {number[2:5]} {number[5:8]}"


def select_customer_segment() -> str:
    """Select customer segment based on the CUSTOMER_SEGMENTS distribution."""
    rand = random.random()
    cumulative = 0.0
    for segment, probability in CUSTOMER_SEGMENTS:
        cumulative += probability
        if rand <= cumulative:
            return segment
    # Reached only if the cumulative probabilities fall short of the draw
    # (e.g. floating-point rounding).
    return "regular"


async def seed_retail_customers_for_tenant(
    db: AsyncSession,
    tenant_id: uuid.UUID,
    tenant_name: str,
    num_customers: int,
    city: str
) -> dict:
    """
    Seed walk-in customers for a retail outlet

    If any customer already exists for the tenant, nothing is inserted.
    Otherwise ``num_customers`` customers are added to the session and
    committed in a single commit at the end.

    Args:
        db: Database session
        tenant_id: UUID of the child tenant
        tenant_name: Name of the tenant (for logging)
        num_customers: Number of customers to generate
        city: City name for address generation

    Returns:
        Dict with seeding statistics: ``tenant_id``, ``tenant_name``,
        ``customers_created`` and ``skipped``.
    """
    logger.info("─" * 80)
    logger.info(f"Seeding retail customers for: {tenant_name}")
    logger.info(f"Tenant ID: {tenant_id}")
    logger.info(f"Number of customers: {num_customers}")
    logger.info("─" * 80)

    # Check if customers already exist
    result = await db.execute(
        select(Customer).where(Customer.tenant_id == tenant_id).limit(1)
    )
    existing = result.scalar_one_or_none()

    if existing:
        logger.info(f"Customers already exist for {tenant_name}, skipping seed")
        # Same keys as the non-skipped result so consumers can treat both
        # shapes uniformly.
        return {
            "tenant_id": str(tenant_id),
            "tenant_name": tenant_name,
            "customers_created": 0,
            "skipped": True
        }

    # Customer codes share a per-tenant prefix built from the first four
    # characters of the tenant UUID; compute it once outside the loop.
    code_prefix = str(tenant_id).split('-')[0].upper()[:4]

    created_count = 0

    for i in range(num_customers):
        # Generate customer details
        name = generate_spanish_name()
        customer_code = f"RET-{code_prefix}-{i+1:04d}"
        email = generate_customer_email(name, customer_code) if random.random() > 0.2 else None  # 80% have email
        phone = generate_spanish_phone() if random.random() > 0.1 else None  # 90% have phone

        # Customer segment determines behavior
        segment = select_customer_segment()

        # Determine order history based on segment
        if segment == "loyal":
            total_orders = random.randint(15, 40)
            avg_order_value = random.uniform(15.0, 35.0)
            days_since_last_order = random.randint(1, 7)
        elif segment == "regular":
            total_orders = random.randint(5, 15)
            avg_order_value = random.uniform(8.0, 20.0)
            days_since_last_order = random.randint(3, 14)
        else:  # occasional
            total_orders = random.randint(1, 5)
            avg_order_value = random.uniform(5.0, 15.0)
            days_since_last_order = random.randint(14, 60)

        total_spent = total_orders * avg_order_value
        last_order_date = BASE_REFERENCE_DATE - timedelta(days=days_since_last_order)
        # The first order (stored as created_at) must not be more recent than
        # the last order, so the lower bound follows days_since_last_order.
        days_since_first_order = random.randint(max(30, days_since_last_order), 365)
        first_order_date = BASE_REFERENCE_DATE - timedelta(days=days_since_first_order)

        # Most retail customers are individuals (not businesses)
        is_business = random.random() < 0.05  # 5% are small businesses (cafes, hotels, etc.)

        if is_business:
            business_name = f"{name.split()[0]} {random.choice(['Cafetería', 'Restaurante', 'Hotel', 'Catering'])}"
            customer_type = "business"
            tax_id = f"B{random.randint(10000000, 99999999)}"  # Spanish NIF for businesses
        else:
            business_name = None
            customer_type = "individual"
            tax_id = None

        # Create customer
        customer = Customer(
            id=uuid.uuid4(),
            tenant_id=tenant_id,
            customer_code=customer_code,
            name=name,
            business_name=business_name,
            customer_type=customer_type,
            tax_id=tax_id,
            email=email,
            phone=phone,
            address_line1=None,  # Walk-in customers don't always provide full address
            city=city if random.random() > 0.3 else None,  # 70% have city info
            state=None,
            postal_code=None,
            country="España",
            is_active=True,
            preferred_delivery_method="pickup",  # Retail customers typically pick up
            payment_terms="immediate",  # Retail is always immediate payment
            credit_limit=None,  # No credit for retail
            discount_percentage=5.0 if segment == "loyal" else 0.0,  # Loyal customers get 5% discount
            customer_segment=segment,
            priority_level="normal",
            special_instructions=None,
            total_orders=total_orders,
            total_spent=total_spent,
            average_order_value=avg_order_value,
            last_order_date=last_order_date,
            created_at=first_order_date,
            updated_at=BASE_REFERENCE_DATE
        )

        db.add(customer)
        created_count += 1

        if created_count % 20 == 0:
            logger.debug(f" Created {created_count}/{num_customers} customers...")

    # Commit all changes
    await db.commit()

    logger.info(f" 📊 Customers created: {created_count}")
    logger.info("")

    return {
        "tenant_id": str(tenant_id),
        "tenant_name": tenant_name,
        "customers_created": created_count,
        "skipped": False
    }


async def seed_retail_customers(db: AsyncSession):
    """
    Seed retail customers for all child tenant templates

    Args:
        db: Database session

    Returns:
        Dict with overall seeding statistics: ``service``, ``tenants_seeded``
        (number of tenants processed, including skipped ones),
        ``total_customers_created`` and the per-tenant ``results`` list.
    """
    logger.info("=" * 80)
    logger.info("👥 Starting Demo Retail Customers Seeding")
    logger.info("=" * 80)
    logger.info("Creating walk-in customer profiles for retail outlets")
    logger.info("")

    results = []

    # Seed customers for each retail outlet
    # Larger stores have more customers
    retail_configs = [
        (DEMO_TENANT_CHILD_1, "Madrid Centro", 100, "Madrid"),        # Large urban store
        (DEMO_TENANT_CHILD_2, "Barcelona Gràcia", 75, "Barcelona"),   # Medium store
        (DEMO_TENANT_CHILD_3, "Valencia Ruzafa", 60, "Valencia")      # Smaller boutique store
    ]

    for tenant_id, tenant_name, num_customers, city in retail_configs:
        logger.info("")
        result = await seed_retail_customers_for_tenant(
            db,
            tenant_id,
            f"{tenant_name} (Retail Outlet)",
            num_customers,
            city
        )
        results.append(result)

    # Calculate totals
    total_customers = sum(r["customers_created"] for r in results)

    logger.info("=" * 80)
    logger.info("✅ Demo Retail Customers Seeding Completed")
    logger.info("=" * 80)

    return {
        "service": "customers_retail",
        "tenants_seeded": len(results),
        "total_customers_created": total_customers,
        "results": results
    }


async def main():
    """Main execution function.

    Reads the database URL from the environment, seeds all retail tenants
    and logs a summary.

    Returns:
        Process exit code: 0 on success, 1 when the database URL is missing
        or seeding raised an exception.
    """
    logger.info("Demo Retail Customers Seeding Script Starting")
    logger.info("Mode: %s", os.getenv("DEMO_MODE", "development"))
    logger.info("Log Level: %s", os.getenv("LOG_LEVEL", "INFO"))

    # Get database URL from environment
    database_url = os.getenv("ORDERS_DATABASE_URL") or os.getenv("DATABASE_URL")
    if not database_url:
        logger.error("❌ ORDERS_DATABASE_URL or DATABASE_URL environment variable must be set")
        return 1

    # Convert to async URL if needed
    if database_url.startswith("postgresql://"):
        database_url = database_url.replace("postgresql://", "postgresql+asyncpg://", 1)

    logger.info("Connecting to orders database")

    # Create engine and session
    engine = create_async_engine(
        database_url,
        echo=False,
        pool_pre_ping=True,
        pool_size=5,
        max_overflow=10
    )

    async_session = sessionmaker(
        engine,
        class_=AsyncSession,
        expire_on_commit=False
    )

    try:
        async with async_session() as session:
            result = await seed_retail_customers(session)

        logger.info("")
        logger.info("📊 Retail Customers Seeding Summary:")
        logger.info(f" ✅ Retail outlets seeded: {result['tenants_seeded']}")
        logger.info(f" ✅ Total customers created: {result['total_customers_created']}")
        logger.info("")

        # Print per-tenant details
        for tenant_result in result['results']:
            if not tenant_result['skipped']:
                logger.info(
                    f" {tenant_result['tenant_name']}: "
                    f"{tenant_result['customers_created']} customers"
                )

        logger.info("")
        logger.info("🎉 Success! Retail customer base is ready for cloning.")
        logger.info("")
        logger.info("Customer characteristics:")
        logger.info(" ✓ Realistic Spanish names and contact info")
        logger.info(" ✓ Segmentation: 60% regular, 25% loyal, 15% occasional")
        logger.info(" ✓ 95% individual customers, 5% small businesses")
        logger.info(" ✓ Order history and spending patterns")
        logger.info(" ✓ Loyal customers receive 5% discount")
        logger.info("")
        logger.info("Next steps:")
        logger.info(" 1. Seed retail orders (internal transfers from parent)")
        logger.info(" 2. Seed POS configurations")
        logger.info(" 3. Test customer analytics and segmentation")
        logger.info("")

        return 0

    except Exception as e:
        # Top-level boundary: report the failure and turn it into a non-zero
        # exit code instead of propagating.
        logger.error("=" * 80)
        logger.error("❌ Demo Retail Customers Seeding Failed")
        logger.error("=" * 80)
        logger.error("Error: %s", str(e))
        logger.error("", exc_info=True)
        return 1

    finally:
        # Always release the engine's connections, on success or failure.
        await engine.dispose()


if __name__ == "__main__":
    exit_code = asyncio.run(main())
    sys.exit(exit_code)