382 lines
13 KiB
Python
382 lines
13 KiB
Python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Demo Retail Sales Seeding Script for Sales Service
Creates realistic historical sales data for child retail outlets

This script runs as a Kubernetes init job inside the sales-service container.
It populates child retail tenants with 30 days of sales history.

Usage:
    python /app/scripts/demo/seed_demo_sales_retail.py

Environment Variables:
    SALES_DATABASE_URL - PostgreSQL connection string for the sales database
                         (required; DATABASE_URL is used as a fallback)
    DEMO_MODE - Set to 'production' for production seeding (default: development)
    LOG_LEVEL - Logging level (default: INFO)
"""
|
|
|
|
import asyncio
import logging
import os
import random
import sys
import uuid
from datetime import datetime, timezone, timedelta
from decimal import Decimal
from pathlib import Path

# Add app to path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
# Add shared to path for demo utilities
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent.parent))

from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select
import structlog

from shared.utils.demo_dates import BASE_REFERENCE_DATE

from app.models.sales import SalesData
|
|
|
|
# Configure logging.
# LOG_LEVEL (default: INFO) is read here so that it actually controls what is
# emitted; previously it was only echoed at startup and every debug line was
# printed. Unknown level names fall back to INFO rather than failing at import.
_configured_level = getattr(logging, os.getenv("LOG_LEVEL", "INFO").upper(), None)
if not isinstance(_configured_level, int):
    _configured_level = logging.INFO

structlog.configure(
    processors=[
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.dev.ConsoleRenderer(),
    ],
    # Records below the configured level are dropped by the bound logger itself.
    wrapper_class=structlog.make_filtering_bound_logger(_configured_level),
)

logger = structlog.get_logger()
|
|
|
|
# Fixed Demo Tenant IDs (must match tenant service)
DEMO_TENANT_CHILD_1 = uuid.UUID("d4e5f6a7-b8c9-40d1-e2f3-a4b5c6d7e8f9")  # Madrid Centro
DEMO_TENANT_CHILD_2 = uuid.UUID("e5f6a7b8-c9d0-41e2-f3a4-b5c6d7e8f9a0")  # Barcelona Gràcia
DEMO_TENANT_CHILD_3 = uuid.UUID("f6a7b8c9-d0e1-42f3-a4b5-c6d7e8f9a0b1")  # Valencia Ruzafa

# Hardcoded product IDs from ingredientes_es.json (finished products).
# Keys are SKU codes; values are the base product UUIDs as strings.
PRODUCT_IDS = {
    "PRO-BAG-001": "20000000-0000-0000-0000-000000000001",  # Baguette Tradicional
    "PRO-CRO-001": "20000000-0000-0000-0000-000000000002",  # Croissant de Mantequilla
    "PRO-PUE-001": "20000000-0000-0000-0000-000000000003",  # Pan de Pueblo
    "PRO-NAP-001": "20000000-0000-0000-0000-000000000004",  # Napolitana de Chocolate
}

# Retail sales patterns for each store.
# Each entry gives the SKU, display name, average daily quantity, the +/- range
# applied around that average, and the unit price used for revenue.

# Madrid Centro - Large urban store, high traffic
MADRID_CENTRO_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 120, "variance": 20, "price": 1.30},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 80, "variance": 15, "price": 1.60},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 35, "variance": 8, "price": 3.80},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 60, "variance": 12, "price": 1.90},
]

# Barcelona Gràcia - Medium neighborhood store
BARCELONA_GRACIA_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 90, "variance": 15, "price": 1.25},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 60, "variance": 12, "price": 1.55},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 25, "variance": 6, "price": 3.70},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 45, "variance": 10, "price": 1.85},
]

# Valencia Ruzafa - Smaller boutique store
VALENCIA_RUZAFA_PRODUCTS = [
    {"sku": "PRO-BAG-001", "name": "Baguette Tradicional", "avg_qty": 70, "variance": 12, "price": 1.20},
    {"sku": "PRO-CRO-001", "name": "Croissant de Mantequilla", "avg_qty": 45, "variance": 10, "price": 1.50},
    {"sku": "PRO-PUE-001", "name": "Pan de Pueblo", "avg_qty": 20, "variance": 5, "price": 3.60},
    {"sku": "PRO-NAP-001", "name": "Napolitana de Chocolate", "avg_qty": 35, "variance": 8, "price": 1.80},
]

# Child tenant configurations: (tenant UUID, store name, product patterns)
CHILD_TENANTS = [
    (DEMO_TENANT_CHILD_1, "Madrid Centro", MADRID_CENTRO_PRODUCTS),
    (DEMO_TENANT_CHILD_2, "Barcelona Gràcia", BARCELONA_GRACIA_PRODUCTS),
    (DEMO_TENANT_CHILD_3, "Valencia Ruzafa", VALENCIA_RUZAFA_PRODUCTS),
]
|
|
|
|
|
|
def get_product_by_sku(tenant_id: uuid.UUID, sku: str, product_name: str):
    """
    Resolve the tenant-specific product ID for a SKU.

    The base product UUID registered for the SKU in PRODUCT_IDS is XOR-ed with
    the tenant UUID (both as 128-bit integers) to produce the product ID.

    Args:
        tenant_id: Tenant UUID
        sku: Product SKU code
        product_name: Product name, returned unchanged alongside the ID

    Returns:
        Tuple of (product_id, product_name), or (None, None) when the SKU is
        not present in PRODUCT_IDS
    """
    base_id_text = PRODUCT_IDS.get(sku)
    if base_id_text is None:
        return None, None

    # XOR derivation; per the original note this mirrors the inventory seed script.
    derived_int = tenant_id.int ^ uuid.UUID(base_id_text).int
    return uuid.UUID(int=derived_int), product_name
|
|
|
|
|
|
def _simulate_units_sold(rng: random.Random, avg_qty: float, variance: float, weekday: int) -> int:
    """Draw a non-negative unit count for one product on one day.

    Args:
        rng: Random source to draw from
        avg_qty: Average daily quantity for the product
        variance: Half-width of the uniform range applied around avg_qty
        weekday: Result of date.weekday() (Monday is 0, Sunday is 6)

    Returns:
        Units sold, truncated to an int and clamped at zero
    """
    # Add weekly patterns (weekends sell more for bakeries)
    if weekday in (5, 6):  # Saturday, Sunday
        multiplier = rng.uniform(1.3, 1.6)  # 30-60% more sales on weekends
    elif weekday == 4:  # Friday
        multiplier = rng.uniform(1.1, 1.3)  # 10-30% more on Fridays
    else:  # Weekdays
        multiplier = rng.uniform(0.85, 1.15)

    return max(0, int((avg_qty + rng.uniform(-variance, variance)) * multiplier))


async def seed_retail_sales_for_tenant(
    db: AsyncSession,
    tenant_id: uuid.UUID,
    tenant_name: str,
    product_patterns: list,
    days_of_history: int = 30
) -> dict:
    """
    Seed retail sales data for a specific child tenant

    One SalesData row is added per product per open day, for the
    days_of_history days preceding BASE_REFERENCE_DATE. Rows that already exist
    for the same tenant, product and date are left untouched. All additions are
    committed once, at the end.

    Random draws come from a generator seeded with the tenant ID and the sale
    date, so a re-run makes the same closure decisions and draws the same
    quantities. With the unseeded global generator, each re-run re-rolled the
    closure draw and filled in days that had been skipped on purpose.

    Args:
        db: Database session
        tenant_id: UUID of the child tenant
        tenant_name: Name of the tenant (for logging)
        product_patterns: List of product sales patterns (dicts with "sku",
            "name", "avg_qty", "variance" and "price")
        days_of_history: Number of days of historical data to generate (default: 30)

    Returns:
        Dict with seeding statistics
    """
    logger.info("─" * 80)
    logger.info(f"Seeding retail sales data for: {tenant_name}")
    logger.info(f"Tenant ID: {tenant_id}")
    logger.info(f"Days of history: {days_of_history}")
    logger.info("─" * 80)

    created_sales = 0
    skipped_sales = 0

    # Generate sales data for each day (working backwards from BASE_REFERENCE_DATE)
    for days_ago in range(days_of_history, 0, -1):
        sale_date = BASE_REFERENCE_DATE - timedelta(days=days_ago)

        # Per-tenant, per-day generator keeps re-runs reproducible.
        # NOTE(review): assumes BASE_REFERENCE_DATE is a date/datetime (it is
        # already used with weekday() and strftime()), so isoformat() exists.
        rng = random.Random(f"{tenant_id}:{sale_date.isoformat()}")

        # Skip some days to simulate closures/holidays (3% chance)
        if rng.random() < 0.03:
            continue

        weekday = sale_date.weekday()
        is_weekend = weekday in (5, 6)

        # For each product, generate sales
        for product_pattern in product_patterns:
            sku = product_pattern["sku"]

            # Get tenant-specific product ID using XOR transformation
            product_id, product_name = get_product_by_sku(tenant_id, sku, product_pattern["name"])
            if product_id is None:
                logger.warning(f" ⚠️ Product not found: {sku}")
                continue

            # Draw before the existence check so the random stream consumed for
            # later products does not depend on what is already in the database.
            quantity = _simulate_units_sold(
                rng,
                product_pattern["avg_qty"],
                product_pattern["variance"],
                weekday,
            )

            # Check if sales record already exists
            result = await db.execute(
                select(SalesData).where(
                    SalesData.tenant_id == tenant_id,
                    SalesData.inventory_product_id == product_id,
                    SalesData.date == sale_date
                )
            )
            existing = result.scalars().first()
            if existing:
                skipped_sales += 1
                continue

            if quantity == 0:
                continue

            # Calculate revenue (price goes through str so the Decimal is exact)
            unit_price = Decimal(str(product_pattern["price"]))
            revenue = Decimal(quantity) * unit_price

            # Create sales record
            sales_record = SalesData(
                id=uuid.uuid4(),
                tenant_id=tenant_id,
                inventory_product_id=product_id,
                date=sale_date,
                quantity_sold=quantity,
                revenue=revenue,
                unit_price=unit_price,
                sales_channel="in_store",  # Retail outlets primarily use in-store sales
                location_id="main",  # Single location per retail outlet
                source="demo_seed",
                is_weekend=is_weekend,
                created_at=sale_date,
                updated_at=sale_date
            )

            db.add(sales_record)
            created_sales += 1

            logger.debug(
                f" ✅ {sale_date.strftime('%Y-%m-%d')}: {product_name} - "
                f"{quantity} units @ €{unit_price} = €{revenue:.2f}"
            )

    # Commit all changes for this tenant
    await db.commit()

    logger.info(f" 📊 Sales records created: {created_sales}, Skipped: {skipped_sales}")
    logger.info("")

    return {
        "tenant_id": str(tenant_id),
        "tenant_name": tenant_name,
        "sales_created": created_sales,
        "sales_skipped": skipped_sales,
        "days_of_history": days_of_history
    }
|
|
|
|
|
|
async def seed_retail_sales(db: AsyncSession, days_of_history: int = 30):
    """
    Seed retail sales for all child tenant templates

    Runs seed_retail_sales_for_tenant once for every entry in CHILD_TENANTS, in
    order, and sums the per-tenant counters.

    Args:
        db: Database session
        days_of_history: Number of days of history to generate per tenant
            (default: 30). The same value drives the log line and the
            per-tenant call, so the two cannot drift apart.

    Returns:
        Dict with overall seeding statistics: "service", "tenants_seeded",
        "total_sales_created", "total_skipped" and the per-tenant "results"
    """
    logger.info("=" * 80)
    logger.info("💰 Starting Demo Retail Sales Seeding")
    logger.info("=" * 80)
    logger.info(f"Creating {days_of_history} days of sales history for retail outlets")
    logger.info("")

    results = []

    # Seed for each child retail outlet
    for child_tenant_id, child_tenant_name, product_patterns in CHILD_TENANTS:
        logger.info("")
        result = await seed_retail_sales_for_tenant(
            db,
            child_tenant_id,
            f"{child_tenant_name} (Retail Outlet)",
            product_patterns,
            days_of_history=days_of_history,
        )
        results.append(result)

    # Calculate totals
    total_sales = sum(r["sales_created"] for r in results)
    total_skipped = sum(r["sales_skipped"] for r in results)

    logger.info("=" * 80)
    logger.info("✅ Demo Retail Sales Seeding Completed")
    logger.info("=" * 80)

    return {
        "service": "sales_retail",
        "tenants_seeded": len(results),
        "total_sales_created": total_sales,
        "total_skipped": total_skipped,
        "results": results
    }
|
|
|
|
|
|
async def main():
    """Main execution function

    Reads the database URL from SALES_DATABASE_URL (falling back to
    DATABASE_URL), opens an async session, runs the retail sales seeding and
    logs a summary.

    Returns:
        0 on success; 1 when no database URL is configured or seeding raises
    """

    logger.info("Demo Retail Sales Seeding Script Starting")
    logger.info("Mode: %s", os.getenv("DEMO_MODE", "development"))
    logger.info("Log Level: %s", os.getenv("LOG_LEVEL", "INFO"))

    # Get database URL from environment
    database_url = os.getenv("SALES_DATABASE_URL") or os.getenv("DATABASE_URL")
    if not database_url:
        logger.error("❌ SALES_DATABASE_URL or DATABASE_URL environment variable must be set")
        return 1

    # Convert to async URL if needed. The "postgres://" alias is normalised too:
    # SQLAlchemy 1.4+ no longer accepts that scheme name, so passing it through
    # unchanged would fail at engine creation.
    for sync_scheme in ("postgresql://", "postgres://"):
        if database_url.startswith(sync_scheme):
            database_url = "postgresql+asyncpg://" + database_url[len(sync_scheme):]
            break

    logger.info("Connecting to sales database")

    # Create engine and session
    engine = create_async_engine(
        database_url,
        echo=False,
        pool_pre_ping=True,
        pool_size=5,
        max_overflow=10
    )

    async_session = sessionmaker(
        engine,
        class_=AsyncSession,
        expire_on_commit=False
    )

    try:
        async with async_session() as session:
            result = await seed_retail_sales(session)

            logger.info("")
            logger.info("📊 Retail Sales Seeding Summary:")
            logger.info(f" ✅ Retail outlets seeded: {result['tenants_seeded']}")
            logger.info(f" ✅ Total sales records: {result['total_sales_created']}")
            logger.info(f" ⏭️ Total skipped: {result['total_skipped']}")
            logger.info("")

            # Print per-tenant details
            for tenant_result in result['results']:
                logger.info(
                    f" {tenant_result['tenant_name']}: "
                    f"{tenant_result['sales_created']} sales records"
                )

            logger.info("")
            logger.info("🎉 Success! Retail sales history is ready for cloning.")
            logger.info("")
            logger.info("Sales characteristics:")
            logger.info(" ✓ 30 days of historical data")
            logger.info(" ✓ Weekend sales boost (30-60% higher)")
            logger.info(" ✓ Friday pre-weekend surge (10-30% higher)")
            logger.info(" ✓ Realistic variance per product")
            logger.info(" ✓ Store-specific pricing and volumes")
            logger.info("")
            logger.info("Next steps:")
            logger.info(" 1. Seed customer data")
            logger.info(" 2. Seed retail orders (internal transfers from parent)")
            logger.info(" 3. Test forecasting with retail sales data")
            logger.info("")

            return 0

    except Exception as e:
        # Top-level boundary: report the failure and signal it via the exit code.
        logger.error("=" * 80)
        logger.error("❌ Demo Retail Sales Seeding Failed")
        logger.error("=" * 80)
        logger.error("Error: %s", str(e))
        logger.error("", exc_info=True)
        return 1

    finally:
        # Release pooled connections whether seeding succeeded or not.
        await engine.dispose()
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the async entry point and use its return value as the process exit status.
    sys.exit(asyncio.run(main()))
|