# services/external/app/jobs/initialize_data.py
"""
Kubernetes Init Job - Initialize 24-month historical data
"""

import asyncio
import argparse
import sys
import logging
import structlog

from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager

logger = structlog.get_logger()


def _resolve_log_level(name: str) -> int:
    """Translate a level name (case-insensitive) into a ``logging`` constant.

    Falls back to ``logging.INFO`` when the name is unknown or when the
    matching ``logging`` attribute is not a numeric level.
    """
    level = getattr(logging, name.upper(), None)
    # getattr alone would also accept non-level attributes such as
    # logging.BASIC_FORMAT (a str); only integers are valid levels.
    if isinstance(level, int) and not isinstance(level, bool):
        return level
    return logging.INFO


async def main(months: int = 24) -> None:
    """Initialize historical data for all enabled cities and seed calendars.

    Runs the weather/traffic initialization first, then seeds the school
    calendars, and terminates the process via ``sys.exit``:

    * exit code 0 - calendar seeding succeeded (weather/traffic may have
      failed partially; that case is only logged as a warning);
    * exit code 1 - calendar seeding failed, or any exception was raised.

    Database connections are closed in all cases before the process exits.

    Args:
        months: Number of months passed to
            ``DataIngestionManager.initialize_all_cities``.
    """
    logger.info("Starting data initialization job", months=months)

    try:
        manager = DataIngestionManager()

        # Initialize weather and traffic data
        weather_traffic_success = await manager.initialize_all_cities(months=months)

        # Seed school calendars
        logger.info("Proceeding to seed school calendars...")
        calendar_success = await manager.seed_school_calendars()

        # Calendar seeding is critical, but weather/traffic can have partial success
        if not calendar_success:
            logger.error("❌ Calendar seeding failed - this is critical")
            sys.exit(1)

        if weather_traffic_success:
            logger.info("✅ Data initialization completed successfully (weather, traffic, calendars)")
        else:
            # Log as warning instead of error if some data was retrieved
            logger.warning("⚠️ Weather/traffic initialization had partial failures, but system can continue")
            logger.info("✅ Calendar seeding completed - system can operate with available data")

        # Allow partial success for weather/traffic
        sys.exit(0)

    except Exception as e:
        # SystemExit derives from BaseException, so the sys.exit() calls above
        # are not intercepted here. exc_info=True keeps the traceback in the
        # job logs instead of only the exception message.
        logger.error("❌ Fatal error during initialization", error=str(e), exc_info=True)
        sys.exit(1)

    finally:
        await database_manager.close_connections()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Initialize historical data")
    parser.add_argument("--months", type=int, default=24, help="Number of months to load")
    parser.add_argument("--log-level", default="INFO", help="Log level")

    args = parser.parse_args()

    # Convert string log level to logging constant
    log_level = _resolve_log_level(args.log_level)

    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )

    asyncio.run(main(months=args.months))