# services/external/app/jobs/initialize_data.py
"""
Kubernetes Init Job - Initialize historical weather/traffic data
(24 months by default) and seed school calendars.
"""
|
|
|
|
import asyncio
|
|
import argparse
|
|
import sys
|
|
import logging
|
|
import structlog
|
|
|
|
from app.ingestion.ingestion_manager import DataIngestionManager
|
|
from app.core.database import database_manager
|
|
|
|
logger = structlog.get_logger()
|
|
|
|
|
|
async def main(months: int = 24):
    """Initialize historical data for all enabled cities and seed calendars.

    Runs the weather/traffic initialization, then the school-calendar
    seeding, closes the database connections, and finally terminates the
    process through ``sys.exit``: status 0 when both steps report success,
    status 1 when either step reports failure or an exception is raised.

    Args:
        months: Number of months of history; forwarded unchanged to
            ``DataIngestionManager.initialize_all_cities``.

    Raises:
        SystemExit: On normal completion of the job, carrying the exit
            status described above.
    """
    logger.info("Starting data initialization job", months=months)

    # Pessimistic default: only a fully successful run flips this to 0.
    exit_code = 1

    try:
        manager = DataIngestionManager()

        # Initialize weather and traffic data
        weather_traffic_success = await manager.initialize_all_cities(months=months)

        # Seed school calendars (attempted even if the previous step
        # reported failure, so both outcomes can be logged below)
        logger.info("Proceeding to seed school calendars...")
        calendar_success = await manager.seed_school_calendars()

        # Both must succeed
        if weather_traffic_success and calendar_success:
            logger.info("✅ Data initialization completed successfully (weather, traffic, calendars)")
            exit_code = 0
        else:
            if not weather_traffic_success:
                logger.error("❌ Weather/traffic initialization failed")
            if not calendar_success:
                logger.error("❌ Calendar seeding failed")

    except Exception as e:
        # exc_info=True keeps the traceback in the log record; logging only
        # str(e) made fatal failures of the init job hard to diagnose.
        logger.error(
            "❌ Fatal error during initialization",
            error=str(e),
            exc_info=True,
        )

    finally:
        await database_manager.close_connections()

    # Exit after cleanup instead of raising SystemExit from inside the try.
    sys.exit(exit_code)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse CLI options, configure structlog level
    # filtering, then run the initialization job to completion.
    parser = argparse.ArgumentParser(description="Initialize historical data")
    parser.add_argument("--months", type=int, default=24, help="Number of months to load")
    parser.add_argument("--log-level", default="INFO", help="Log level")

    args = parser.parse_args()

    # Convert string log level to logging constant. Only integer attributes
    # are accepted: a name such as "basic_format" resolves to a non-level
    # attribute of the logging module, which would otherwise be passed on
    # as a level. Unknown or invalid names fall back to INFO.
    log_level = getattr(logging, args.log_level.upper(), None)
    if not isinstance(log_level, int):
        log_level = logging.INFO

    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )

    asyncio.run(main(months=args.months))
|