Files
bakery-ia/services/external/app/jobs/initialize_data.py

70 lines
2.5 KiB
Python
Raw Permalink Normal View History

# services/external/app/jobs/initialize_data.py
"""
Kubernetes Init Job - Initialize 24-month historical data
"""
import asyncio
import argparse
import sys
import logging
import structlog
from app.ingestion.ingestion_manager import DataIngestionManager
from app.core.database import database_manager
# Module-level structlog logger used by main() for all job output.
logger = structlog.get_logger()
async def main(months: int = 24) -> None:
    """Initialize historical data for all enabled cities and seed calendars.

    Runs weather/traffic initialization first, then school-calendar seeding,
    and finally terminates the process with ``sys.exit``:

    * exit code 0 -- both steps reported success, or only the weather/traffic
      step reported failure (treated as an acceptable partial success);
    * exit code 1 -- calendar seeding reported failure, or any step raised.

    Database connections are closed before exiting on every path.

    Args:
        months: Number of months of history, forwarded to
            ``DataIngestionManager.initialize_all_cities``.

    Raises:
        SystemExit: On every normal completion path; the code carries the
            job outcome described above.
    """
    logger.info("Starting data initialization job", months=months)

    # Pessimistic default: only the explicit success paths below flip it to 0.
    exit_code = 1
    try:
        manager = DataIngestionManager()

        # Initialize weather and traffic data
        weather_traffic_success = await manager.initialize_all_cities(months=months)

        # Seed school calendars
        logger.info("Proceeding to seed school calendars...")
        calendar_success = await manager.seed_school_calendars()

        # Calendar seeding is critical, but weather/traffic can have partial success
        if not calendar_success:
            logger.error("❌ Calendar seeding failed - this is critical")
        elif not weather_traffic_success:
            # Log as warning instead of error if some data was retrieved
            logger.warning("⚠️ Weather/traffic initialization had partial failures, but system can continue")
            logger.info("✅ Calendar seeding completed - system can operate with available data")
            exit_code = 0  # Allow partial success for weather/traffic
        else:
            logger.info("✅ Data initialization completed successfully (weather, traffic, calendars)")
            exit_code = 0
    except Exception as e:
        # logger.exception attaches the traceback; str(e) alone loses where
        # the failure happened.
        logger.exception("❌ Fatal error during initialization", error=str(e))
    finally:
        # A failure while closing connections must not replace the exit code
        # decided above (an exception raised here would otherwise mask it).
        try:
            await database_manager.close_connections()
        except Exception as close_error:
            logger.warning(
                "Failed to close database connections cleanly",
                error=str(close_error),
            )

    sys.exit(exit_code)
def resolve_log_level(name: str, default: int = logging.INFO) -> int:
    """Translate a log-level name into its numeric ``logging`` constant.

    The lookup is case-insensitive. Only integer attributes of the
    ``logging`` module are accepted, so a name that happens to match some
    other module attribute (for example ``BASIC_FORMAT``, a string) falls
    back to ``default`` instead of being passed on as a bogus level.

    Args:
        name: Level name as given on the command line, e.g. ``"debug"``.
        default: Level returned when ``name`` is not a known level.

    Returns:
        The numeric level for ``name``, or ``default``.
    """
    level = getattr(logging, name.upper(), None)
    if isinstance(level, int) and not isinstance(level, bool):
        return level
    return default


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Initialize historical data")
    parser.add_argument("--months", type=int, default=24, help="Number of months to load")
    parser.add_argument("--log-level", default="INFO", help="Log level")
    args = parser.parse_args()

    # Convert string log level to logging constant (unknown names -> INFO)
    log_level = resolve_log_level(args.log_level)
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(log_level)
    )

    # main() ends the process itself via sys.exit with the job's exit code.
    asyncio.run(main(months=args.months))