Files
bakery-ia/services/training/app/utils/timezone_utils.py

185 lines
4.5 KiB
Python
Raw Normal View History

"""
Timezone Utility Functions
Centralized timezone handling to ensure consistency across the training service
"""
from datetime import datetime, timezone
from typing import Optional, Union
import pandas as pd
import logging
logger = logging.getLogger(__name__)
def ensure_timezone_aware(dt: datetime, default_tz=timezone.utc) -> datetime:
    """
    Return ``dt`` with timezone information attached.

    A naive datetime is tagged with ``default_tz``; its clock fields are not
    shifted. A datetime that already carries a tzinfo is returned unchanged.

    Args:
        dt: Datetime to check. ``None`` is passed straight through.
        default_tz: tzinfo attached when ``dt`` is naive (default: UTC)

    Returns:
        Timezone-aware datetime, or ``None`` when ``dt`` is ``None``
    """
    if dt is not None and dt.tzinfo is None:
        return dt.replace(tzinfo=default_tz)
    # Either None or already aware: nothing to attach.
    return dt
def ensure_timezone_naive(dt: datetime) -> datetime:
    """
    Strip the tzinfo from a datetime.

    The clock fields are kept exactly as they are: the value is NOT converted
    to UTC (or any other zone) before the tzinfo is dropped.

    Args:
        dt: Datetime to process. ``None`` is passed straight through.

    Returns:
        Timezone-naive datetime, or ``None`` when ``dt`` is ``None``
    """
    if dt is None or dt.tzinfo is None:
        # Nothing to strip.
        return dt
    return dt.replace(tzinfo=None)
def normalize_datetime_to_utc(dt: Union[datetime, pd.Timestamp]) -> datetime:
    """
    Coerce a datetime or pandas Timestamp into a UTC-aware ``datetime``.

    Args:
        dt: Datetime or pandas Timestamp to normalize. ``None`` is passed
            straight through.

    Returns:
        UTC timezone-aware datetime, or ``None`` when ``dt`` is ``None``
    """
    if dt is None:
        return None

    # pandas Timestamps are first turned into plain datetime objects.
    value = dt.to_pydatetime() if isinstance(dt, pd.Timestamp) else dt

    if value.tzinfo is not None:
        # Aware value: shift it onto the UTC timeline.
        return value.astimezone(timezone.utc)

    # Naive value: assumed to already be expressed in UTC, so just tag it.
    return value.replace(tzinfo=timezone.utc)
def normalize_dataframe_datetime_column(
    df: pd.DataFrame,
    column: str,
    target_format: str = 'naive'
) -> pd.DataFrame:
    """
    Normalize a datetime column in a dataframe to a consistent format.

    The column is reassigned on the DataFrame that was passed in (no copy is
    made) and that same DataFrame object is returned.

    Args:
        df: DataFrame to process
        column: Name of datetime column
        target_format: 'naive' drops any timezone from the column, keeping
            the clock values as they are; 'aware' yields a UTC column
            (naive values are tagged as UTC, aware values are converted).

    Returns:
        The DataFrame with the normalized datetime column. If ``column`` is
        not present, a warning is logged and ``df`` is returned untouched.

    Raises:
        ValueError: If ``target_format`` is neither 'naive' nor 'aware'.
            The DataFrame is left unmodified in that case.
    """
    if column not in df.columns:
        logger.warning(f"Column {column} not found in dataframe")
        return df

    # Validate before touching the frame, so a bad ``target_format`` cannot
    # leave the caller's column converted while the call itself fails.
    if target_format not in ('naive', 'aware'):
        raise ValueError(f"Invalid target_format: {target_format}. Must be 'naive' or 'aware'")

    # Convert to datetime if not already
    values = pd.to_datetime(df[column])
    current_tz = values.dt.tz

    if target_format == 'naive':
        # Remove timezone if present
        if current_tz is not None:
            values = values.dt.tz_localize(None)
    elif current_tz is None:
        # Naive values are taken to be UTC already
        values = values.dt.tz_localize(timezone.utc)
    else:
        # Convert to UTC if different timezone
        values = values.dt.tz_convert(timezone.utc)

    # Single write-back once the whole conversion has succeeded.
    df[column] = values
    return df
def prepare_prophet_datetime(df: pd.DataFrame, datetime_col: str = 'ds') -> pd.DataFrame:
    """
    Build a copy of ``df`` whose datetime column is timezone-naive, as
    required by Prophet.

    Args:
        df: DataFrame with datetime column
        datetime_col: Name of datetime column (default: 'ds')

    Returns:
        A copy of ``df`` with the datetime column normalized via
        ``normalize_dataframe_datetime_column(..., target_format='naive')``
    """
    # Work on a copy so the caller's frame is not modified.
    prepared = df.copy()
    return normalize_dataframe_datetime_column(prepared, datetime_col, target_format='naive')
def safe_datetime_comparison(dt1: datetime, dt2: datetime) -> int:
    """
    Three-way comparison of two datetimes that tolerates a naive/aware mix.

    Both operands are first passed through ``normalize_datetime_to_utc`` so
    they are compared on the same footing.

    Args:
        dt1: First datetime
        dt2: Second datetime

    Returns:
        -1 if dt1 < dt2, 0 if equal, 1 if dt1 > dt2
    """
    left = normalize_datetime_to_utc(dt1)
    right = normalize_datetime_to_utc(dt2)

    if left < right:
        return -1
    return 1 if left > right else 0
def get_current_utc() -> datetime:
    """
    Return the current moment as a timezone-aware datetime in UTC.

    Returns:
        Current UTC datetime (``tzinfo`` is ``timezone.utc``)
    """
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now
def convert_timestamp_to_datetime(timestamp: Union[int, float, str]) -> datetime:
    """
    Convert various timestamp formats to datetime.

    Args:
        timestamp: Unix timestamp (seconds or milliseconds) or a date string
            that ``pandas.to_datetime`` can parse (e.g. ISO 8601).
            Numeric values whose magnitude exceeds 1e10 are treated as
            milliseconds; everything else is treated as seconds.

    Returns:
        UTC timezone-aware datetime
    """
    if isinstance(timestamp, str):
        # Let pandas parse the text, then coerce the result to aware UTC.
        parsed = pd.to_datetime(timestamp)
        return normalize_datetime_to_utc(parsed)

    # Check if milliseconds (typical JavaScript timestamp). The test is done
    # on the absolute value so that negative (pre-1970) millisecond
    # timestamps are scaled too, instead of being fed to fromtimestamp() as
    # an enormous number of seconds.
    if abs(timestamp) > 1e10:
        timestamp = timestamp / 1000

    return datetime.fromtimestamp(timestamp, tz=timezone.utc)