"""
Timezone Utility Functions
Centralized timezone handling to ensure consistency across the training service
"""

import logging
from datetime import datetime, timezone
from typing import Optional, Union

import pandas as pd

logger = logging.getLogger(__name__)

# Numeric timestamps whose magnitude exceeds this are treated as milliseconds
# (typical JavaScript timestamps) rather than seconds.
_MILLISECOND_THRESHOLD = 1e10

# Accepted values for ``target_format`` in normalize_dataframe_datetime_column.
_VALID_TARGET_FORMATS = ('naive', 'aware')


def ensure_timezone_aware(
    dt: Optional[datetime], default_tz=timezone.utc
) -> Optional[datetime]:
    """
    Ensure a datetime is timezone-aware.

    A naive datetime is tagged with ``default_tz`` without shifting its clock
    time; an already-aware datetime is returned unchanged.

    Args:
        dt: Datetime to check (``None`` is passed through)
        default_tz: Timezone to apply if datetime is naive (default: UTC)

    Returns:
        Timezone-aware datetime, or ``None`` if ``dt`` is ``None``
    """
    if dt is None:
        return None
    if dt.tzinfo is None:
        return dt.replace(tzinfo=default_tz)
    return dt


def ensure_timezone_naive(dt: Optional[datetime]) -> Optional[datetime]:
    """
    Remove timezone information from a datetime.

    The tzinfo is dropped as-is: the clock time is NOT converted to UTC (or
    any other zone) first.

    Args:
        dt: Datetime to process (``None`` is passed through)

    Returns:
        Timezone-naive datetime, or ``None`` if ``dt`` is ``None``
    """
    if dt is None:
        return None
    if dt.tzinfo is not None:
        return dt.replace(tzinfo=None)
    return dt


def normalize_datetime_to_utc(
    dt: Optional[Union[datetime, pd.Timestamp]]
) -> Optional[datetime]:
    """
    Normalize any datetime to UTC timezone-aware datetime.

    Args:
        dt: Datetime or pandas Timestamp to normalize (``None`` is passed
            through)

    Returns:
        UTC timezone-aware datetime, or ``None`` if ``dt`` is ``None``
    """
    if dt is None:
        return None

    # Handle pandas Timestamp
    if isinstance(dt, pd.Timestamp):
        dt = dt.to_pydatetime()

    # If naive, assume UTC
    if dt.tzinfo is None:
        return dt.replace(tzinfo=timezone.utc)

    # If aware but not UTC, convert to UTC
    return dt.astimezone(timezone.utc)


def normalize_dataframe_datetime_column(
    df: pd.DataFrame,
    column: str,
    target_format: str = 'naive'
) -> pd.DataFrame:
    """
    Normalize a datetime column in a dataframe to consistent format.

    The column is replaced on ``df`` itself (the input frame is modified) and
    the same frame is returned. If ``column`` is missing, a warning is logged
    and ``df`` is returned untouched.

    Args:
        df: DataFrame to process
        column: Name of datetime column
        target_format: 'naive' (timezone removed, local clock time kept) or
            'aware' (UTC; naive values are assumed to already be UTC)

    Returns:
        DataFrame with normalized datetime column

    Raises:
        ValueError: If ``target_format`` is not 'naive' or 'aware'. The check
            runs before the column is touched, so ``df`` is left unmodified.
    """
    if column not in df.columns:
        logger.warning("Column %s not found in dataframe", column)
        return df

    # Validate up front so a bad argument never leaves df half-converted.
    if target_format not in _VALID_TARGET_FORMATS:
        raise ValueError(
            f"Invalid target_format: {target_format}. Must be 'naive' or 'aware'"
        )

    # Convert to datetime if not already
    converted = pd.to_datetime(df[column])
    has_timezone = converted.dt.tz is not None

    if target_format == 'naive':
        # Remove timezone if present
        if has_timezone:
            converted = converted.dt.tz_localize(None)
    elif has_timezone:
        # Convert to UTC if different timezone
        converted = converted.dt.tz_convert(timezone.utc)
    else:
        # Add UTC timezone if not present
        converted = converted.dt.tz_localize(timezone.utc)

    df[column] = converted
    return df


def prepare_prophet_datetime(df: pd.DataFrame, datetime_col: str = 'ds') -> pd.DataFrame:
    """
    Prepare datetime column for Prophet (requires timezone-naive datetimes).

    Works on a copy, so the caller's DataFrame is not modified.

    Args:
        df: DataFrame with datetime column
        datetime_col: Name of datetime column (default: 'ds')

    Returns:
        DataFrame with Prophet-compatible datetime column
    """
    return normalize_dataframe_datetime_column(
        df.copy(), datetime_col, target_format='naive'
    )


def safe_datetime_comparison(dt1: datetime, dt2: datetime) -> int:
    """
    Safely compare two datetimes, handling timezone mismatches.

    Both values are normalized to UTC first (naive values are assumed to be
    UTC), so naive and aware datetimes can be compared with each other.

    Args:
        dt1: First datetime
        dt2: Second datetime

    Returns:
        -1 if dt1 < dt2, 0 if equal, 1 if dt1 > dt2

    Raises:
        TypeError: If either argument is ``None`` (``None`` cannot be ordered)
    """
    # Normalize both to UTC for comparison
    first_utc = normalize_datetime_to_utc(dt1)
    second_utc = normalize_datetime_to_utc(dt2)

    if first_utc < second_utc:
        return -1
    if first_utc > second_utc:
        return 1
    return 0


def get_current_utc() -> datetime:
    """
    Get current datetime in UTC with timezone awareness.

    Returns:
        Current UTC datetime
    """
    return datetime.now(timezone.utc)


def convert_timestamp_to_datetime(timestamp: Union[int, float, str]) -> datetime:
    """
    Convert various timestamp formats to datetime.

    Strings are parsed with ``pd.to_datetime`` and normalized to UTC (a string
    without an offset is assumed to be UTC). Numbers are Unix timestamps; a
    value whose magnitude exceeds 1e10 is interpreted as milliseconds, which
    also covers negative (pre-1970) millisecond timestamps.

    Args:
        timestamp: Unix timestamp (seconds or milliseconds) or ISO string

    Returns:
        UTC timezone-aware datetime
    """
    if isinstance(timestamp, str):
        return normalize_datetime_to_utc(pd.to_datetime(timestamp))

    # Check if milliseconds (typical JavaScript timestamp). Compare on
    # magnitude so pre-epoch millisecond values are scaled as well.
    if abs(timestamp) > _MILLISECOND_THRESHOLD:
        timestamp = timestamp / 1000

    return datetime.fromtimestamp(timestamp, tz=timezone.utc)