REFACTOR external service and improve websocket training
This commit is contained in:
184
services/training/app/utils/timezone_utils.py
Normal file
184
services/training/app/utils/timezone_utils.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""
|
||||
Timezone Utility Functions
|
||||
Centralized timezone handling to ensure consistency across the training service
|
||||
"""
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional, Union
|
||||
import pandas as pd
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def ensure_timezone_aware(dt: Optional[datetime], default_tz=timezone.utc) -> Optional[datetime]:
    """
    Ensure a datetime is timezone-aware.

    A naive datetime gets ``default_tz`` attached as-is: the wall-clock
    reading is kept and no offset arithmetic is performed. A datetime that
    is already aware is returned unchanged.

    Args:
        dt: Datetime to check. ``None`` is passed through.
        default_tz: tzinfo to attach if the datetime is naive (default: UTC)

    Returns:
        Timezone-aware datetime, or ``None`` if ``dt`` is ``None``
    """
    if dt is None:
        return None

    # A datetime is only aware when its tzinfo actually yields an offset;
    # a tzinfo whose utcoffset() returns None still counts as naive.
    # The tzinfo check comes first so objects without a usable utcoffset()
    # are never asked for one when they carry no tzinfo at all.
    if dt.tzinfo is None or dt.utcoffset() is None:
        return dt.replace(tzinfo=default_tz)

    return dt
|
||||
|
||||
|
||||
def ensure_timezone_naive(dt: Optional[datetime]) -> Optional[datetime]:
    """
    Remove timezone information from a datetime.

    The tzinfo is simply dropped: the wall-clock reading is kept and is NOT
    shifted to UTC first. Convert to the desired zone before calling this if
    the resulting naive value must represent a specific zone.

    Args:
        dt: Datetime to process. ``None`` is passed through.

    Returns:
        Timezone-naive datetime, or ``None`` if ``dt`` is ``None``
    """
    # Nothing to strip for a missing value or an already-naive datetime.
    if dt is None or dt.tzinfo is None:
        return dt

    return dt.replace(tzinfo=None)
|
||||
|
||||
|
||||
def normalize_datetime_to_utc(dt: Optional[Union[datetime, pd.Timestamp]]) -> Optional[datetime]:
    """
    Normalize any datetime to UTC timezone-aware datetime.

    Naive inputs are assumed to already be expressed in UTC and are only
    tagged with the UTC tzinfo; aware inputs are converted to UTC.

    Args:
        dt: Datetime or pandas Timestamp to normalize. ``None`` is passed
            through.

    Returns:
        UTC timezone-aware datetime, or ``None`` if ``dt`` is ``None``
    """
    if dt is None:
        return None

    # Work on a plain datetime from here on.
    if isinstance(dt, pd.Timestamp):
        dt = dt.to_pydatetime()

    # If naive, assume UTC. A tzinfo that yields no offset is naive too;
    # letting it reach astimezone() would interpret the value in the
    # machine's local zone instead of UTC.
    if dt.tzinfo is None or dt.utcoffset() is None:
        return dt.replace(tzinfo=timezone.utc)

    # Aware: convert to UTC (a no-op shift when it is already UTC).
    return dt.astimezone(timezone.utc)
|
||||
|
||||
|
||||
def normalize_dataframe_datetime_column(
    df: pd.DataFrame,
    column: str,
    target_format: str = 'naive'
) -> pd.DataFrame:
    """
    Normalize a datetime column in a dataframe to consistent format.

    The column is converted in place on ``df`` (the same object is also
    returned); pass a copy if the caller's frame must stay untouched.

    Args:
        df: DataFrame to process
        column: Name of datetime column
        target_format: 'naive' (timezone dropped, wall-clock reading kept)
            or 'aware' (UTC; naive values are assumed to already be UTC)

    Returns:
        DataFrame with normalized datetime column. If ``column`` is missing
        a warning is logged and ``df`` is returned unchanged.

    Raises:
        ValueError: If ``target_format`` is neither 'naive' nor 'aware'.
            The dataframe is left unmodified in that case.
    """
    if column not in df.columns:
        logger.warning(f"Column {column} not found in dataframe")
        return df

    # Reject a bad target before touching the column, so a failed call
    # never leaves the caller's dataframe half-converted.
    if target_format not in ('naive', 'aware'):
        raise ValueError(f"Invalid target_format: {target_format}. Must be 'naive' or 'aware'")

    # Convert to datetime if not already
    df[column] = pd.to_datetime(df[column])
    has_tz = df[column].dt.tz is not None

    if target_format == 'naive':
        # Remove timezone if present
        if has_tz:
            df[column] = df[column].dt.tz_localize(None)
    elif has_tz:
        # Convert to UTC if different timezone
        df[column] = df[column].dt.tz_convert(timezone.utc)
    else:
        # Add UTC timezone if not present
        df[column] = df[column].dt.tz_localize(timezone.utc)

    return df
|
||||
|
||||
|
||||
def prepare_prophet_datetime(df: pd.DataFrame, datetime_col: str = 'ds') -> pd.DataFrame:
    """
    Return a copy of the dataframe whose datetime column is timezone-naive,
    which is the form Prophet requires.

    Args:
        df: DataFrame with datetime column (not modified; a copy is made)
        datetime_col: Name of datetime column (default: 'ds')

    Returns:
        DataFrame with Prophet-compatible datetime column
    """
    # Normalization writes into the frame it is given, so hand it a copy.
    working_copy = df.copy()
    return normalize_dataframe_datetime_column(
        working_copy,
        datetime_col,
        target_format='naive',
    )
|
||||
|
||||
|
||||
def safe_datetime_comparison(dt1: datetime, dt2: datetime) -> int:
    """
    Compare two datetimes after bringing both to UTC, so that a naive/aware
    mix does not break the comparison.

    Args:
        dt1: First datetime
        dt2: Second datetime

    Returns:
        -1 if dt1 < dt2, 0 if equal, 1 if dt1 > dt2
    """
    # Put both operands on the same (UTC, aware) footing first.
    left = normalize_datetime_to_utc(dt1)
    right = normalize_datetime_to_utc(dt2)

    if left < right:
        return -1
    if left > right:
        return 1
    return 0
|
||||
|
||||
|
||||
def get_current_utc() -> datetime:
    """
    Return the present moment as a timezone-aware datetime in UTC.

    Returns:
        Current UTC datetime
    """
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now
|
||||
|
||||
|
||||
def convert_timestamp_to_datetime(timestamp: Union[int, float, str]) -> datetime:
    """
    Convert various timestamp formats to datetime.

    Strings are parsed with ``pd.to_datetime`` and normalized to UTC.
    Numbers are treated as Unix timestamps; a value whose magnitude exceeds
    1e10 is taken to be in milliseconds and scaled down to seconds.

    Args:
        timestamp: Unix timestamp (seconds or milliseconds) or ISO string

    Returns:
        UTC timezone-aware datetime
    """
    if isinstance(timestamp, str):
        return normalize_datetime_to_utc(pd.to_datetime(timestamp))

    # Check if milliseconds (typical JavaScript timestamp). The test is on
    # the magnitude so that negative, pre-1970 millisecond values are scaled
    # as well instead of being read as seconds.
    if abs(timestamp) > 1e10:
        timestamp = timestamp / 1000

    return datetime.fromtimestamp(timestamp, tz=timezone.utc)
|
||||
Reference in New Issue
Block a user