Fix issues 3
This commit is contained in:
@@ -176,36 +176,65 @@ class TrainingDataOrchestrator:
|
||||
logger.error(f"Training data preparation failed: {str(e)}")
|
||||
raise ValueError(f"Failed to prepare training data: {str(e)}")
|
||||
|
||||
def extract_sales_date_range_utc_localize(sales_data_df: pd.DataFrame):
    """
    Extract the UTC-aware date range covered by a sales DataFrame.

    Args:
        sales_data_df: A pandas DataFrame containing a 'date' column. The
            frame itself is not modified.

    Returns:
        A DateRange tagged DataSourceType.BAKERY_SALES whose start and end
        are the earliest and latest timezone-aware (UTC) timestamps found
        in the 'date' column.

    Raises:
        ValueError: If the 'date' column is missing, or if it contains no
            valid (non-null) dates. Unparseable values propagate the
            parsing error raised by pandas.
    """
    if 'date' not in sales_data_df.columns:
        raise ValueError("DataFrame does not contain a 'date' column.")

    # Parse into a local Series so the caller's DataFrame is left untouched.
    # utc=True localizes naive values to UTC and converts aware values to
    # UTC. Series.tz_localize() must not be used here: it localizes the
    # *index*, not the values, and fails on a default integer index.
    dates = pd.to_datetime(sales_data_df['date'], utc=True).dropna()

    # min()/max() of an empty or all-null column would be NaT; fail loudly
    # instead of building a DateRange with missing bounds.
    if dates.empty:
        raise ValueError("No valid dates found in sales data")

    return DateRange(dates.min(), dates.max(), DataSourceType.BAKERY_SALES)
|
||||
|
||||
def _extract_sales_date_range(self, sales_data: List[Dict[str, Any]]) -> 'DateRange':
    """
    Extract the date range covered by a list of sales records.

    Args:
        sales_data: List of sales records; each record is expected to
            carry a 'date' entry.

    Returns:
        DateRange object tagged DataSourceType.BAKERY_SALES with
        timezone-aware (UTC) start and end dates.

    Raises:
        ValueError: If no records are given, no record has a 'date' key,
            or none of the dates can be parsed.
    """
    if not sales_data:
        raise ValueError("No sales data provided for date range extraction")

    # Convert to DataFrame for easier processing
    sales_df = pd.DataFrame(sales_data)

    if 'date' not in sales_df.columns:
        raise ValueError("Sales data does not contain a 'date' column")

    # utc=True yields timezone-aware UTC timestamps; errors='coerce' turns
    # unparseable values into NaT so they can be filtered out below.
    parsed_dates = pd.to_datetime(sales_df['date'], utc=True, errors='coerce')
    valid_dates = parsed_dates.dropna()

    if valid_dates.empty:
        raise ValueError("No valid dates found in sales data")

    # Surface how many records were ignored instead of dropping them silently.
    skipped = len(parsed_dates) - len(valid_dates)
    if skipped:
        logger.warning(f"Ignored {skipped} sales records with missing or invalid dates")

    # Find the minimum and maximum dates
    start_date = valid_dates.min()
    end_date = valid_dates.max()

    logger.info(f"Extracted sales date range: {start_date} to {end_date}")

    return DateRange(start_date, end_date, DataSourceType.BAKERY_SALES)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user