Fix issues 3
This commit is contained in:
@@ -176,36 +176,65 @@ class TrainingDataOrchestrator:
|
||||
logger.error(f"Training data preparation failed: {str(e)}")
|
||||
raise ValueError(f"Failed to prepare training data: {str(e)}")
|
||||
|
||||
def _extract_sales_date_range(self, sales_data: List[Dict[str, Any]]) -> DateRange:
|
||||
"""Extract date range from sales data with timezone handling and strict date format."""
|
||||
if not sales_data:
|
||||
raise ValueError("No sales data provided")
|
||||
def extract_sales_date_range_utc_localize(sales_data_df: pd.DataFrame):
    """
    Extract the UTC-aware date range covered by a sales DataFrame.

    Args:
        sales_data_df: A pandas DataFrame containing a 'date' column.
            The frame itself is not modified.

    Returns:
        DateRange built from the earliest and latest non-null 'date'
        values (as UTC-aware timestamps) and DataSourceType.BAKERY_SALES.

    Raises:
        ValueError: If the DataFrame has no 'date' column, if a value
            cannot be parsed as a date, or if no non-null dates remain.
    """
    # NOTE(review): this definition takes no `self`; if it lives inside a
    # class body it likely needs @staticmethod - confirm against callers.
    if 'date' not in sales_data_df.columns:
        raise ValueError("DataFrame does not contain a 'date' column.")

    # Series.tz_localize() operates on the *index*, not the values, so the
    # previous `series.tz_localize('UTC')` failed on a plain RangeIndex.
    # utc=True localizes naive values to UTC and converts aware ones, and
    # working on a separate Series leaves the caller's DataFrame untouched.
    utc_dates = pd.to_datetime(sales_data_df['date'], utc=True).dropna()

    if utc_dates.empty:
        raise ValueError("No valid dates found in sales data")

    # Find the minimum and maximum dates
    start_date = utc_dates.min()
    end_date = utc_dates.max()

    return DateRange(start_date, end_date, DataSourceType.BAKERY_SALES)
|
||||
|
||||
def _extract_sales_date_range(self, sales_data: List[Dict[str, Any]]) -> 'DateRange':
    """
    Extract the date range covered by a list of sales records.

    Args:
        sales_data: List of sales records; the 'date' entry of each
            record is parsed with pandas.

    Returns:
        DateRange built from the earliest and latest valid dates (both
        timezone-aware, in UTC) and DataSourceType.BAKERY_SALES.

    Raises:
        ValueError: If sales_data is empty, if no record provides a
            'date' key, or if none of the dates can be parsed.
    """
    if not sales_data:
        raise ValueError("No sales data provided for date range extraction")

    # Convert to DataFrame for easier processing
    sales_df = pd.DataFrame(sales_data)

    if 'date' not in sales_df.columns:
        raise ValueError("Sales data does not contain a 'date' column")

    # utc=True localizes naive values to UTC and converts aware ones;
    # errors='coerce' turns unparseable values into NaT so they can be
    # discarded instead of aborting the whole extraction.
    parsed_dates = pd.to_datetime(sales_df['date'], utc=True, errors='coerce')
    valid_dates = parsed_dates.dropna()

    if valid_dates.empty:
        raise ValueError("No valid dates found in sales data")

    # Make the otherwise silent loss of records visible in the logs.
    ignored_count = len(parsed_dates) - len(valid_dates)
    if ignored_count:
        logger.warning(
            f"Ignored {ignored_count} sales records with missing or unparseable dates"
        )

    # Find the minimum and maximum dates
    start_date = valid_dates.min()
    end_date = valid_dates.max()

    logger.info(f"Extracted sales date range: {start_date} to {end_date}")

    return DateRange(start_date, end_date, DataSourceType.BAKERY_SALES)
|
||||
|
||||
|
||||
@@ -6,7 +6,8 @@ Main training service that uses the repository pattern for data access
|
||||
from typing import Dict, List, Any, Optional
|
||||
import uuid
|
||||
import structlog
|
||||
from datetime import datetime
|
||||
from datetime import datetime, date, timezone
|
||||
from decimal import Decimal
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
import json
|
||||
import numpy as np
|
||||
@@ -37,10 +38,26 @@ logger = structlog.get_logger()
|
||||
|
||||
def make_json_serializable(obj):
|
||||
"""Convert numpy/pandas types, datetime, and UUID objects to JSON-serializable Python types"""
|
||||
import uuid
|
||||
from decimal import Decimal
|
||||
from datetime import datetime, date
|
||||
|
||||
# Handle None values
|
||||
if obj is None:
|
||||
return None
|
||||
|
||||
# Handle basic datetime types first (most common)
|
||||
if isinstance(obj, datetime):
|
||||
return obj.isoformat()
|
||||
elif isinstance(obj, date):
|
||||
return obj.isoformat()
|
||||
|
||||
# Handle pandas timestamp types
|
||||
if hasattr(pd, 'Timestamp') and isinstance(obj, pd.Timestamp):
|
||||
return obj.isoformat()
|
||||
|
||||
# Handle numpy datetime types
|
||||
if hasattr(np, 'datetime64') and isinstance(obj, np.datetime64):
|
||||
return pd.Timestamp(obj).isoformat()
|
||||
|
||||
# Handle numeric types
|
||||
if isinstance(obj, (np.integer, pd.Int64Dtype)):
|
||||
return int(obj)
|
||||
elif isinstance(obj, (np.floating, pd.Float64Dtype)):
|
||||
@@ -51,19 +68,36 @@ def make_json_serializable(obj):
|
||||
return obj.tolist()
|
||||
elif isinstance(obj, pd.DataFrame):
|
||||
return obj.to_dict('records')
|
||||
elif isinstance(obj, Decimal):
|
||||
return float(obj)
|
||||
|
||||
# Handle UUID types
|
||||
elif isinstance(obj, uuid.UUID):
|
||||
return str(obj)
|
||||
elif hasattr(obj, '__class__') and 'UUID' in str(obj.__class__):
|
||||
# Handle any UUID-like objects (including asyncpg.pgproto.pgproto.UUID)
|
||||
return str(obj)
|
||||
elif isinstance(obj, Decimal):
|
||||
return float(obj)
|
||||
|
||||
# Handle collections recursively
|
||||
elif isinstance(obj, dict):
|
||||
return {k: make_json_serializable(v) for k, v in obj.items()}
|
||||
elif isinstance(obj, list):
|
||||
elif isinstance(obj, (list, tuple)):
|
||||
return [make_json_serializable(item) for item in obj]
|
||||
else:
|
||||
elif isinstance(obj, set):
|
||||
return [make_json_serializable(item) for item in obj]
|
||||
|
||||
# Handle other common types
|
||||
elif isinstance(obj, (str, int, float, bool)):
|
||||
return obj
|
||||
|
||||
# Last resort: try to convert to string
|
||||
else:
|
||||
try:
|
||||
# For any other object, try to convert to string
|
||||
return str(obj)
|
||||
except Exception:
|
||||
# If all else fails, return None
|
||||
return None
|
||||
|
||||
|
||||
class EnhancedTrainingService:
|
||||
|
||||
Reference in New Issue
Block a user