From cafd316c4b53ad02f8e01886f235fe73ab0a0450 Mon Sep 17 00:00:00 2001
From: Urtzi Alfaro
Date: Sun, 17 Aug 2025 13:35:05 +0200
Subject: [PATCH] Fix issues 3

---
 .../src/pages/onboarding/OnboardingPage.tsx    | 15 +++-
 frontend/src/store/slices/authSlice.ts         |  7 +-
 .../sales/app/services/data_import_service.py  | 55 ++++++++++---
 .../app/services/training_orchestrator.py      | 79 +++++++++++++------
 .../training/app/services/training_service.py  | 50 ++++++++++--
 5 files changed, 157 insertions(+), 49 deletions(-)

diff --git a/frontend/src/pages/onboarding/OnboardingPage.tsx b/frontend/src/pages/onboarding/OnboardingPage.tsx
index 9f5779f1..1fb1238a 100644
--- a/frontend/src/pages/onboarding/OnboardingPage.tsx
+++ b/frontend/src/pages/onboarding/OnboardingPage.tsx
@@ -1,7 +1,7 @@
 import React, { useState, useEffect, useCallback, useRef } from 'react';
 import { ChevronLeft, ChevronRight, Upload, MapPin, Store, Check, Brain, Clock, CheckCircle, AlertTriangle, Loader, TrendingUp } from 'lucide-react';
 import { useNavigate } from 'react-router-dom';
-import { useSelector } from 'react-redux';
+import { useSelector, useDispatch } from 'react-redux';
 import toast from 'react-hot-toast';

 import SimplifiedTrainingProgress from '../../components/SimplifiedTrainingProgress';
@@ -19,6 +19,7 @@ import { useTraining } from '../../api/hooks/useTraining';
 import { OnboardingRouter } from '../../utils/onboardingRouter';

 import type { RootState } from '../../store';
+import { completeOnboarding } from '../../store/slices/authSlice';

 interface OnboardingPageProps {
   user?: any;
@@ -53,13 +54,17 @@ const MADRID_PRODUCTS = [

 const OnboardingPage: React.FC<OnboardingPageProps> = ({ user: propUser, onComplete: propOnComplete }) => {
   const navigate = useNavigate();
+  const dispatch = useDispatch();
   const { user: reduxUser } = useSelector((state: RootState) => state.auth);

   // Use prop user if provided, otherwise use Redux user
   const user = propUser || reduxUser;

-  // Use prop onComplete if provided, otherwise navigate to dashboard
-  const onComplete = propOnComplete || (() => navigate('/app/dashboard'));
+  // Use prop onComplete if provided, otherwise create a function that marks onboarding complete and navigates
+  const onComplete = propOnComplete || (() => {
+    dispatch(completeOnboarding());
+    navigate('/app/dashboard');
+  });

   const [currentStep, setCurrentStep] = useState(1);
   const [isLoading, setIsLoading] = useState(false);
@@ -600,12 +605,16 @@ const OnboardingPage: React.FC<OnboardingPageProps> = ({ user: propUser, onCompl
         final_step: true
       });

+      // Mark onboarding as complete in Redux state
+      dispatch(completeOnboarding());
+
       // Complete onboarding
       toast.success('¡Configuración completada exitosamente!');
       onComplete();
     } catch (error) {
       // Failed to mark final step as completed
       // Continue anyway for better UX
+      dispatch(completeOnboarding());
       toast.success('¡Configuración completada exitosamente!');
       onComplete();
     }
diff --git a/frontend/src/store/slices/authSlice.ts b/frontend/src/store/slices/authSlice.ts
index 2f6a0f50..5f5cd0a2 100644
--- a/frontend/src/store/slices/authSlice.ts
+++ b/frontend/src/store/slices/authSlice.ts
@@ -58,8 +58,13 @@ const authSlice = createSlice({
     clearError: (state) => {
       state.error = null;
     },
+    completeOnboarding: (state) => {
+      if (state.user) {
+        state.user.isOnboardingComplete = true;
+      }
+    },
   },
 });

-export const { loginStart, loginSuccess, loginFailure, logout, clearError } = authSlice.actions;
+export const { loginStart, loginSuccess, loginFailure, logout, clearError, completeOnboarding } = authSlice.actions;
 export default authSlice.reducer;
\ No newline at end of file
diff --git a/services/sales/app/services/data_import_service.py b/services/sales/app/services/data_import_service.py
index b946bfa1..7fd93bc6 100644
--- a/services/sales/app/services/data_import_service.py
+++ b/services/sales/app/services/data_import_service.py
@@ -865,34 +865,65 @@ class DataImportService:
         return mapping

     def _parse_date(self, date_str: str) -> Optional[datetime]:
-        """Enhanced date parsing with pandas and multiple format support"""
+        """Enhanced date parsing with explicit format handling for CSV dates"""
         if not date_str or str(date_str).lower() in ['nan', 'null', 'none']:
             return None

         date_str = str(date_str).strip()

-        # Try pandas first (most robust)
+        # For CSV format like "2024/10/01", try specific formats first to avoid ambiguity
+        # Priority order: YYYY/MM/DD (most likely for machine-generated data)
+        priority_formats = [
+            '%Y/%m/%d',  # 2024/10/01 (October 1, 2024) - most likely for CSV exports
+            '%Y-%m-%d',  # 2024-10-01
+            '%d/%m/%Y',  # 01/10/2024 (European format)
+            '%m/%d/%Y',  # 10/01/2024 (US format)
+        ]
+
+        # Try priority formats first
+        for fmt in priority_formats:
+            try:
+                parsed_dt = datetime.strptime(date_str, fmt)
+                if parsed_dt.tzinfo is None:
+                    parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
+                logger.debug(f"Successfully parsed date '{date_str}' using format '{fmt}' -> {parsed_dt}")
+                return parsed_dt
+            except ValueError:
+                continue
+
+        # Try pandas as fallback with explicit format inference
         try:
-            parsed_dt = pd.to_datetime(date_str, dayfirst=True)
+            # For YYYY/MM/DD format, disable dayfirst to prevent misinterpretation
+            if '/' in date_str and len(date_str.split('/')[0]) == 4:
+                # Looks like YYYY/MM/DD format, so don't use dayfirst
+                parsed_dt = pd.to_datetime(date_str, dayfirst=False)
+            else:
+                # For other formats, use dayfirst=True for European-style dates
+                parsed_dt = pd.to_datetime(date_str, dayfirst=True)
+
             if hasattr(parsed_dt, 'to_pydatetime'):
                 parsed_dt = parsed_dt.to_pydatetime()
             if parsed_dt.tzinfo is None:
                 parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
+            logger.debug(f"Successfully parsed date '{date_str}' using pandas -> {parsed_dt}")
             return parsed_dt
-        except Exception:
+        except Exception as e:
+            logger.debug(f"Pandas date parsing failed for '{date_str}': {e}")
             pass

-        # Try specific formats as fallback
+        # Try remaining formats as last fallback
         for fmt in self.DATE_FORMATS:
-            try:
-                parsed_dt = datetime.strptime(date_str, fmt)
-                if parsed_dt.tzinfo is None:
-                    parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
-                return parsed_dt
-            except ValueError:
-                continue
+            if fmt not in priority_formats:  # Skip already tried formats
+                try:
+                    parsed_dt = datetime.strptime(date_str, fmt)
+                    if parsed_dt.tzinfo is None:
+                        parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
+                    logger.debug(f"Successfully parsed date '{date_str}' using fallback format '{fmt}' -> {parsed_dt}")
+                    return parsed_dt
+                except ValueError:
+                    continue

         logger.warning(f"Could not parse date: {date_str}")
         return None
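
The hunk above keeps ambiguous CSV dates such as "2024/10/01" from being read as 10 January. A minimal, self-contained sketch of the same priority-format idea follows; the helper name, format list, and assertion are illustrative only and are not the service's actual API.

    from datetime import datetime, timezone
    from typing import Optional

    import pandas as pd

    # Illustrative priority list: unambiguous, year-first formats are tried before
    # anything that could be misread as day-first or month-first.
    PRIORITY_FORMATS = ['%Y/%m/%d', '%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y']

    def parse_date_prioritized(date_str: str) -> Optional[datetime]:
        """Try explicit formats first, then fall back to pandas inference."""
        date_str = str(date_str).strip()
        for fmt in PRIORITY_FORMATS:
            try:
                return datetime.strptime(date_str, fmt).replace(tzinfo=timezone.utc)
            except ValueError:
                continue
        # Fallback: let pandas infer, disabling dayfirst for year-leading strings
        year_first = '/' in date_str and len(date_str.split('/')[0]) == 4
        try:
            dt = pd.to_datetime(date_str, dayfirst=not year_first).to_pydatetime()
            return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
        except (ValueError, TypeError):
            return None

    # "2024/10/01" now parses as October 1, 2024 rather than January 10.
    assert parse_date_prioritized('2024/10/01').month == 10
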
diff --git a/services/training/app/services/training_orchestrator.py b/services/training/app/services/training_orchestrator.py
index d2cc3dfd..518efafb 100644
--- a/services/training/app/services/training_orchestrator.py
+++ b/services/training/app/services/training_orchestrator.py
@@ -176,36 +176,65 @@ class TrainingDataOrchestrator:
             logger.error(f"Training data preparation failed: {str(e)}")
             raise ValueError(f"Failed to prepare training data: {str(e)}")

-    def _extract_sales_date_range(self, sales_data: List[Dict[str, Any]]) -> DateRange:
-        """Extract date range from sales data with timezone handling and strict date format."""
-        if not sales_data:
-            raise ValueError("No sales data provided")
+    def extract_sales_date_range_utc_localize(sales_data_df: pd.DataFrame):
+        """
+        Extracts the UTC-aware date range from a sales DataFrame using tz_localize.

-        dates = []
-
-        for record in sales_data:
-            date_value = record.get('date')
-            if not date_value:
-                continue  # Skip records with missing date
+        Args:
+            sales_data_df: A pandas DataFrame containing a 'date' column.

-            if isinstance(date_value, str):
-                # Parse string with explicit format
-                dt = pd.to_datetime(date_value, format='mixed', errors='raise')
-                if dt.tz is None:
-                    dt = dt.tz_localize('UTC')  # Assign UTC timezone if none
-                dates.append(dt.to_pydatetime())
-            elif isinstance(date_value, datetime):
-                if date_value.tzinfo is None:
-                    date_value = date_value.replace(tzinfo=timezone.utc)
-                dates.append(date_value)
-            else:
-                continue
+        Returns:
+            A DateRange with timezone-aware start and end dates in UTC.
+        """
+        if 'date' not in sales_data_df.columns:
+            raise ValueError("DataFrame does not contain a 'date' column.")

-        if not dates:
+        # Convert the 'date' column to datetime objects
+        sales_data_df['date'] = pd.to_datetime(sales_data_df['date'])
+
+        # Localize the naive datetime objects to UTC
+        sales_data_df['date'] = sales_data_df['date'].tz_localize('UTC')
+
+        # Find the minimum and maximum dates
+        start_date = sales_data_df['date'].min()
+        end_date = sales_data_df['date'].max()
+
+        return DateRange(start_date, end_date, DataSourceType.BAKERY_SALES)
+
+    def _extract_sales_date_range(self, sales_data: List[Dict[str, Any]]) -> 'DateRange':
+        """
+        Extract date range from sales data with proper date parsing
+
+        Args:
+            sales_data: List of sales records
+
+        Returns:
+            DateRange object with timezone-aware start and end dates
+        """
+        if not sales_data:
+            raise ValueError("No sales data provided for date range extraction")
+
+        # Convert to DataFrame for easier processing
+        sales_df = pd.DataFrame(sales_data)
+
+        if 'date' not in sales_df.columns:
+            raise ValueError("Sales data does not contain a 'date' column")
+
+        # Convert dates to datetime with proper parsing
+        # This will use the improved date parsing from the data import service
+        sales_df['date'] = pd.to_datetime(sales_df['date'], utc=True, errors='coerce')
+
+        # Remove any rows with invalid dates
+        sales_df = sales_df.dropna(subset=['date'])
+
+        if len(sales_df) == 0:
             raise ValueError("No valid dates found in sales data")

-        start_date = min(dates)
-        end_date = max(dates)
+        # Find the minimum and maximum dates
+        start_date = sales_df['date'].min()
+        end_date = sales_df['date'].max()
+
+        logger.info(f"Extracted sales date range: {start_date} to {end_date}")

         return DateRange(start_date, end_date, DataSourceType.BAKERY_SALES)
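
As a rough usage check, the rewritten extraction can be exercised on a plain list of records. The sketch below mirrors the logic of _extract_sales_date_range but returns a plain tuple, since DateRange and DataSourceType are project-specific types; the record values are invented for illustration.

    from typing import Any, Dict, List, Tuple
    import pandas as pd

    def extract_date_range(sales_data: List[Dict[str, Any]]) -> Tuple[pd.Timestamp, pd.Timestamp]:
        if not sales_data:
            raise ValueError("No sales data provided for date range extraction")
        sales_df = pd.DataFrame(sales_data)
        if 'date' not in sales_df.columns:
            raise ValueError("Sales data does not contain a 'date' column")
        # utc=True yields timezone-aware timestamps; errors='coerce' turns
        # unparseable values into NaT so they can be dropped instead of raising.
        sales_df['date'] = pd.to_datetime(sales_df['date'], utc=True, errors='coerce')
        sales_df = sales_df.dropna(subset=['date'])
        if sales_df.empty:
            raise ValueError("No valid dates found in sales data")
        return sales_df['date'].min(), sales_df['date'].max()

    start, end = extract_date_range([{'date': '2024/10/01', 'quantity': 12},
                                     {'date': '2024/12/24', 'quantity': 30}])
    print(start.tzinfo, (end - start).days)  # UTC 84
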
diff --git a/services/training/app/services/training_service.py b/services/training/app/services/training_service.py
index 1518e526..d28fc4ab 100644
--- a/services/training/app/services/training_service.py
+++ b/services/training/app/services/training_service.py
@@ -6,7 +6,8 @@ Main training service that uses the repository pattern for data access
 from typing import Dict, List, Any, Optional
 import uuid
 import structlog
-from datetime import datetime
+from datetime import datetime, date, timezone
+from decimal import Decimal
 from sqlalchemy.ext.asyncio import AsyncSession
 import json
 import numpy as np
@@ -37,10 +38,26 @@ logger = structlog.get_logger()

 def make_json_serializable(obj):
     """Convert numpy/pandas types, datetime, and UUID objects to JSON-serializable Python types"""
-    import uuid
-    from decimal import Decimal
-    from datetime import datetime, date
+    # Handle None values
+    if obj is None:
+        return None
+
+    # Handle basic datetime types first (most common)
+    if isinstance(obj, datetime):
+        return obj.isoformat()
+    elif isinstance(obj, date):
+        return obj.isoformat()
+
+    # Handle pandas timestamp types
+    if hasattr(pd, 'Timestamp') and isinstance(obj, pd.Timestamp):
+        return obj.isoformat()
+
+    # Handle numpy datetime types
+    if hasattr(np, 'datetime64') and isinstance(obj, np.datetime64):
+        return pd.Timestamp(obj).isoformat()
+

+    # Handle numeric types
     if isinstance(obj, (np.integer, pd.Int64Dtype)):
         return int(obj)
     elif isinstance(obj, (np.floating, pd.Float64Dtype)):
@@ -51,19 +68,36 @@ def make_json_serializable(obj):
         return obj.tolist()
     elif isinstance(obj, pd.DataFrame):
         return obj.to_dict('records')
+    elif isinstance(obj, Decimal):
+        return float(obj)
+
+    # Handle UUID types
     elif isinstance(obj, uuid.UUID):
         return str(obj)
     elif hasattr(obj, '__class__') and 'UUID' in str(obj.__class__):
         # Handle any UUID-like objects (including asyncpg.pgproto.pgproto.UUID)
         return str(obj)
-    elif isinstance(obj, Decimal):
-        return float(obj)
+
+    # Handle collections recursively
     elif isinstance(obj, dict):
         return {k: make_json_serializable(v) for k, v in obj.items()}
-    elif isinstance(obj, list):
+    elif isinstance(obj, (list, tuple)):
         return [make_json_serializable(item) for item in obj]
-    else:
+    elif isinstance(obj, set):
+        return [make_json_serializable(item) for item in obj]
+
+    # Handle other common types
+    elif isinstance(obj, (str, int, float, bool)):
         return obj
+
+    # Last resort: try to convert to string
+    else:
+        try:
+            # For any other object, try to convert to string
+            return str(obj)
+        except Exception:
+            # If all else fails, return None
+            return None


 class EnhancedTrainingService:
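
For reference, the broadened converter can be exercised on a representative payload before json.dumps. The metric names below are invented for illustration; only the conversions (numpy scalars, Decimal, UUID, timestamps, and nested tuples) mirror what the patched helper handles.

    import json
    import uuid
    from datetime import datetime, timezone
    from decimal import Decimal

    import numpy as np
    import pandas as pd

    # Assumes make_json_serializable from the patched training_service module is in scope.
    metrics = {
        'model_id': uuid.uuid4(),                      # -> str
        'trained_at': datetime.now(timezone.utc),      # -> ISO-8601 string
        'mape': np.float64(12.5),                      # -> float
        'epochs': np.int64(40),                        # -> int
        'unit_price': Decimal('1.95'),                 # -> float
        'window': (pd.Timestamp('2024-10-01', tz='UTC'),
                   pd.Timestamp('2024-12-24', tz='UTC')),  # tuple -> list of ISO strings
    }

    # The helper recurses through dicts, lists/tuples and sets, so the whole
    # structure becomes plain JSON-safe values.
    print(json.dumps(make_json_serializable(metrics), indent=2))
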