Checking onboarding flow - fix 1

This commit is contained in:
Urtzi Alfaro
2025-07-27 10:01:37 +02:00
parent abad270282
commit cb3ae4d78b
4 changed files with 494 additions and 181 deletions

View File

@@ -10,11 +10,11 @@ import base64
import openpyxl
import pandas as pd
from typing import Dict, Any, List, Optional, Union
from datetime import datetime, timedelta
from sqlalchemy.ext.asyncio import AsyncSession
import structlog
import re
from pathlib import Path
from datetime import datetime, timezone
from app.services.sales_service import SalesService
from app.schemas.sales import SalesDataCreate
@@ -633,7 +633,7 @@ class DataImportService:
@staticmethod
def _parse_date(date_str: str) -> Optional[datetime]:
"""Parse date string with multiple format attempts"""
"""Parse date string with multiple format attempts - FIXED for timezone"""
if not date_str or str(date_str).lower() in ['nan', 'null', 'none']:
return None
@@ -642,36 +642,61 @@ class DataImportService:
# Try pandas first (handles most formats automatically)
try:
return pd.to_datetime(date_str, dayfirst=True)
except:
parsed_dt = pd.to_datetime(date_str, dayfirst=True)
# ✅ CRITICAL FIX: Convert pandas Timestamp to timezone-aware datetime
if hasattr(parsed_dt, 'to_pydatetime'):
# Convert pandas Timestamp to Python datetime
parsed_dt = parsed_dt.to_pydatetime()
# ✅ CRITICAL FIX: Ensure timezone-aware
if parsed_dt.tzinfo is None:
# Assume UTC for timezone-naive dates
parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
return parsed_dt
except Exception:
pass
# Try specific formats
for fmt in DataImportService.DATE_FORMATS:
try:
return datetime.strptime(date_str, fmt)
parsed_dt = datetime.strptime(date_str, fmt)
# ✅ CRITICAL FIX: Ensure timezone-aware
if parsed_dt.tzinfo is None:
parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
return parsed_dt
except ValueError:
continue
# Try extracting numbers and common patterns
try:
# Look for patterns like dd/mm/yyyy or dd-mm-yyyy
date_pattern = re.search(r'(\d{1,2})[/\-.](\d{1,2})[/\-.](\d{2,4})', date_str)
date_pattern = re.search(r'(\d{1,2})[/\-.](\d{1,2})[/\-.](\d{4})', date_str)
if date_pattern:
day, month, year = date_pattern.groups()
# Convert 2-digit year to 4-digit
year = int(year)
if year < 50:
year += 2000
elif year < 100:
year += 1900
# Try dd/mm/yyyy format (European style)
try:
parsed_dt = datetime(int(year), int(month), int(day))
return parsed_dt.replace(tzinfo=timezone.utc)
except ValueError:
pass
return datetime(year, int(month), int(day))
except:
# Try mm/dd/yyyy format (US style)
try:
parsed_dt = datetime(int(year), int(day), int(month))
return parsed_dt.replace(tzinfo=timezone.utc)
except ValueError:
pass
except Exception:
pass
logger.warning(f"Could not parse date: {date_str}")
return None
@staticmethod