Fix issues 3
This commit is contained in:
@@ -865,34 +865,65 @@ class DataImportService:
|
||||
return mapping
|
||||
|
||||
def _parse_date(self, date_str: str) -> Optional[datetime]:
    """Parse a date string from imported data into a timezone-aware datetime.

    Parsing is attempted in three stages, stopping at the first success:

    1. Explicit ``strptime`` formats in a fixed priority order. Year-first
       layouts come first; ``%d/%m/%Y`` is tried before ``%m/%d/%Y``, so an
       ambiguous value such as ``01/10/2024`` is read day-first and the
       month-first layout only matches when the day-first reading is invalid.
    2. ``pandas.to_datetime``, with ``dayfirst`` disabled when the text
       starts with a four-character field before a ``/`` (year-first) and
       enabled otherwise.
    3. Any remaining entries of ``self.DATE_FORMATS`` not already tried.

    Args:
        date_str: Raw cell value. Non-string values are converted with
            ``str()`` before parsing.

    Returns:
        The parsed ``datetime``. Naive results are tagged with UTC; results
        that already carry a timezone are returned unchanged. ``None`` is
        returned for empty/null-like input or when no stage can parse the
        value (a warning is logged in the latter case).
    """
    if not date_str:
        return None

    # Normalize before the null check so padded markers such as " nan " or
    # whitespace-only cells are rejected instead of reaching the parsers.
    date_str = str(date_str).strip()
    if not date_str or date_str.lower() in ('nan', 'null', 'none', 'nat'):
        return None

    def _with_utc(value):
        # Naive values are assumed to be UTC; aware values are left as-is.
        if value.tzinfo is None:
            return value.replace(tzinfo=timezone.utc)
        return value

    # For CSV format like "2024/10/01", try specific formats first to avoid
    # ambiguity. Order matters: the first matching format wins.
    priority_formats = (
        '%Y/%m/%d',  # 2024/10/01 (October 1, 2024) - most likely for CSV exports
        '%Y-%m-%d',  # 2024-10-01
        '%d/%m/%Y',  # 01/10/2024 (European format)
        '%m/%d/%Y',  # 10/01/2024 (US format)
    )

    for fmt in priority_formats:
        try:
            parsed_dt = _with_utc(datetime.strptime(date_str, fmt))
        except ValueError:
            continue
        logger.debug(f"Successfully parsed date '{date_str}' using format '{fmt}' -> {parsed_dt}")
        return parsed_dt

    # Pandas fallback. A four-character leading field before '/' looks like
    # YYYY/MM/DD, so dayfirst is disabled there to prevent misinterpretation;
    # everything else is treated as a European-style (day-first) date.
    year_first = '/' in date_str and len(date_str.split('/')[0]) == 4
    try:
        parsed_dt = pd.to_datetime(date_str, dayfirst=not year_first)
        # pandas reports some unparseable inputs as NaT rather than raising;
        # treat that as a failure so callers never receive NaT.
        if pd.isna(parsed_dt):
            raise ValueError("pandas returned NaT")

        if hasattr(parsed_dt, 'to_pydatetime'):
            parsed_dt = parsed_dt.to_pydatetime()

        parsed_dt = _with_utc(parsed_dt)
        logger.debug(f"Successfully parsed date '{date_str}' using pandas -> {parsed_dt}")
        return parsed_dt
    except Exception as e:
        # Deliberate best-effort: any pandas failure falls through to the
        # remaining explicit formats below.
        logger.debug(f"Pandas date parsing failed for '{date_str}': {e}")

    # Try remaining formats as last fallback
    for fmt in self.DATE_FORMATS:
        if fmt in priority_formats:  # Skip already tried formats
            continue
        try:
            parsed_dt = _with_utc(datetime.strptime(date_str, fmt))
        except ValueError:
            continue
        logger.debug(f"Successfully parsed date '{date_str}' using fallback format '{fmt}' -> {parsed_dt}")
        return parsed_dt

    logger.warning(f"Could not parse date: {date_str}")
    return None
|
||||
|
||||
Reference in New Issue
Block a user