"""
|
|
Enhanced Data Import Service
|
|
Service for importing sales data using repository pattern and enhanced error handling
|
|
"""
|
|
|
|
import csv
|
|
import io
|
|
import json
|
|
import base64
|
|
import pandas as pd
|
|
from typing import Dict, Any, List, Optional, Union
|
|
from datetime import datetime, timezone
|
|
import structlog
|
|
import re
|
|
|
|
from app.repositories.sales_repository import SalesRepository
|
|
from app.models.sales import SalesData
|
|
from app.schemas.sales import SalesDataCreate, SalesImportResult, SalesValidationResult
|
|
from shared.database.unit_of_work import UnitOfWork
|
|
from shared.database.transactions import transactional
|
|
from shared.database.exceptions import DatabaseError, ValidationError
|
|
|
|
logger = structlog.get_logger()
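
# Example usage (a minimal sketch; `database_manager`, `csv_text`, and the tenant id are
# hypothetical and assume an async context with a configured database manager):
#
#     service = EnhancedDataImportService(database_manager)
#     validation = await service.validate_import_data(
#         {"tenant_id": "t-001", "data": csv_text, "data_format": "csv"}
#     )
#     if validation.is_valid:
#         result = await service.process_import(
#             tenant_id="t-001", content=csv_text, file_format="csv", filename="sales.csv"
#         )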


class EnhancedDataImportService:
    """Enhanced data import service using the repository pattern"""

    # Common column mappings for different languages/formats
    COLUMN_MAPPINGS = {
        'date': ['date', 'fecha', 'datum', 'data', 'dia'],
        'datetime': ['datetime', 'fecha_hora', 'timestamp'],
        'product': ['product', 'producto', 'item', 'articulo', 'nombre', 'name'],
        'product_name': ['product_name', 'nombre_producto', 'item_name'],
        'quantity': ['quantity', 'cantidad', 'qty', 'units', 'unidades'],
        'quantity_sold': ['quantity_sold', 'cantidad_vendida', 'sold'],
        'revenue': ['revenue', 'ingresos', 'sales', 'ventas', 'total', 'importe'],
        'price': ['price', 'precio', 'cost', 'coste'],
        'location': ['location', 'ubicacion', 'tienda', 'store', 'punto_venta'],
        'location_id': ['location_id', 'store_id', 'tienda_id'],
    }

    DATE_FORMATS = [
        '%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y', '%d-%m-%Y', '%m-%d-%Y',
        '%d.%m.%Y', '%Y/%m/%d', '%d/%m/%y', '%m/%d/%y',
        '%Y-%m-%d %H:%M:%S', '%d/%m/%Y %H:%M',
    ]

    def __init__(self, database_manager):
        """Initialize service with database manager"""
        self.database_manager = database_manager
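
    # The payload handled by validate_import_data is expected to look like this
    # (keys read below; the values shown are illustrative only):
    #     {"tenant_id": "<tenant>", "data": "<file content or base64>", "data_format": "csv"}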

    async def validate_import_data(self, data: Dict[str, Any]) -> SalesValidationResult:
        """Validate import data before processing"""
        try:
            logger.info("Starting import data validation", tenant_id=data.get("tenant_id"))

            validation_result = SalesValidationResult(
                is_valid=True,
                total_records=0,
                valid_records=0,
                invalid_records=0,
                errors=[],
                warnings=[],
                summary={}
            )

            errors = []
            warnings = []

            # Basic validation checks
            if not data.get("tenant_id"):
                errors.append({
                    "type": "missing_field",
                    "message": "tenant_id es requerido",
                    "field": "tenant_id",
                    "row": None,
                    "code": "MISSING_TENANT_ID"
                })

            if not data.get("data"):
                errors.append({
                    "type": "missing_data",
                    "message": "Datos de archivo faltantes",
                    "field": "data",
                    "row": None,
                    "code": "NO_DATA_PROVIDED"
                })

                validation_result.is_valid = False
                validation_result.errors = errors
                validation_result.summary = {
                    "status": "failed",
                    "reason": "no_data_provided",
                    "file_format": data.get("data_format", "unknown"),
                    "suggestions": ["Selecciona un archivo válido para importar"]
                }
                return validation_result

            # Validate file format
            format_type = data.get("data_format", "").lower()
            supported_formats = ["csv", "excel", "xlsx", "xls", "json", "pos"]

            if format_type not in supported_formats:
                errors.append({
                    "type": "unsupported_format",
                    "message": f"Formato no soportado: {format_type}",
                    "field": "data_format",
                    "row": None,
                    "code": "UNSUPPORTED_FORMAT"
                })

            # Validate data size
            data_content = data.get("data", "")
            data_size = len(data_content)

            if data_size == 0:
                errors.append({
                    "type": "empty_file",
                    "message": "El archivo está vacío",
                    "field": "data",
                    "row": None,
                    "code": "EMPTY_FILE"
                })
            elif data_size > 10 * 1024 * 1024:  # 10MB limit
                errors.append({
                    "type": "file_too_large",
                    "message": "Archivo demasiado grande (máximo 10MB)",
                    "field": "data",
                    "row": None,
                    "code": "FILE_TOO_LARGE"
                })
            elif data_size > 1024 * 1024:  # 1MB warning
                warnings.append({
                    "type": "large_file",
                    "message": "Archivo grande detectado. El procesamiento puede tomar más tiempo.",
                    "field": "data",
                    "row": None,
                    "code": "LARGE_FILE_WARNING"
                })

            # Analyze CSV content if format is CSV
            if format_type == "csv" and data_content and not errors:
                try:
                    reader = csv.DictReader(io.StringIO(data_content))
                    rows = list(reader)

                    validation_result.total_records = len(rows)

                    if not rows:
                        errors.append({
                            "type": "empty_content",
                            "message": "El archivo CSV no contiene datos",
                            "field": "data",
                            "row": None,
                            "code": "NO_CONTENT"
                        })
                    else:
                        # Analyze structure
                        headers = list(rows[0].keys()) if rows else []
                        column_mapping = self._detect_columns(headers)

                        # Check for required columns
                        if not column_mapping.get('date'):
                            errors.append({
                                "type": "missing_column",
                                "message": "Columna de fecha no encontrada",
                                "field": "date",
                                "row": None,
                                "code": "MISSING_DATE_COLUMN"
                            })

                        if not column_mapping.get('product'):
                            errors.append({
                                "type": "missing_column",
                                "message": "Columna de producto no encontrada",
                                "field": "product",
                                "row": None,
                                "code": "MISSING_PRODUCT_COLUMN"
                            })

                        if not column_mapping.get('quantity'):
                            warnings.append({
                                "type": "missing_column",
                                "message": "Columna de cantidad no encontrada, se usará 1 por defecto",
                                "field": "quantity",
                                "row": None,
                                "code": "MISSING_QUANTITY_COLUMN"
                            })

                        # Calculate estimated valid/invalid records
                        if not errors:
                            estimated_invalid = max(0, int(validation_result.total_records * 0.1))
                            validation_result.valid_records = validation_result.total_records - estimated_invalid
                            validation_result.invalid_records = estimated_invalid
                        else:
                            validation_result.valid_records = 0
                            validation_result.invalid_records = validation_result.total_records

                except Exception as csv_error:
                    logger.warning("CSV analysis failed", error=str(csv_error))
                    warnings.append({
                        "type": "analysis_warning",
                        "message": f"No se pudo analizar completamente el CSV: {str(csv_error)}",
                        "field": "data",
                        "row": None,
                        "code": "CSV_ANALYSIS_WARNING"
                    })

            # Set validation result
            validation_result.is_valid = len(errors) == 0
            validation_result.errors = errors
            validation_result.warnings = warnings

            # Build summary
            validation_result.summary = {
                "status": "valid" if validation_result.is_valid else "invalid",
                "file_format": format_type,
                "file_size_bytes": data_size,
                "file_size_mb": round(data_size / (1024 * 1024), 2),
                "estimated_processing_time_seconds": max(1, validation_result.total_records // 100),
                "validation_timestamp": datetime.utcnow().isoformat(),
                "suggestions": self._generate_suggestions(validation_result, format_type, len(warnings))
            }

            logger.info("Import validation completed",
                        is_valid=validation_result.is_valid,
                        total_records=validation_result.total_records,
                        error_count=len(errors),
                        warning_count=len(warnings))

            return validation_result

        except Exception as e:
            logger.error("Validation process failed", error=str(e))

            return SalesValidationResult(
                is_valid=False,
                total_records=0,
                valid_records=0,
                invalid_records=0,
                errors=[{
                    "type": "system_error",
                    "message": f"Error en el proceso de validación: {str(e)}",
                    "field": None,
                    "row": None,
                    "code": "SYSTEM_ERROR"
                }],
                warnings=[],
                summary={
                    "status": "error",
                    "file_format": data.get("data_format", "unknown"),
                    "error_type": "system_error",
                    "suggestions": [
                        "Intenta de nuevo con un archivo diferente",
                        "Contacta soporte si el problema persiste"
                    ]
                }
            )

    async def process_import(
        self,
        tenant_id: str,
        content: str,
        file_format: str,
        filename: Optional[str] = None,
        session=None
    ) -> SalesImportResult:
        """Process data import using repository pattern"""
        start_time = datetime.utcnow()

        try:
            logger.info("Starting data import using repository pattern",
                        filename=filename,
                        format=file_format,
                        tenant_id=tenant_id)

            async with self.database_manager.get_session() as db_session:
                async with UnitOfWork(db_session) as uow:
                    # Register sales repository
                    sales_repo = uow.register_repository("sales", SalesRepository, SalesData)

                    # Process data based on format
                    if file_format.lower() == 'csv':
                        result = await self._process_csv_data(tenant_id, content, sales_repo, filename)
                    elif file_format.lower() == 'json':
                        result = await self._process_json_data(tenant_id, content, sales_repo, filename)
                    elif file_format.lower() in ['excel', 'xlsx']:
                        result = await self._process_excel_data(tenant_id, content, sales_repo, filename)
                    else:
                        raise ValidationError(f"Unsupported format: {file_format}")

                    # Commit all changes
                    await uow.commit()

            # Calculate processing time
            end_time = datetime.utcnow()
            processing_time = (end_time - start_time).total_seconds()

            # Build final result
            final_result = SalesImportResult(
                success=result.get("success", False),
                records_processed=result.get("total_rows", 0),
                records_created=result.get("records_created", 0),
                records_updated=0,  # We don't update, only create
                records_failed=result.get("total_rows", 0) - result.get("records_created", 0),
                errors=self._structure_messages(result.get("errors", [])),
                warnings=self._structure_messages(result.get("warnings", [])),
                processing_time_seconds=processing_time
            )

            logger.info("Data import completed successfully",
                        records_created=final_result.records_created,
                        processing_time=processing_time)

            return final_result

        except (ValidationError, DatabaseError):
            raise
        except Exception as e:
            end_time = datetime.utcnow()
            processing_time = (end_time - start_time).total_seconds()

            logger.error("Data import failed", error=str(e), tenant_id=tenant_id)

            return SalesImportResult(
                success=False,
                records_processed=0,
                records_created=0,
                records_updated=0,
                records_failed=0,
                errors=[{
                    "type": "import_error",
                    "message": f"Import failed: {str(e)}",
                    "field": None,
                    "row": None,
                    "code": "IMPORT_FAILURE"
                }],
                warnings=[],
                processing_time_seconds=processing_time
            )
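
    # Illustrative CSV input for _process_csv_data (hypothetical rows; headers are matched
    # case-insensitively against COLUMN_MAPPINGS, so Spanish column names also work):
    #
    #     fecha,producto,cantidad,ingresos
    #     01/02/2024,Pan de Centeno,12,18.50
    #     01/02/2024,Café con Leche,30,45.00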

    async def _process_csv_data(
        self,
        tenant_id: str,
        csv_content: str,
        sales_repo: SalesRepository,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Process CSV data using repository"""
        try:
            reader = csv.DictReader(io.StringIO(csv_content))
            rows = list(reader)

            if not rows:
                return {
                    "success": False,
                    "total_rows": 0,
                    "records_created": 0,
                    "errors": ["CSV file is empty"],
                    "warnings": []
                }

            # Column mapping
            column_mapping = self._detect_columns(list(rows[0].keys()))

            records_created = 0
            errors = []
            warnings = []

            logger.info(f"Processing {len(rows)} records from CSV")

            for index, row in enumerate(rows):
                try:
                    # Parse and validate data
                    parsed_data = await self._parse_row_data(row, column_mapping, index + 1)
                    if parsed_data.get("skip"):
                        errors.extend(parsed_data.get("errors", []))
                        warnings.extend(parsed_data.get("warnings", []))
                        continue

                    # Create sales record using repository
                    record_data = {
                        "tenant_id": tenant_id,
                        "date": parsed_data["date"],
                        "product_name": parsed_data["product_name"],
                        "quantity_sold": parsed_data["quantity_sold"],
                        "revenue": parsed_data.get("revenue"),
                        "location_id": parsed_data.get("location_id"),
                        "source": "csv"
                    }

                    await sales_repo.create(record_data)
                    records_created += 1

                    # Log progress for large imports
                    if records_created % 100 == 0:
                        logger.info(f"Processed {records_created} records...")

                except Exception as e:
                    error_msg = f"Row {index + 1}: {str(e)}"
                    errors.append(error_msg)
                    logger.warning("Record processing failed", error=error_msg)

            success_rate = (records_created / len(rows)) * 100 if rows else 0

            return {
                "success": records_created > 0,
                "total_rows": len(rows),
                "records_created": records_created,
                "success_rate": success_rate,
                "errors": errors,
                "warnings": warnings
            }

        except Exception as e:
            logger.error("CSV processing failed", error=str(e))
            raise DatabaseError(f"CSV processing error: {str(e)}")
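
    # Accepted JSON payload shapes for _process_json_data (records may sit at the top level
    # or under a "data", "records", or "sales" key); the values shown are illustrative:
    #
    #     [{"date": "2024-02-01", "product": "Croissant", "quantity": 3, "revenue": 4.5}]
    #     {"sales": [{"date": "2024-02-01", "product": "Croissant", "quantity": 3}]}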

    async def _process_json_data(
        self,
        tenant_id: str,
        json_content: str,
        sales_repo: SalesRepository,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Process JSON data using repository"""
        try:
            # Parse JSON
            if json_content.startswith('data:'):
                json_content = base64.b64decode(json_content.split(',')[1]).decode('utf-8')

            data = json.loads(json_content)

            # Handle different JSON structures
            if isinstance(data, dict):
                if 'data' in data:
                    records = data['data']
                elif 'records' in data:
                    records = data['records']
                elif 'sales' in data:
                    records = data['sales']
                else:
                    records = [data]  # Single record
            elif isinstance(data, list):
                records = data
            else:
                raise ValidationError("Invalid JSON format")

            # Convert to DataFrame for consistent processing
            df = pd.DataFrame(records)
            df.columns = df.columns.str.strip().str.lower()

            return await self._process_dataframe(tenant_id, df, sales_repo, "json", filename)

        except json.JSONDecodeError as e:
            raise ValidationError(f"Invalid JSON: {str(e)}")
        except Exception as e:
            logger.error("JSON processing failed", error=str(e))
            raise DatabaseError(f"JSON processing error: {str(e)}")

    async def _process_excel_data(
        self,
        tenant_id: str,
        excel_content: str,
        sales_repo: SalesRepository,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Process Excel data using repository"""
        try:
            # Decode base64 content
            if excel_content.startswith('data:'):
                excel_bytes = base64.b64decode(excel_content.split(',')[1])
            else:
                excel_bytes = base64.b64decode(excel_content)

            # Read Excel file
            df = pd.read_excel(io.BytesIO(excel_bytes), sheet_name=0)

            # Clean column names
            df.columns = df.columns.str.strip().str.lower()

            # Remove empty rows
            df = df.dropna(how='all')

            return await self._process_dataframe(tenant_id, df, sales_repo, "excel", filename)

        except Exception as e:
            logger.error("Excel processing failed", error=str(e))
            raise DatabaseError(f"Excel processing error: {str(e)}")

    async def _process_dataframe(
        self,
        tenant_id: str,
        df: pd.DataFrame,
        sales_repo: SalesRepository,
        source: str,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Process DataFrame using repository"""
        try:
            # Map columns
            column_mapping = self._detect_columns(df.columns.tolist())

            if not column_mapping.get('date') or not column_mapping.get('product'):
                required_missing = []
                if not column_mapping.get('date'):
                    required_missing.append("date")
                if not column_mapping.get('product'):
                    required_missing.append("product")

                raise ValidationError(f"Required columns missing: {', '.join(required_missing)}")

            records_created = 0
            errors = []
            warnings = []

            logger.info(f"Processing {len(df)} records from {source}")

            for index, row in df.iterrows():
                try:
                    # Convert pandas row to dict
                    row_dict = {}
                    for col in df.columns:
                        row_dict[col] = row[col]

                    # Parse and validate data
                    parsed_data = await self._parse_row_data(row_dict, column_mapping, index + 1)
                    if parsed_data.get("skip"):
                        errors.extend(parsed_data.get("errors", []))
                        warnings.extend(parsed_data.get("warnings", []))
                        continue

                    # Create sales record using repository
                    record_data = {
                        "tenant_id": tenant_id,
                        "date": parsed_data["date"],
                        "product_name": parsed_data["product_name"],
                        "quantity_sold": parsed_data["quantity_sold"],
                        "revenue": parsed_data.get("revenue"),
                        "location_id": parsed_data.get("location_id"),
                        "source": source
                    }

                    await sales_repo.create(record_data)
                    records_created += 1

                    # Log progress for large imports
                    if records_created % 100 == 0:
                        logger.info(f"Processed {records_created} records...")

                except Exception as e:
                    error_msg = f"Row {index + 1}: {str(e)}"
                    errors.append(error_msg)
                    logger.warning("Record processing failed", error=error_msg)

            success_rate = (records_created / len(df)) * 100 if len(df) > 0 else 0

            return {
                "success": records_created > 0,
                "total_rows": len(df),
                "records_created": records_created,
                "success_rate": success_rate,
                "errors": errors[:10],  # Limit errors
                "warnings": warnings[:10]  # Limit warnings
            }

        except ValidationError:
            raise
        except Exception as e:
            logger.error("DataFrame processing failed", error=str(e))
            raise DatabaseError(f"Data processing error: {str(e)}")

    async def _parse_row_data(
        self,
        row: Dict[str, Any],
        column_mapping: Dict[str, str],
        row_number: int
    ) -> Dict[str, Any]:
        """Parse and validate row data"""
        errors = []
        warnings = []

        try:
            # Extract and validate date
            date_str = str(row.get(column_mapping.get('date', ''), '')).strip()
            if not date_str or date_str.lower() in ['nan', 'null', 'none', '']:
                errors.append(f"Row {row_number}: Missing date")
                return {"skip": True, "errors": errors, "warnings": warnings}

            parsed_date = self._parse_date(date_str)
            if not parsed_date:
                errors.append(f"Row {row_number}: Invalid date format: {date_str}")
                return {"skip": True, "errors": errors, "warnings": warnings}

            # Extract and validate product name
            product_name = str(row.get(column_mapping.get('product', ''), '')).strip()
            if not product_name or product_name.lower() in ['nan', 'null', 'none', '']:
                errors.append(f"Row {row_number}: Missing product name")
                return {"skip": True, "errors": errors, "warnings": warnings}

            product_name = self._clean_product_name(product_name)

            # Extract and validate quantity
            quantity_raw = row.get(column_mapping.get('quantity', 'quantity'), 1)
            try:
                quantity = int(float(str(quantity_raw).replace(',', '.')))
                if quantity <= 0:
                    warnings.append(f"Row {row_number}: Invalid quantity ({quantity}), using 1")
                    quantity = 1
            except (ValueError, TypeError):
                warnings.append(f"Row {row_number}: Invalid quantity ({quantity_raw}), using 1")
                quantity = 1

            # Extract revenue (optional)
            revenue = None
            if 'revenue' in column_mapping and column_mapping['revenue'] in row:
                revenue_raw = row.get(column_mapping['revenue'])
                if revenue_raw and str(revenue_raw).lower() not in ['nan', 'null', 'none', '']:
                    try:
                        revenue = float(str(revenue_raw).replace(',', '.').replace('€', '').replace('$', '').strip())
                        if revenue < 0:
                            revenue = None
                            warnings.append(f"Row {row_number}: Negative revenue ignored")
                    except (ValueError, TypeError):
                        warnings.append(f"Row {row_number}: Invalid revenue format: {revenue_raw}")

            # Extract location (optional)
            location_id = None
            if 'location' in column_mapping and column_mapping['location'] in row:
                location_raw = row.get(column_mapping['location'])
                if location_raw and str(location_raw).lower() not in ['nan', 'null', 'none', '']:
                    location_id = str(location_raw).strip()

            return {
                "skip": False,
                "date": parsed_date,
                "product_name": product_name,
                "quantity_sold": quantity,
                "revenue": revenue,
                "location_id": location_id,
                "errors": errors,
                "warnings": warnings
            }

        except Exception as e:
            errors.append(f"Row {row_number}: Parsing error: {str(e)}")
            return {"skip": True, "errors": errors, "warnings": warnings}
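
    # Sketch of _detect_columns on a hypothetical header row (substring matching also fills
    # related keys such as 'datetime' and 'product_name'; only the main ones are shown):
    #     self._detect_columns(["Fecha", "Producto", "Cantidad", "Ingresos"])
    #     -> {"date": "Fecha", "product": "Producto", "quantity": "Cantidad", "revenue": "Ingresos", ...}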

    def _detect_columns(self, columns: List[str]) -> Dict[str, str]:
        """Detect column mappings using fuzzy matching"""
        mapping = {}
        columns_lower = [col.lower() for col in columns]

        for standard_name, possible_names in self.COLUMN_MAPPINGS.items():
            for col in columns_lower:
                for possible in possible_names:
                    if possible in col or col in possible:
                        mapping[standard_name] = columns[columns_lower.index(col)]
                        break
                if standard_name in mapping:
                    break

        # Map common aliases
        if 'product' not in mapping and 'product_name' in mapping:
            mapping['product'] = mapping['product_name']
        if 'quantity' not in mapping and 'quantity_sold' in mapping:
            mapping['quantity'] = mapping['quantity_sold']
        if 'location' not in mapping and 'location_id' in mapping:
            mapping['location'] = mapping['location_id']

        return mapping
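
    # _parse_date tries pandas first (dayfirst=True), then the explicit DATE_FORMATS;
    # illustrative results (naive datetimes are normalised to UTC):
    #     self._parse_date("31/12/2024")            -> datetime(2024, 12, 31, tzinfo=timezone.utc)
    #     self._parse_date("2024-12-31 08:30:00")   -> datetime(2024, 12, 31, 8, 30, tzinfo=timezone.utc)
    #     self._parse_date("not a date")            -> None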

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Parse date string with multiple format attempts"""
        if not date_str or str(date_str).lower() in ['nan', 'null', 'none']:
            return None

        date_str = str(date_str).strip()

        # Try pandas first
        try:
            parsed_dt = pd.to_datetime(date_str, dayfirst=True)
            if hasattr(parsed_dt, 'to_pydatetime'):
                parsed_dt = parsed_dt.to_pydatetime()

            if parsed_dt.tzinfo is None:
                parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)

            return parsed_dt
        except Exception:
            pass

        # Try specific formats
        for fmt in self.DATE_FORMATS:
            try:
                parsed_dt = datetime.strptime(date_str, fmt)
                if parsed_dt.tzinfo is None:
                    parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
                return parsed_dt
            except ValueError:
                continue

        return None
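
    # Illustrative cleaning results for _clean_product_name (hypothetical inputs):
    #     self._clean_product_name("  pan de   centeno!! ")  -> "Pan de Centeno"
    #     self._clean_product_name("")                        -> "Producto sin nombre"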

    def _clean_product_name(self, product_name: str) -> str:
        """Clean and standardize product names"""
        if not product_name:
            return "Producto sin nombre"

        # Remove extra whitespace
        cleaned = re.sub(r'\s+', ' ', str(product_name).strip())

        # Remove special characters but keep Spanish characters
        cleaned = re.sub(r'[^\w\s\-áéíóúñçüÁÉÍÓÚÑÇÜ]', '', cleaned)

        # Capitalize first letter of each word
        cleaned = cleaned.title()

        # Common corrections for Spanish bakeries
        replacements = {
            'Pan De': 'Pan de',
            'Café Con': 'Café con',
            'Te ': 'Té ',
            'Bocadillo De': 'Bocadillo de',
        }

        for old, new in replacements.items():
            cleaned = cleaned.replace(old, new)

        return cleaned if cleaned else "Producto sin nombre"

    def _structure_messages(self, messages: List[Union[str, Dict]]) -> List[Dict[str, Any]]:
        """Convert string messages to structured format"""
        structured = []
        for msg in messages:
            if isinstance(msg, str):
                structured.append({
                    "type": "general_message",
                    "message": msg,
                    "field": None,
                    "row": None,
                    "code": "GENERAL_MESSAGE"
                })
            else:
                structured.append(msg)
        return structured

    def _generate_suggestions(
        self,
        validation_result: SalesValidationResult,
        format_type: str,
        warning_count: int
    ) -> List[str]:
        """Generate contextual suggestions based on validation results"""
        suggestions = []

        if validation_result.is_valid:
            suggestions.append("El archivo está listo para procesamiento")
            suggestions.append(f"Se procesarán aproximadamente {validation_result.total_records} registros")

            if validation_result.total_records > 1000:
                suggestions.append("Archivo grande: el procesamiento puede tomar varios minutos")

            if warning_count > 0:
                suggestions.append("Revisa las advertencias antes de continuar")
        else:
            suggestions.append("Corrige los errores antes de continuar")
            suggestions.append("Verifica que el archivo tenga el formato correcto")

            if format_type not in ["csv", "excel", "xlsx", "json"]:
                suggestions.append("Usa formato CSV o Excel")

            if validation_result.total_records == 0:
                suggestions.append("Asegúrate de que el archivo contenga datos")

        return suggestions


# Legacy compatibility alias
DataImportService = EnhancedDataImportService