REFACTOR data service

This commit is contained in:
Urtzi Alfaro
2025-08-12 18:17:30 +02:00
parent 7c237c0acc
commit fbe7470ad9
149 changed files with 8528 additions and 7393 deletions

View File

@@ -0,0 +1,8 @@
# services/sales/app/services/__init__.py
from .sales_service import SalesService
from .product_service import ProductService
from .data_import_service import DataImportService
from .messaging import SalesEventPublisher, sales_publisher
__all__ = ["SalesService", "ProductService", "DataImportService", "SalesEventPublisher", "sales_publisher"]

View File

@@ -0,0 +1,943 @@
# services/sales/app/services/data_import_service.py
"""
Data Import Service
Service for importing sales data using repository pattern and enhanced error handling
"""
import csv
import io
import json
import base64
import pandas as pd
from typing import Dict, Any, List, Optional, Union
from datetime import datetime, timezone
import structlog
import re
from app.repositories.sales_repository import SalesRepository
from app.models.sales import SalesData
from app.schemas.sales import SalesDataCreate
from app.core.database import get_db_transaction
logger = structlog.get_logger()
# Import result schemas (dataclass definitions)
from dataclasses import dataclass
from typing import List, Dict, Any
@dataclass
class SalesValidationResult:
    """Outcome of a pre-import validation pass over an uploaded payload."""

    # True only when no errors were collected.
    is_valid: bool
    # Record counts; for CSV the valid/invalid split is an estimate from a sample.
    total_records: int
    valid_records: int
    invalid_records: int
    # Structured findings: dicts with type / message / field / row / code keys.
    errors: List[Dict[str, Any]]
    warnings: List[Dict[str, Any]]
    # Free-form overview (status, file format, suggestions, ...).
    summary: Dict[str, Any]
@dataclass
class SalesImportResult:
    """Outcome of an import run as reported back to the caller."""

    # Overall success flag for the run.
    success: bool
    # Row counters for the run.
    records_processed: int
    records_created: int
    records_updated: int
    records_failed: int
    # Structured findings: dicts with type / message / field / row / code keys.
    errors: List[Dict[str, Any]]
    warnings: List[Dict[str, Any]]
    # Wall-clock duration of the import, in seconds.
    processing_time_seconds: float
class DataImportService:
    """Enhanced data import service using repository pattern with STRICT validation for production"""

    # PRODUCTION VALIDATION CONFIGURATION
    STRICT_VALIDATION = True  # Set to False for lenient validation, True for production quality
    MAX_QUANTITY_PER_DAY = 10000  # Maximum reasonable quantity per product per day
    MAX_REVENUE_PER_ITEM = 100000  # Maximum reasonable revenue per line item
    MAX_UNIT_PRICE = 10000  # Maximum reasonable price per unit for bakery items

    # Common column mappings for different languages/formats.
    # Keys are the canonical field names; values are the header aliases
    # (lower-case) that column detection accepts for that field.
    COLUMN_MAPPINGS = {
        'date': ['date', 'fecha', 'datum', 'data', 'dia'],
        'datetime': ['datetime', 'fecha_hora', 'timestamp'],
        'product': ['product', 'producto', 'item', 'articulo', 'nombre', 'name'],
        'product_name': ['product_name', 'nombre_producto', 'item_name'],
        'quantity': ['quantity', 'cantidad', 'qty', 'units', 'unidades'],
        'quantity_sold': ['quantity_sold', 'cantidad_vendida', 'sold'],
        'revenue': ['revenue', 'ingresos', 'sales', 'ventas', 'total', 'importe'],
        'price': ['price', 'precio', 'cost', 'coste'],
        'location': ['location', 'ubicacion', 'tienda', 'store', 'punto_venta'],
        'location_id': ['location_id', 'store_id', 'tienda_id'],
        # Row parsing looks up a 'category' mapping; without this entry it
        # could never be detected and product_category was always None.
        'category': ['category', 'categoria', 'product_category', 'categoria_producto'],
    }

    # Explicit formats tried, in order, when generic date parsing fails.
    DATE_FORMATS = [
        '%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y', '%d-%m-%Y', '%m-%d-%Y',
        '%d.%m.%Y', '%Y/%m/%d', '%d/%m/%y', '%m/%d/%y',
        '%Y-%m-%d %H:%M:%S', '%d/%m/%Y %H:%M',
    ]
def __init__(self):
"""Initialize enhanced import service"""
pass
async def validate_import_data(self, data: Dict[str, Any]) -> SalesValidationResult:
    """Validate an import payload without persisting anything.

    Checks, in order: presence of ``tenant_id`` and ``data``, whether
    ``data_format`` is supported, the payload size (error above 10MB,
    warning above 1MB) and, for CSV payloads only, the required columns
    plus a trial parse of the first rows (at most 10) that is used to
    estimate how many records are invalid. Any row problem in the sample
    fails validation.

    Args:
        data: Request payload; the keys read are ``tenant_id``, ``data``
            and ``data_format``.

    Returns:
        A SalesValidationResult. Unexpected exceptions are not raised;
        they are reported as a single ``SYSTEM_ERROR`` entry.
    """

    def issue(kind, message, field, code):
        # Every file-level finding shares this shape; "row" is always None here.
        return {"type": kind, "message": message, "field": field, "row": None, "code": code}

    try:
        logger.info("Starting enhanced import data validation", tenant_id=data.get("tenant_id"))
        validation_result = SalesValidationResult(
            is_valid=True,
            total_records=0,
            valid_records=0,
            invalid_records=0,
            errors=[],
            warnings=[],
            summary={},
        )
        errors = []
        warnings = []

        # Basic validation checks
        if not data.get("tenant_id"):
            errors.append(issue("missing_field", "tenant_id es requerido", "tenant_id", "MISSING_TENANT_ID"))
        if not data.get("data"):
            # Nothing to analyse: report immediately with a reduced summary.
            errors.append(issue("missing_data", "Datos de archivo faltantes", "data", "NO_DATA_PROVIDED"))
            validation_result.is_valid = False
            validation_result.errors = errors
            validation_result.summary = {
                "status": "failed",
                "reason": "no_data_provided",
                "file_format": data.get("data_format", "unknown"),
                "suggestions": ["Selecciona un archivo válido para importar"],
            }
            return validation_result

        # Validate file format
        format_type = data.get("data_format", "").lower()
        if format_type not in ("csv", "excel", "xlsx", "xls", "json", "pos"):
            errors.append(issue("unsupported_format", f"Formato no soportado: {format_type}",
                                "data_format", "UNSUPPORTED_FORMAT"))

        # Validate data size. An empty payload already returned above, so
        # only the upper bounds need checking here.
        data_content = data.get("data", "")
        data_size = len(data_content)
        if data_size > 10 * 1024 * 1024:  # 10MB limit
            errors.append(issue("file_too_large", "Archivo demasiado grande (máximo 10MB)",
                                "data", "FILE_TOO_LARGE"))
        elif data_size > 1024 * 1024:  # 1MB warning
            warnings.append(issue("large_file",
                                  "Archivo grande detectado. El procesamiento puede tomar más tiempo.",
                                  "data", "LARGE_FILE_WARNING"))

        # Analyze CSV content if format is CSV
        column_mapping = None
        if format_type == "csv" and data_content and not errors:
            try:
                reader = csv.DictReader(io.StringIO(data_content))
                rows = list(reader)
                validation_result.total_records = len(rows)
                if not rows:
                    errors.append(issue("empty_content", "El archivo CSV no contiene datos",
                                        "data", "NO_CONTENT"))
                else:
                    # Use the header row itself: a row dict carries an extra
                    # None key when a record has more fields than the header.
                    column_mapping = self._detect_columns(list(reader.fieldnames or []))

                    # Check for required columns
                    if not column_mapping.get('date'):
                        errors.append(issue("missing_column", "Columna de fecha no encontrada",
                                            "date", "MISSING_DATE_COLUMN"))
                    if not column_mapping.get('product'):
                        errors.append(issue("missing_column", "Columna de producto no encontrada",
                                            "product", "MISSING_PRODUCT_COLUMN"))
                    if not column_mapping.get('quantity'):
                        warnings.append(issue("missing_column",
                                              "Columna de cantidad no encontrada, se usará 1 por defecto",
                                              "quantity", "MISSING_QUANTITY_COLUMN"))

                    if errors:
                        # A required column is missing: no row can be imported.
                        validation_result.valid_records = 0
                        validation_result.invalid_records = validation_result.total_records
                    else:
                        # Estimate data quality from the first rows only.
                        sample_rows = rows[:10]
                        sample_size = len(sample_rows)
                        quality_issues = 0
                        for i, row in enumerate(sample_rows):
                            parsed_data = await self._parse_row_data(row, column_mapping, i + 1)
                            if parsed_data.get("skip") or parsed_data.get("errors"):
                                quality_issues += 1
                        estimated_error_rate = (quality_issues / sample_size) * 100
                        estimated_invalid = int(validation_result.total_records * estimated_error_rate / 100)
                        validation_result.valid_records = validation_result.total_records - estimated_invalid
                        validation_result.invalid_records = estimated_invalid

                        # STRICT: any data quality issue fails validation.
                        if estimated_error_rate > 0:
                            errors.append(issue(
                                "data_quality_error",
                                f"Falló la validación de calidad: {estimated_error_rate:.0f}% de los datos tienen errores críticos",
                                "data", "DATA_QUALITY_FAILED"))
                            # One extra entry grading the severity.
                            if estimated_error_rate > 50:
                                errors.append(issue(
                                    "data_quality_critical",
                                    "Calidad de datos crítica: más del 50% de los registros tienen errores",
                                    "data", "DATA_QUALITY_CRITICAL"))
                            elif estimated_error_rate > 20:
                                errors.append(issue(
                                    "data_quality_high",
                                    f"Alta tasa de errores detectada: {estimated_error_rate:.0f}% de los datos requieren corrección",
                                    "data", "DATA_QUALITY_HIGH_ERROR_RATE"))
                            else:
                                errors.append(issue(
                                    "data_quality_detected",
                                    f"Se detectaron errores de validación en {estimated_error_rate:.0f}% de los datos",
                                    "data", "DATA_QUALITY_ERRORS_FOUND"))
            except Exception as csv_error:
                # Analysis problems are downgraded to a warning, not a failure.
                logger.warning("Enhanced CSV analysis failed", error=str(csv_error))
                warnings.append(issue("analysis_warning",
                                      f"No se pudo analizar completamente el CSV: {str(csv_error)}",
                                      "data", "CSV_ANALYSIS_WARNING"))

        # Set validation result
        validation_result.is_valid = len(errors) == 0
        validation_result.errors = errors
        validation_result.warnings = warnings

        # Reuse the mapping from the analysis; only read the header again
        # when the analysis was skipped or stopped before detecting columns.
        if column_mapping is None and format_type == "csv" and data_content:
            header = csv.DictReader(io.StringIO(data_content)).fieldnames or []
            column_mapping = self._detect_columns(list(header))
        detected_columns = list(column_mapping.keys()) if column_mapping is not None else []

        validation_result.summary = {
            "status": "valid" if validation_result.is_valid else "invalid",
            "file_format": format_type,
            "file_size_bytes": data_size,
            "file_size_mb": round(data_size / (1024 * 1024), 2),
            "estimated_processing_time_seconds": max(1, validation_result.total_records // 100),
            # UTC without an offset suffix, i.e. the same text utcnow() produced.
            "validation_timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat(),
            "detected_columns": detected_columns,
            "suggestions": self._generate_suggestions(validation_result, format_type, len(warnings)),
        }
        logger.info("Enhanced import validation completed",
                    is_valid=validation_result.is_valid,
                    total_records=validation_result.total_records,
                    error_count=len(errors),
                    warning_count=len(warnings))
        return validation_result
    except Exception as e:
        logger.error("Enhanced validation process failed", error=str(e))
        return SalesValidationResult(
            is_valid=False,
            total_records=0,
            valid_records=0,
            invalid_records=0,
            errors=[{
                "type": "system_error",
                "message": f"Error en el proceso de validación: {str(e)}",
                "field": None,
                "row": None,
                "code": "SYSTEM_ERROR",
            }],
            warnings=[],
            summary={
                "status": "error",
                "file_format": data.get("data_format", "unknown"),
                "error_type": "system_error",
                "suggestions": [
                    "Intenta de nuevo con un archivo diferente",
                    "Contacta soporte si el problema persiste",
                ],
            },
        )
async def process_import(
    self,
    tenant_id: str,
    content: str,
    file_format: str,
    filename: Optional[str] = None
) -> SalesImportResult:
    """Import sales records from ``content`` inside one database transaction.

    Args:
        tenant_id: Tenant the created records are attributed to.
        content: File payload as text (Excel payloads are base64 encoded).
        file_format: One of ``csv``, ``json``, ``excel``, ``xlsx`` or
            ``xls`` (case-insensitive).
        filename: Optional original file name, used for logging.

    Returns:
        A SalesImportResult. Failures are not raised; they come back as a
        result with ``success=False`` and one ``IMPORT_FAILURE`` entry.
    """
    start_time = datetime.now(timezone.utc)
    try:
        logger.info("Starting enhanced data import",
                    filename=filename,
                    format=file_format,
                    tenant_id=tenant_id)
        normalized_format = file_format.lower()
        async with get_db_transaction() as db:
            repository = SalesRepository(db)
            # Process data based on format
            if normalized_format == 'csv':
                result = await self._process_csv_data(tenant_id, content, repository, filename)
            elif normalized_format == 'json':
                result = await self._process_json_data(tenant_id, content, repository, filename)
            elif normalized_format in ('excel', 'xlsx', 'xls'):
                # 'xls' is accepted by validation, so it must be importable too.
                result = await self._process_excel_data(tenant_id, content, repository, filename)
            else:
                raise ValueError(f"Unsupported format: {file_format}")

            processing_time = (datetime.now(timezone.utc) - start_time).total_seconds()
            total_rows = result.get("total_rows", 0)
            created = result.get("records_created", 0)
            final_result = SalesImportResult(
                success=result.get("success", False),
                records_processed=total_rows,
                records_created=created,
                records_updated=0,  # We don't update, only create
                records_failed=total_rows - created,
                errors=self._structure_messages(result.get("errors", [])),
                warnings=self._structure_messages(result.get("warnings", [])),
                processing_time_seconds=processing_time
            )
            logger.info("Enhanced data import completed successfully",
                        records_created=final_result.records_created,
                        processing_time=processing_time)
            return final_result
    except Exception as e:
        processing_time = (datetime.now(timezone.utc) - start_time).total_seconds()
        logger.error("Enhanced data import failed", error=str(e), tenant_id=tenant_id)
        return SalesImportResult(
            success=False,
            records_processed=0,
            records_created=0,
            records_updated=0,
            records_failed=0,
            errors=[{
                "type": "import_error",
                "message": f"Import failed: {str(e)}",
                "field": None,
                "row": None,
                "code": "IMPORT_FAILURE"
            }],
            warnings=[],
            processing_time_seconds=processing_time
        )
async def _process_csv_data(
    self,
    tenant_id: str,
    csv_content: str,
    repository: SalesRepository,
    filename: Optional[str] = None
) -> Dict[str, Any]:
    """Parse CSV text and create one sales record per acceptable row.

    Rows that fail parsing are skipped and their messages collected; a row
    that raises while being stored is recorded as an error and processing
    continues with the next row.

    Returns:
        Dict with ``success`` (at least one record created), ``total_rows``,
        ``records_created``, ``success_rate`` (percent), ``errors`` and
        ``warnings``. An empty file yields a result without ``success_rate``.
    """
    try:
        rows = list(csv.DictReader(io.StringIO(csv_content)))
        if not rows:
            return {
                "success": False,
                "total_rows": 0,
                "records_created": 0,
                "errors": ["CSV file is empty"],
                "warnings": []
            }

        column_mapping = self._detect_columns(list(rows[0].keys()))
        total = len(rows)
        created_count = 0
        errors = []
        warnings = []
        logger.info(f"Processing {len(rows)} records from CSV with enhanced mapping")

        for row_no, row in enumerate(rows, start=1):
            try:
                parsed = await self._parse_row_data(row, column_mapping, row_no)
                if parsed.get("skip"):
                    errors.extend(parsed.get("errors", []))
                    warnings.extend(parsed.get("warnings", []))
                    continue

                record = SalesDataCreate(
                    tenant_id=tenant_id,
                    date=parsed["date"],
                    product_name=parsed["product_name"],
                    product_category=parsed.get("product_category"),
                    quantity_sold=parsed["quantity_sold"],
                    unit_price=parsed.get("unit_price"),
                    revenue=parsed.get("revenue"),
                    location_id=parsed.get("location_id"),
                    source="csv"
                )
                await repository.create_sales_record(record, tenant_id)
                created_count += 1

                # Periodic progress line for large files.
                if created_count % 100 == 0:
                    logger.info(f"Enhanced processing: {created_count}/{total} records completed...")
            except Exception as exc:
                error_msg = f"Row {row_no}: {exc}"
                errors.append(error_msg)
                logger.warning("Enhanced record processing failed", error=error_msg)

        return {
            "success": created_count > 0,
            "total_rows": total,
            "records_created": created_count,
            "success_rate": (created_count / total) * 100,
            "errors": errors,
            "warnings": warnings
        }
    except Exception as exc:
        logger.error("Enhanced CSV processing failed", error=str(exc))
        raise
async def _process_json_data(
    self,
    tenant_id: str,
    json_content: str,
    repository: SalesRepository,
    filename: Optional[str] = None
) -> Dict[str, Any]:
    """Decode a JSON payload into records and hand them to DataFrame processing.

    Accepts a plain JSON string or a ``data:`` URI with a base64 body. The
    document may be a list of records, a dict wrapping the list under
    ``data`` / ``records`` / ``sales``, or a single record dict.

    Raises:
        ValueError: If the text is not valid JSON or the top-level value is
            neither a dict nor a list.
    """
    try:
        text = json_content
        if text.startswith('data:'):
            # data URI: everything after the first comma is base64.
            text = base64.b64decode(text.split(',')[1]).decode('utf-8')
        data = json.loads(text)

        if isinstance(data, list):
            records = data
        elif isinstance(data, dict):
            for wrapper_key in ('data', 'records', 'sales'):
                if wrapper_key in data:
                    records = data[wrapper_key]
                    break
            else:
                records = [data]  # Single record
        else:
            raise ValueError("Invalid JSON format")

        if not records:
            return {
                "success": False,
                "total_rows": 0,
                "records_created": 0,
                "errors": ["No records found in JSON"],
                "warnings": []
            }

        frame = pd.DataFrame(records)
        frame.columns = frame.columns.str.strip().str.lower()
        return await self._process_dataframe(tenant_id, frame, repository, "json", filename)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON: {str(e)}")
    except Exception as e:
        logger.error("Enhanced JSON processing failed", error=str(e))
        raise
async def _process_excel_data(
    self,
    tenant_id: str,
    excel_content: str,
    repository: SalesRepository,
    filename: Optional[str] = None
) -> Dict[str, Any]:
    """Decode a base64 Excel payload and import its first sheet.

    Args:
        excel_content: Base64 text, optionally prefixed as a ``data:`` URI.

    Returns:
        The result dict produced by ``_process_dataframe``.

    Raises:
        Exception: Any decoding or reading error is logged and re-raised.
    """
    try:
        # For a data URI, only the part after the first comma is base64.
        if excel_content.startswith('data:'):
            encoded = excel_content.split(',')[1]
        else:
            encoded = excel_content
        excel_bytes = base64.b64decode(encoded)

        # Read Excel file with pandas (first sheet only)
        df = pd.read_excel(io.BytesIO(excel_bytes), sheet_name=0)

        # Header cells are not guaranteed to be text (e.g. numeric headers);
        # the .str accessor fails or yields NaN for those, so normalise
        # every label through str() instead.
        df.columns = [str(col).strip().lower() for col in df.columns]

        # Remove empty rows
        df = df.dropna(how='all')
        return await self._process_dataframe(tenant_id, df, repository, "excel", filename)
    except Exception as e:
        logger.error("Enhanced Excel processing failed", error=str(e))
        raise
async def _process_dataframe(
    self,
    tenant_id: str,
    df: pd.DataFrame,
    repository: SalesRepository,
    source: str,
    filename: Optional[str] = None
) -> Dict[str, Any]:
    """Create sales records from the rows of ``df``.

    Args:
        df: Frame whose column labels are matched via ``_detect_columns``.
        source: Value stored as the ``source`` of each created record.

    Returns:
        Dict with ``success``, ``total_rows``, ``records_created``,
        ``success_rate`` (percent) and at most 10 ``errors`` / ``warnings``.

    Raises:
        ValueError: If no date column or no product column is detected.
    """
    try:
        column_mapping = self._detect_columns(df.columns.tolist())
        missing = [name for name in ("date", "product") if not column_mapping.get(name)]
        if missing:
            raise ValueError(f"Required columns missing: {', '.join(missing)}")

        total = len(df)
        created_count = 0
        errors = []
        warnings = []
        logger.info(f"Enhanced processing of {len(df)} records from {source}")

        for index, row in df.iterrows():
            try:
                # Plain dict per row, with pandas NaN cells turned into None.
                row_dict = {
                    col: (None if pd.isna(row[col]) else row[col])
                    for col in df.columns
                }
                parsed = await self._parse_row_data(row_dict, column_mapping, index + 1)
                if parsed.get("skip"):
                    errors.extend(parsed.get("errors", []))
                    warnings.extend(parsed.get("warnings", []))
                    continue

                record = SalesDataCreate(
                    tenant_id=tenant_id,
                    date=parsed["date"],
                    product_name=parsed["product_name"],
                    product_category=parsed.get("product_category"),
                    quantity_sold=parsed["quantity_sold"],
                    unit_price=parsed.get("unit_price"),
                    revenue=parsed.get("revenue"),
                    location_id=parsed.get("location_id"),
                    source=source
                )
                await repository.create_sales_record(record, tenant_id)
                created_count += 1

                # Progress logging for large datasets
                if created_count % 100 == 0:
                    logger.info(f"Enhanced DataFrame processing: {created_count}/{total} records completed...")
            except Exception as exc:
                error_msg = f"Row {index + 1}: {exc}"
                errors.append(error_msg)
                logger.warning("Enhanced record processing failed", error=error_msg)

        return {
            "success": created_count > 0,
            "total_rows": total,
            "records_created": created_count,
            "success_rate": (created_count / total) * 100 if total > 0 else 0,
            "errors": errors[:10],  # Limit errors for performance
            "warnings": warnings[:10]  # Limit warnings
        }
    except ValueError:
        # Missing-column errors go to the caller without the extra log line.
        raise
    except Exception as exc:
        logger.error("Enhanced DataFrame processing failed", error=str(exc))
        raise
async def _parse_row_data(
self,
row: Dict[str, Any],
column_mapping: Dict[str, str],
row_number: int
) -> Dict[str, Any]:
"""Enhanced row data parsing with better validation"""
errors = []
warnings = []
try:
# Enhanced date extraction and validation
date_str = str(row.get(column_mapping.get('date', ''), '')).strip()
if not date_str or date_str.lower() in ['nan', 'null', 'none', '']:
errors.append(f"Row {row_number}: Missing date")
return {"skip": True, "errors": errors, "warnings": warnings}
parsed_date = self._parse_date(date_str)
if not parsed_date:
errors.append(f"Row {row_number}: Invalid date format: {date_str}")
return {"skip": True, "errors": errors, "warnings": warnings}
# Enhanced product name extraction and cleaning
product_name = str(row.get(column_mapping.get('product', ''), '')).strip()
if not product_name or product_name.lower() in ['nan', 'null', 'none', '']:
errors.append(f"Row {row_number}: Missing product name")
return {"skip": True, "errors": errors, "warnings": warnings}
product_name = self._clean_product_name(product_name)
# STRICT quantity validation for production data quality
quantity_raw = row.get(column_mapping.get('quantity', 'quantity'), 1)
try:
if pd.isna(quantity_raw):
# Allow default quantity of 1 for missing values
quantity = 1
else:
quantity = int(float(str(quantity_raw).replace(',', '.')))
if quantity <= 0:
# STRICT: Treat invalid quantities as ERRORS, not warnings
errors.append(f"Row {row_number}: Invalid quantity ({quantity}) - quantities must be positive")
return {"skip": True, "errors": errors, "warnings": warnings}
elif self.STRICT_VALIDATION and quantity > self.MAX_QUANTITY_PER_DAY:
# STRICT: Check for unrealistic quantities
errors.append(f"Row {row_number}: Unrealistic quantity ({quantity}) - exceeds maximum expected daily sales ({self.MAX_QUANTITY_PER_DAY})")
return {"skip": True, "errors": errors, "warnings": warnings}
except (ValueError, TypeError):
# STRICT: Treat non-numeric quantities as ERRORS
errors.append(f"Row {row_number}: Invalid quantity format ({quantity_raw}) - must be a positive number")
return {"skip": True, "errors": errors, "warnings": warnings}
# Enhanced revenue extraction
revenue = None
unit_price = None
if 'revenue' in column_mapping and column_mapping['revenue'] in row:
revenue_raw = row.get(column_mapping['revenue'])
if revenue_raw and not pd.isna(revenue_raw) and str(revenue_raw).lower() not in ['nan', 'null', 'none', '']:
try:
revenue = float(str(revenue_raw).replace(',', '.').replace('', '').replace('$', '').strip())
if revenue < 0:
# STRICT: Treat negative revenue as ERROR, not warning
errors.append(f"Row {row_number}: Negative revenue ({revenue}) - revenue must be positive or zero")
return {"skip": True, "errors": errors, "warnings": warnings}
else:
# STRICT: Check for unrealistic revenue values
if self.STRICT_VALIDATION and revenue > self.MAX_REVENUE_PER_ITEM:
errors.append(f"Row {row_number}: Unrealistic revenue ({revenue}) - exceeds maximum expected value ({self.MAX_REVENUE_PER_ITEM})")
return {"skip": True, "errors": errors, "warnings": warnings}
# Calculate unit price if we have both revenue and quantity
unit_price = revenue / quantity if quantity > 0 else None
# STRICT: Validate unit price reasonableness
if unit_price and unit_price > 10000: # More than €10,000 per unit seems unrealistic for bakery
errors.append(f"Row {row_number}: Unrealistic unit price ({unit_price:.2f}) - check quantity and revenue values")
return {"skip": True, "errors": errors, "warnings": warnings}
except (ValueError, TypeError):
# STRICT: Treat invalid revenue format as ERROR
errors.append(f"Row {row_number}: Invalid revenue format ({revenue_raw}) - must be a valid number")
return {"skip": True, "errors": errors, "warnings": warnings}
# Enhanced location extraction
location_id = None
if 'location' in column_mapping and column_mapping['location'] in row:
location_raw = row.get(column_mapping['location'])
if location_raw and not pd.isna(location_raw) and str(location_raw).lower() not in ['nan', 'null', 'none', '']:
location_id = str(location_raw).strip()
# Enhanced product category extraction
product_category = None
if 'category' in column_mapping and column_mapping['category'] in row:
category_raw = row.get(column_mapping['category'])
if category_raw and not pd.isna(category_raw) and str(category_raw).lower() not in ['nan', 'null', 'none', '']:
product_category = str(category_raw).strip()
return {
"skip": False,
"date": parsed_date,
"product_name": product_name,
"product_category": product_category,
"quantity_sold": quantity,
"unit_price": unit_price,
"revenue": revenue,
"location_id": location_id,
"errors": errors,
"warnings": warnings
}
except Exception as e:
errors.append(f"Row {row_number}: Enhanced parsing error: {str(e)}")
return {"skip": True, "errors": errors, "warnings": warnings}
def _detect_columns(self, columns: List[str]) -> Dict[str, str]:
"""Enhanced column detection with fuzzy matching"""
mapping = {}
columns_lower = [col.lower().strip() for col in columns]
for standard_name, possible_names in self.COLUMN_MAPPINGS.items():
best_match = None
best_score = 0
for col_idx, col in enumerate(columns_lower):
for possible in possible_names:
# Exact match (highest priority)
if possible == col:
best_match = columns[col_idx]
best_score = 100
break
# Contains match
elif possible in col or col in possible:
score = len(possible) / len(col) * 90
if score > best_score:
best_match = columns[col_idx]
best_score = score
if best_score == 100: # Found exact match
break
if best_match and best_score > 70: # Threshold for matches
mapping[standard_name] = best_match
# Enhanced alias mapping
if 'product' not in mapping and 'product_name' in mapping:
mapping['product'] = mapping['product_name']
if 'quantity' not in mapping and 'quantity_sold' in mapping:
mapping['quantity'] = mapping['quantity_sold']
if 'location' not in mapping and 'location_id' in mapping:
mapping['location'] = mapping['location_id']
return mapping
def _parse_date(self, date_str: str) -> Optional[datetime]:
"""Enhanced date parsing with pandas and multiple format support"""
if not date_str or str(date_str).lower() in ['nan', 'null', 'none']:
return None
date_str = str(date_str).strip()
# Try pandas first (most robust)
try:
parsed_dt = pd.to_datetime(date_str, dayfirst=True)
if hasattr(parsed_dt, 'to_pydatetime'):
parsed_dt = parsed_dt.to_pydatetime()
if parsed_dt.tzinfo is None:
parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
return parsed_dt
except Exception:
pass
# Try specific formats as fallback
for fmt in self.DATE_FORMATS:
try:
parsed_dt = datetime.strptime(date_str, fmt)
if parsed_dt.tzinfo is None:
parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
return parsed_dt
except ValueError:
continue
logger.warning(f"Could not parse date: {date_str}")
return None
def _clean_product_name(self, product_name: str) -> str:
"""Enhanced product name cleaning and standardization"""
if not product_name:
return "Producto sin nombre"
# Remove extra whitespace
cleaned = re.sub(r'\s+', ' ', str(product_name).strip())
# Remove special characters but keep Spanish characters
cleaned = re.sub(r'[^\w\s\-áéíóúñçüÁÉÍÓÚÑÇÜ]', '', cleaned)
# Capitalize first letter of each word
cleaned = cleaned.title()
# Enhanced corrections for Spanish bakeries
replacements = {
'Pan De': 'Pan de',
'Café Con': 'Café con',
'Te ': '',
'Bocadillo De': 'Bocadillo de',
'Dulce De': 'Dulce de',
'Tarta De': 'Tarta de',
}
for old, new in replacements.items():
cleaned = cleaned.replace(old, new)
return cleaned if cleaned else "Producto sin nombre"
def _structure_messages(self, messages: List[Union[str, Dict]]) -> List[Dict[str, Any]]:
"""Convert string messages to structured format"""
structured = []
for msg in messages:
if isinstance(msg, str):
structured.append({
"type": "general_message",
"message": msg,
"field": None,
"row": None,
"code": "GENERAL_MESSAGE"
})
else:
structured.append(msg)
return structured
def _generate_suggestions(
    self,
    validation_result: SalesValidationResult,
    format_type: str,
    warning_count: int
) -> List[str]:
    """Build the user-facing hints that accompany a validation result.

    Args:
        validation_result: Result whose ``is_valid``, ``total_records`` and
            ``errors`` drive the hints.
        format_type: Lower-cased file format of the payload.
        warning_count: Number of warnings raised during validation.

    Returns:
        Suggestion strings in display order.
    """
    if not validation_result.is_valid:
        hints = [
            "Corrige los errores antes de continuar",
            "Verifica que el archivo tenga el formato correcto",
        ]
        if format_type not in ["csv", "excel", "xlsx", "json"]:
            hints += [
                "Usa formato CSV o Excel para mejores resultados",
                "El formato JSON es para usuarios avanzados",
            ]
        if validation_result.total_records == 0:
            hints += [
                "Asegúrate de que el archivo contenga datos",
                "Verifica que el archivo no esté corrupto",
            ]
        # Hints tied to specific missing-column error codes.
        seen_codes = [
            entry.get("code", "")
            for entry in validation_result.errors
            if isinstance(entry, dict)
        ]
        if "MISSING_DATE_COLUMN" in seen_codes:
            hints.append("Incluye una columna de fecha (fecha, date, dia)")
        if "MISSING_PRODUCT_COLUMN" in seen_codes:
            hints.append("Incluye una columna de producto (producto, product, item)")
        return hints

    hints = [
        "El archivo está listo para procesamiento",
        f"Se procesarán aproximadamente {validation_result.total_records} registros",
    ]
    if validation_result.total_records > 1000:
        hints += [
            "Archivo grande: el procesamiento puede tomar varios minutos",
            "Considera dividir archivos muy grandes para mejor rendimiento",
        ]
    if warning_count > 0:
        hints += [
            "Revisa las advertencias antes de continuar",
            "Los datos con advertencias se procesarán con valores por defecto",
        ]
    # Format-specific suggestions
    if format_type == "csv":
        hints += [
            "Asegúrate de que las fechas estén en formato DD/MM/YYYY",
            "Verifica que los números usen punto decimal (no coma)",
        ]
    elif format_type in ["excel", "xlsx"]:
        hints += [
            "Solo se procesará la primera hoja del archivo Excel",
            "Evita celdas combinadas y fórmulas complejas",
        ]
    return hints
# Main DataImportService class with enhanced functionality

View File

@@ -0,0 +1,232 @@
# services/sales/app/services/messaging.py
"""
Sales Service Messaging - Event Publishing using shared messaging infrastructure
"""
import structlog
from typing import Dict, Any, Optional
from uuid import UUID
from datetime import datetime
from shared.messaging.rabbitmq import RabbitMQClient
from shared.messaging.events import BaseEvent, DataImportedEvent
from app.core.config import settings
logger = structlog.get_logger()
class SalesEventPublisher:
"""Sales service event publisher using RabbitMQ"""
def __init__(self):
self.enabled = True
self._rabbitmq_client = None
async def _get_rabbitmq_client(self):
"""Get or create RabbitMQ client"""
if not self._rabbitmq_client:
self._rabbitmq_client = RabbitMQClient(
connection_url=settings.RABBITMQ_URL,
service_name="sales-service"
)
await self._rabbitmq_client.connect()
return self._rabbitmq_client
async def publish_sales_created(self, sales_data: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
    """Publish a ``sales.created`` event for a newly stored sales record.

    Args:
        sales_data: Record fields; the keys read are ``id``, ``tenant_id``,
            ``product_name``, ``revenue`` and ``quantity_sold``.
        correlation_id: Optional id passed through to the event.

    Returns:
        True when publishing is disabled, otherwise the value returned by
        the client's ``publish_event``; False when any exception occurs
        (publishing is best-effort and never raises).
    """
    try:
        if not self.enabled:
            return True

        # Create event. "revenue" may be present with a None value, so
        # fall back to 0 instead of calling float(None).
        event = BaseEvent(
            service_name="sales-service",
            data={
                "record_id": str(sales_data.get("id")),
                "tenant_id": str(sales_data.get("tenant_id")),
                "product_name": sales_data.get("product_name"),
                "revenue": float(sales_data.get("revenue") or 0),
                "quantity_sold": sales_data.get("quantity_sold", 0),
                "timestamp": datetime.now().isoformat()
            },
            event_type="sales.created",
            correlation_id=correlation_id
        )

        # Publish via RabbitMQ
        client = await self._get_rabbitmq_client()
        success = await client.publish_event(
            exchange_name="sales.events",
            routing_key="sales.created",
            event_data=event.to_dict()
        )
        if success:
            logger.info("Sales record created event published",
                        record_id=sales_data.get("id"),
                        tenant_id=sales_data.get("tenant_id"),
                        product=sales_data.get("product_name"))
        return success
    except Exception as e:
        logger.warning("Failed to publish sales created event", error=str(e))
        return False
async def publish_sales_updated(self, sales_data: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
    """Publish a ``sales.updated`` event for a modified sales record.

    Returns True when publishing is disabled, otherwise the result of the
    client's ``publish_event``; any exception is logged and yields False.
    """
    try:
        if not self.enabled:
            return True

        record_id = sales_data.get("id")
        tenant_id = sales_data.get("tenant_id")
        payload = {
            "record_id": str(record_id),
            "tenant_id": str(tenant_id),
            "product_name": sales_data.get("product_name"),
            "timestamp": datetime.now().isoformat()
        }
        event = BaseEvent(
            service_name="sales-service",
            data=payload,
            event_type="sales.updated",
            correlation_id=correlation_id
        )

        broker = await self._get_rabbitmq_client()
        published = await broker.publish_event(
            exchange_name="sales.events",
            routing_key="sales.updated",
            event_data=event.to_dict()
        )
        if published:
            logger.info("Sales record updated event published",
                        record_id=sales_data.get("id"),
                        tenant_id=sales_data.get("tenant_id"))
        return published
    except Exception as exc:
        logger.warning("Failed to publish sales updated event", error=str(exc))
        return False
async def publish_sales_deleted(self, record_id: UUID, tenant_id: UUID, correlation_id: Optional[str] = None) -> bool:
    """Publish a ``sales.deleted`` event for a removed sales record.

    Returns True when publishing is disabled, otherwise the result of the
    client's ``publish_event``; any exception is logged and yields False.
    """
    try:
        if not self.enabled:
            return True

        payload = {
            "record_id": str(record_id),
            "tenant_id": str(tenant_id),
            "timestamp": datetime.now().isoformat()
        }
        event = BaseEvent(
            service_name="sales-service",
            data=payload,
            event_type="sales.deleted",
            correlation_id=correlation_id
        )

        broker = await self._get_rabbitmq_client()
        published = await broker.publish_event(
            exchange_name="sales.events",
            routing_key="sales.deleted",
            event_data=event.to_dict()
        )
        if published:
            logger.info("Sales record deleted event published",
                        record_id=record_id,
                        tenant_id=tenant_id)
        return published
    except Exception as exc:
        logger.warning("Failed to publish sales deleted event", error=str(exc))
        return False
async def publish_data_imported(self, import_result: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
    """Publish a ``data.imported`` event summarising an import run.

    Args:
        import_result: Mapping with the import outcome; the keys
            ``records_created``, ``records_updated``, ``records_failed``,
            ``tenant_id``, ``success`` and ``file_name`` are read.
        correlation_id: Optional identifier forwarded on the event.

    Returns:
        ``True`` when publishing is disabled, otherwise the value returned by
        the client's ``publish_event``; ``False`` if any exception occurred.
    """
    try:
        # A disabled publisher reports success without building an event.
        if not self.enabled:
            return True

        payload = {
            "records_created": import_result.get("records_created", 0),
            "records_updated": import_result.get("records_updated", 0),
            "records_failed": import_result.get("records_failed", 0),
            "tenant_id": str(import_result.get("tenant_id")),
            "success": import_result.get("success", False),
            "file_name": import_result.get("file_name"),
            "timestamp": datetime.now().isoformat()
        }
        # No explicit event_type is passed to DataImportedEvent here.
        event = DataImportedEvent(
            service_name="sales-service",
            data=payload,
            correlation_id=correlation_id
        )

        broker = await self._get_rabbitmq_client()
        published = await broker.publish_event(
            exchange_name="data.events",
            routing_key="data.imported",
            event_data=event.to_dict()
        )
        if published:
            logger.info(
                "Sales data imported event published",
                records_created=import_result.get("records_created"),
                tenant_id=import_result.get("tenant_id"),
                success=import_result.get("success")
            )
        return published
    except Exception as exc:
        # Best-effort publishing: failures are logged and reported, not raised.
        logger.warning("Failed to publish data imported event", error=str(exc))
        return False
async def publish_analytics_generated(self, analytics_data: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
    """Publish an ``analytics.generated`` event with aggregated sales figures.

    Args:
        analytics_data: Mapping with the analytics result; the keys
            ``tenant_id``, ``total_revenue``, ``total_quantity``,
            ``total_transactions``, ``period_start`` and ``period_end`` are
            read.
        correlation_id: Optional identifier forwarded on the event.

    Returns:
        ``True`` when publishing is disabled, otherwise the value returned by
        the client's ``publish_event``; ``False`` if any exception occurred.
    """
    try:
        # A disabled publisher reports success without building an event.
        if not self.enabled:
            return True

        # ``total_revenue`` may be present but None (e.g. an aggregate over
        # no rows); ``dict.get`` only applies its default to a missing key,
        # so coerce None to 0 instead of letting float(None) abort the publish.
        total_revenue = float(analytics_data.get("total_revenue") or 0)

        event = BaseEvent(
            service_name="sales-service",
            data={
                "tenant_id": str(analytics_data.get("tenant_id")),
                "total_revenue": total_revenue,
                "total_quantity": analytics_data.get("total_quantity", 0),
                "total_transactions": analytics_data.get("total_transactions", 0),
                "period_start": analytics_data.get("period_start"),
                "period_end": analytics_data.get("period_end"),
                "timestamp": datetime.now().isoformat()
            },
            event_type="analytics.generated",
            correlation_id=correlation_id
        )

        client = await self._get_rabbitmq_client()
        success = await client.publish_event(
            exchange_name="analytics.events",
            routing_key="analytics.generated",
            event_data=event.to_dict()
        )
        if success:
            logger.info(
                "Sales analytics generated event published",
                tenant_id=analytics_data.get("tenant_id"),
                total_revenue=analytics_data.get("total_revenue")
            )
        return success
    except Exception as e:
        # Best-effort publishing: failures are logged and reported, not raised.
        logger.warning("Failed to publish analytics generated event", error=str(e))
        return False
async def cleanup(self):
    """Disconnect the cached RabbitMQ client, if one is set."""
    client = self._rabbitmq_client
    if not client:
        # Nothing was ever connected (or the attribute is falsy): no-op.
        return
    await client.disconnect()
# Module-level SalesEventPublisher instance, created at import time and
# re-exported by the services package as ``sales_publisher``.
sales_publisher = SalesEventPublisher()

View File

@@ -0,0 +1,171 @@
# services/sales/app/services/product_service.py
"""
Product Service - Business Logic Layer
"""
from typing import List, Optional, Dict, Any
from uuid import UUID
from datetime import datetime
import structlog
from app.models.sales import Product
from app.repositories.product_repository import ProductRepository
from app.schemas.sales import ProductCreate, ProductUpdate
from app.core.database import get_db_transaction
logger = structlog.get_logger()
class ProductService:
    """Business-logic layer for tenant-scoped product operations.

    Each public method opens its own transaction via ``get_db_transaction``
    and delegates persistence to ``ProductRepository``. Failures are logged
    and re-raised to the caller.
    """

    def __init__(self):
        """Create the service; no instance state is kept."""

    async def create_product(
        self,
        product_data: ProductCreate,
        tenant_id: UUID,
        user_id: Optional[UUID] = None
    ) -> Product:
        """Validate and persist a new product for a tenant.

        Args:
            product_data: Payload describing the product to create.
            tenant_id: Tenant that will own the product.
            user_id: Accepted for interface compatibility; not used here.

        Returns:
            The product returned by the repository.

        Raises:
            ValueError: If business validation rejects ``product_data``.
        """
        try:
            # Business rules are checked before the write transaction opens.
            await self._validate_product_data(product_data, tenant_id)

            async with get_db_transaction() as session:
                created = await ProductRepository(session).create_product(product_data, tenant_id)
                logger.info("Created product", product_id=created.id, tenant_id=tenant_id)
                return created
        except Exception as exc:
            logger.error("Failed to create product", error=str(exc), tenant_id=tenant_id)
            raise

    async def update_product(
        self,
        product_id: UUID,
        update_data: ProductUpdate,
        tenant_id: UUID
    ) -> Product:
        """Apply the explicitly-set fields of ``update_data`` to a product.

        Raises:
            ValueError: If the product does not exist or belongs to another
                tenant.
        """
        try:
            async with get_db_transaction() as session:
                repo = ProductRepository(session)

                # Ownership check: a missing product and a foreign product
                # are reported identically.
                current = await repo.get_by_id(product_id)
                owned = bool(current) and current.tenant_id == tenant_id
                if not owned:
                    raise ValueError(f"Product {product_id} not found for tenant {tenant_id}")

                changes = update_data.model_dump(exclude_unset=True)
                refreshed = await repo.update(product_id, changes)
                logger.info("Updated product", product_id=product_id, tenant_id=tenant_id)
                return refreshed
        except Exception as exc:
            logger.error(
                "Failed to update product",
                error=str(exc),
                product_id=product_id,
                tenant_id=tenant_id
            )
            raise

    async def get_products(self, tenant_id: UUID) -> List[Product]:
        """Return every product the repository lists for ``tenant_id``."""
        try:
            async with get_db_transaction() as session:
                found = await ProductRepository(session).get_by_tenant(tenant_id)
                logger.info("Retrieved products", count=len(found), tenant_id=tenant_id)
                return found
        except Exception as exc:
            logger.error("Failed to get products", error=str(exc), tenant_id=tenant_id)
            raise

    async def get_product(self, product_id: UUID, tenant_id: UUID) -> Optional[Product]:
        """Fetch one product, hiding it when it belongs to another tenant.

        Returns:
            The repository's result, or ``None`` when the product exists but
            its ``tenant_id`` differs from the requested one.
        """
        try:
            async with get_db_transaction() as session:
                candidate = await ProductRepository(session).get_by_id(product_id)
                foreign = bool(candidate) and candidate.tenant_id != tenant_id
                return None if foreign else candidate
        except Exception as exc:
            logger.error(
                "Failed to get product",
                error=str(exc),
                product_id=product_id,
                tenant_id=tenant_id
            )
            raise

    async def delete_product(self, product_id: UUID, tenant_id: UUID) -> bool:
        """Delete a product after confirming it belongs to the tenant.

        Returns:
            The value returned by the repository's ``delete``.

        Raises:
            ValueError: If the product does not exist or belongs to another
                tenant.
        """
        try:
            async with get_db_transaction() as session:
                repo = ProductRepository(session)

                current = await repo.get_by_id(product_id)
                owned = bool(current) and current.tenant_id == tenant_id
                if not owned:
                    raise ValueError(f"Product {product_id} not found for tenant {tenant_id}")

                removed = await repo.delete(product_id)
                if removed:
                    logger.info("Deleted product", product_id=product_id, tenant_id=tenant_id)
                return removed
        except Exception as exc:
            logger.error(
                "Failed to delete product",
                error=str(exc),
                product_id=product_id,
                tenant_id=tenant_id
            )
            raise

    async def get_products_by_category(self, tenant_id: UUID, category: str) -> List[Product]:
        """Return the tenant's products the repository lists for ``category``."""
        try:
            async with get_db_transaction() as session:
                found = await ProductRepository(session).get_by_category(tenant_id, category)
                logger.info(
                    "Retrieved products by category",
                    count=len(found),
                    category=category,
                    tenant_id=tenant_id
                )
                return found
        except Exception as exc:
            logger.error(
                "Failed to get products by category",
                error=str(exc),
                category=category,
                tenant_id=tenant_id
            )
            raise

    async def search_products(self, tenant_id: UUID, search_term: str) -> List[Product]:
        """Return the tenant's products matching ``search_term``.

        Matching itself is delegated to the repository's ``search_products``.
        """
        try:
            async with get_db_transaction() as session:
                matches = await ProductRepository(session).search_products(tenant_id, search_term)
                logger.info(
                    "Searched products",
                    count=len(matches),
                    search_term=search_term,
                    tenant_id=tenant_id
                )
                return matches
        except Exception as exc:
            logger.error(
                "Failed to search products",
                error=str(exc),
                search_term=search_term,
                tenant_id=tenant_id
            )
            raise

    async def _validate_product_data(self, product_data: ProductCreate, tenant_id: UUID):
        """Check ``product_data`` against the product business rules.

        Raises:
            ValueError: If the SKU is already used by the tenant, if a
                seasonal product lacks a start or end date, or if its start
                is not strictly before its end.
        """
        # Rule 1: a SKU, when given, must not already exist for the tenant.
        if product_data.sku:
            async with get_db_transaction() as session:
                duplicate = await ProductRepository(session).get_by_sku(tenant_id, product_data.sku)
                if duplicate:
                    raise ValueError(f"Product with SKU {product_data.sku} already exists for tenant {tenant_id}")

        # Rule 2: seasonal products need a well-ordered date range.
        if product_data.is_seasonal:
            season_start = product_data.seasonal_start
            season_end = product_data.seasonal_end
            if not (season_start and season_end):
                raise ValueError("Seasonal products must have start and end dates")
            if season_start >= season_end:
                raise ValueError("Seasonal start date must be before end date")

        logger.info("Product data validation passed", tenant_id=tenant_id)

View File

@@ -0,0 +1,282 @@
# services/sales/app/services/sales_service.py
"""
Sales Service - Business Logic Layer
"""
from typing import List, Optional, Dict, Any
from uuid import UUID
from datetime import datetime
import structlog
from app.models.sales import SalesData
from app.repositories.sales_repository import SalesRepository
from app.schemas.sales import SalesDataCreate, SalesDataUpdate, SalesDataQuery, SalesAnalytics
from app.core.database import get_db_transaction
from shared.database.exceptions import DatabaseError
logger = structlog.get_logger()
class SalesService:
"""Service layer for sales operations"""
def __init__(self):
    """Create the service; no instance state is initialised."""
    pass
async def create_sales_record(
    self,
    sales_data: SalesDataCreate,
    tenant_id: UUID,
    user_id: Optional[UUID] = None
) -> SalesData:
    """Validate and persist a new sales record for a tenant.

    Args:
        sales_data: Payload describing the sale.
        tenant_id: Tenant that will own the record.
        user_id: When truthy, stored on the payload as ``created_by``.

    Returns:
        The record returned by the repository.

    Raises:
        ValueError: If business validation rejects ``sales_data``.
    """
    try:
        await self._validate_sales_data(sales_data, tenant_id)

        # Rebuild the payload with the creator attached; the original
        # object is left untouched.
        if user_id:
            sales_data = SalesDataCreate(**{**sales_data.model_dump(), 'created_by': user_id})

        async with get_db_transaction() as session:
            created = await SalesRepository(session).create_sales_record(sales_data, tenant_id)

            # Follow-up work (notifications, analytics, ...) runs after the insert.
            await self._post_create_actions(created)
            return created
    except Exception as exc:
        logger.error("Failed to create sales record in service", error=str(exc), tenant_id=tenant_id)
        raise
async def update_sales_record(
    self,
    record_id: UUID,
    update_data: SalesDataUpdate,
    tenant_id: UUID
) -> SalesData:
    """Apply the explicitly-set fields of ``update_data`` to a sales record.

    Raises:
        ValueError: If the record does not exist or belongs to another tenant.
    """
    try:
        async with get_db_transaction() as session:
            repo = SalesRepository(session)

            # Ownership check: a missing record and a foreign record are
            # reported identically.
            current = await repo.get_by_id(record_id)
            owned = bool(current) and current.tenant_id == tenant_id
            if not owned:
                raise ValueError(f"Sales record {record_id} not found for tenant {tenant_id}")

            changes = update_data.model_dump(exclude_unset=True)
            refreshed = await repo.update(record_id, changes)
            logger.info("Updated sales record", record_id=record_id, tenant_id=tenant_id)
            return refreshed
    except Exception as exc:
        logger.error(
            "Failed to update sales record",
            error=str(exc),
            record_id=record_id,
            tenant_id=tenant_id
        )
        raise
async def get_sales_records(
    self,
    tenant_id: UUID,
    query_params: Optional[SalesDataQuery] = None
) -> List[SalesData]:
    """Return a tenant's sales records.

    ``query_params`` is passed through to the repository's ``get_by_tenant``.
    """
    try:
        async with get_db_transaction() as session:
            found = await SalesRepository(session).get_by_tenant(tenant_id, query_params)
            logger.info("Retrieved sales records", count=len(found), tenant_id=tenant_id)
            return found
    except Exception as exc:
        logger.error("Failed to get sales records", error=str(exc), tenant_id=tenant_id)
        raise
async def get_sales_record(self, record_id: UUID, tenant_id: UUID) -> Optional[SalesData]:
    """Fetch one sales record, hiding it when it belongs to another tenant.

    Returns:
        The repository's result, or ``None`` when the record exists but its
        ``tenant_id`` differs from the requested one.
    """
    try:
        async with get_db_transaction() as session:
            candidate = await SalesRepository(session).get_by_id(record_id)
            foreign = bool(candidate) and candidate.tenant_id != tenant_id
            return None if foreign else candidate
    except Exception as exc:
        logger.error(
            "Failed to get sales record",
            error=str(exc),
            record_id=record_id,
            tenant_id=tenant_id
        )
        raise
async def delete_sales_record(self, record_id: UUID, tenant_id: UUID) -> bool:
    """Delete a sales record after confirming it belongs to the tenant.

    Returns:
        The value returned by the repository's ``delete``.

    Raises:
        ValueError: If the record does not exist or belongs to another tenant.
    """
    try:
        async with get_db_transaction() as session:
            repo = SalesRepository(session)

            current = await repo.get_by_id(record_id)
            owned = bool(current) and current.tenant_id == tenant_id
            if not owned:
                raise ValueError(f"Sales record {record_id} not found for tenant {tenant_id}")

            removed = await repo.delete(record_id)
            if removed:
                logger.info("Deleted sales record", record_id=record_id, tenant_id=tenant_id)
            return removed
    except Exception as exc:
        logger.error(
            "Failed to delete sales record",
            error=str(exc),
            record_id=record_id,
            tenant_id=tenant_id
        )
        raise
async def get_product_sales(
    self,
    tenant_id: UUID,
    product_name: str,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None
) -> List[SalesData]:
    """Return a tenant's sales records for one product.

    ``start_date`` and ``end_date`` are passed through to the repository's
    ``get_by_product``.
    """
    try:
        async with get_db_transaction() as session:
            found = await SalesRepository(session).get_by_product(
                tenant_id, product_name, start_date, end_date
            )
            logger.info("Retrieved product sales", count=len(found), product=product_name, tenant_id=tenant_id)
            return found
    except Exception as exc:
        logger.error(
            "Failed to get product sales",
            error=str(exc),
            tenant_id=tenant_id,
            product=product_name
        )
        raise
async def get_sales_analytics(
    self,
    tenant_id: UUID,
    start_date: Optional[datetime] = None,
    end_date: Optional[datetime] = None
) -> Dict[str, Any]:
    """Return the repository's analytics for a tenant.

    ``start_date`` and ``end_date`` are passed through to the repository's
    ``get_analytics``.
    """
    try:
        async with get_db_transaction() as session:
            summary = await SalesRepository(session).get_analytics(tenant_id, start_date, end_date)
            logger.info("Retrieved sales analytics", tenant_id=tenant_id)
            return summary
    except Exception as exc:
        logger.error("Failed to get sales analytics", error=str(exc), tenant_id=tenant_id)
        raise
async def get_product_categories(self, tenant_id: UUID) -> List[str]:
    """Return the product categories the repository reports for a tenant."""
    try:
        async with get_db_transaction() as session:
            return await SalesRepository(session).get_product_categories(tenant_id)
    except Exception as exc:
        logger.error("Failed to get product categories", error=str(exc), tenant_id=tenant_id)
        raise
async def validate_sales_record(
    self,
    record_id: UUID,
    tenant_id: UUID,
    validation_notes: Optional[str] = None
) -> SalesData:
    """Mark a tenant's sales record as validated.

    Args:
        record_id: Record to validate.
        tenant_id: Tenant expected to own the record.
        validation_notes: Optional notes passed to the repository's
            ``validate_record``.

    Raises:
        ValueError: If the record does not exist or belongs to another tenant.
    """
    try:
        async with get_db_transaction() as session:
            repo = SalesRepository(session)

            current = await repo.get_by_id(record_id)
            owned = bool(current) and current.tenant_id == tenant_id
            if not owned:
                raise ValueError(f"Sales record {record_id} not found for tenant {tenant_id}")

            confirmed = await repo.validate_record(record_id, validation_notes)
            logger.info("Validated sales record", record_id=record_id, tenant_id=tenant_id)
            return confirmed
    except Exception as exc:
        logger.error(
            "Failed to validate sales record",
            error=str(exc),
            record_id=record_id,
            tenant_id=tenant_id
        )
        raise
async def _validate_sales_data(self, sales_data: SalesDataCreate, tenant_id: UUID):
"""Validate sales data according to business rules"""
# Example business validations
# Check if revenue matches quantity * unit_price (if unit_price provided)
if sales_data.unit_price and sales_data.quantity_sold:
expected_revenue = sales_data.unit_price * sales_data.quantity_sold
# Apply discount if any
if sales_data.discount_applied:
expected_revenue *= (1 - sales_data.discount_applied / 100)
# Allow for small rounding differences
if abs(float(sales_data.revenue) - float(expected_revenue)) > 0.01:
logger.warning(
"Revenue mismatch detected",
expected=float(expected_revenue),
actual=float(sales_data.revenue),
tenant_id=tenant_id
)
# Check date validity (not in future)
if sales_data.date > datetime.utcnow():
raise ValueError("Sales date cannot be in the future")
# Additional business rules can be added here
logger.info("Sales data validation passed", tenant_id=tenant_id)
async def _post_create_actions(self, record: SalesData):
    """Run follow-up work after a sales record has been created.

    Currently this only logs completion. Errors raised inside the ``try``
    are logged as warnings instead of being propagated.
    """
    try:
        # Extension point for future work, e.g.:
        #   - sending notifications
        #   - refreshing analytics caches
        #   - triggering ML model updates
        #   - adjusting inventory levels (future integration)
        logger.info("Post-create actions completed", record_id=record.id)
    except Exception as exc:
        # Auxiliary actions must not fail the main create operation.
        logger.warning(
            "Failed to execute post-create actions",
            error=str(exc),
            record_id=record.id
        )
async def get_products_list(self, tenant_id: str) -> List[Dict[str, Any]]:
    """Get the list of products with sales data for a tenant.

    Args:
        tenant_id: Tenant whose product statistics are requested.

    Returns:
        The result of the repository's ``get_product_statistics``.

    Raises:
        DatabaseError: If anything fails. The original exception is attached
            as ``__cause__`` so the root failure stays visible to callers.
    """
    try:
        async with get_db_transaction() as db:
            repository = SalesRepository(db)

            # Use repository method for product statistics
            products = await repository.get_product_statistics(tenant_id)

            logger.debug("Products list retrieved successfully",
                         tenant_id=tenant_id,
                         product_count=len(products))
            return products
    except Exception as e:
        logger.error("Failed to get products list",
                     error=str(e),
                     tenant_id=tenant_id)
        # Chain explicitly so the wrapper is reported as caused by ``e``.
        raise DatabaseError(f"Failed to get products list: {str(e)}") from e