REFACTOR data service
services/sales/app/services/__init__.py (new file, 8 lines)
@@ -0,0 +1,8 @@
# services/sales/app/services/__init__.py

from .sales_service import SalesService
from .product_service import ProductService
from .data_import_service import DataImportService
from .messaging import SalesEventPublisher, sales_publisher

__all__ = ["SalesService", "ProductService", "DataImportService", "SalesEventPublisher", "sales_publisher"]
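With this package __init__.py in place, callers can import the whole service layer from app.services. A minimal illustrative sketch (the call site is hypothetical, not part of the commit):

# Hypothetical call site, e.g. an API route module.
from app.services import DataImportService, SalesService, sales_publisher

import_service = DataImportService()   # stateless importer, see data_import_service.py below
sales_service = SalesService()         # service-layer wrapper over SalesRepository
# sales_publisher is a ready-to-use global SalesEventPublisher instance (see messaging.py)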
services/sales/app/services/data_import_service.py (new file, 943 lines)
@@ -0,0 +1,943 @@
# services/sales/app/services/data_import_service.py
"""
Data Import Service
Service for importing sales data using repository pattern and enhanced error handling
"""

import csv
import io
import json
import base64
import pandas as pd
from typing import Dict, Any, List, Optional, Union
from datetime import datetime, timezone
import structlog
import re

from app.repositories.sales_repository import SalesRepository
from app.models.sales import SalesData
from app.schemas.sales import SalesDataCreate
from app.core.database import get_db_transaction

logger = structlog.get_logger()


# Import result schemas (dataclass definitions)
from dataclasses import dataclass
from typing import List, Dict, Any

@dataclass
class SalesValidationResult:
    is_valid: bool
    total_records: int
    valid_records: int
    invalid_records: int
    errors: List[Dict[str, Any]]
    warnings: List[Dict[str, Any]]
    summary: Dict[str, Any]

@dataclass
class SalesImportResult:
    success: bool
    records_processed: int
    records_created: int
    records_updated: int
    records_failed: int
    errors: List[Dict[str, Any]]
    warnings: List[Dict[str, Any]]
    processing_time_seconds: float


class DataImportService:
    """Enhanced data import service using repository pattern with STRICT validation for production"""

    # PRODUCTION VALIDATION CONFIGURATION
    STRICT_VALIDATION = True  # Set to False for lenient validation, True for production quality
    MAX_QUANTITY_PER_DAY = 10000  # Maximum reasonable quantity per product per day
    MAX_REVENUE_PER_ITEM = 100000  # Maximum reasonable revenue per line item
    MAX_UNIT_PRICE = 10000  # Maximum reasonable price per unit for bakery items

    # Common column mappings for different languages/formats
    COLUMN_MAPPINGS = {
        'date': ['date', 'fecha', 'datum', 'data', 'dia'],
        'datetime': ['datetime', 'fecha_hora', 'timestamp'],
        'product': ['product', 'producto', 'item', 'articulo', 'nombre', 'name'],
        'product_name': ['product_name', 'nombre_producto', 'item_name'],
        'quantity': ['quantity', 'cantidad', 'qty', 'units', 'unidades'],
        'quantity_sold': ['quantity_sold', 'cantidad_vendida', 'sold'],
        'revenue': ['revenue', 'ingresos', 'sales', 'ventas', 'total', 'importe'],
        'price': ['price', 'precio', 'cost', 'coste'],
        'location': ['location', 'ubicacion', 'tienda', 'store', 'punto_venta'],
        'location_id': ['location_id', 'store_id', 'tienda_id'],
    }

    DATE_FORMATS = [
        '%Y-%m-%d', '%d/%m/%Y', '%m/%d/%Y', '%d-%m-%Y', '%m-%d-%Y',
        '%d.%m.%Y', '%Y/%m/%d', '%d/%m/%y', '%m/%d/%y',
        '%Y-%m-%d %H:%M:%S', '%d/%m/%Y %H:%M',
    ]

    def __init__(self):
        """Initialize enhanced import service"""
        pass

    async def validate_import_data(self, data: Dict[str, Any]) -> SalesValidationResult:
        """Enhanced validation with better error handling and suggestions"""
        try:
            logger.info("Starting enhanced import data validation", tenant_id=data.get("tenant_id"))

            validation_result = SalesValidationResult(
                is_valid=True,
                total_records=0,
                valid_records=0,
                invalid_records=0,
                errors=[],
                warnings=[],
                summary={}
            )

            errors = []
            warnings = []

            # Basic validation checks
            if not data.get("tenant_id"):
                errors.append({
                    "type": "missing_field",
                    "message": "tenant_id es requerido",
                    "field": "tenant_id",
                    "row": None,
                    "code": "MISSING_TENANT_ID"
                })

            if not data.get("data"):
                errors.append({
                    "type": "missing_data",
                    "message": "Datos de archivo faltantes",
                    "field": "data",
                    "row": None,
                    "code": "NO_DATA_PROVIDED"
                })

                validation_result.is_valid = False
                validation_result.errors = errors
                validation_result.summary = {
                    "status": "failed",
                    "reason": "no_data_provided",
                    "file_format": data.get("data_format", "unknown"),
                    "suggestions": ["Selecciona un archivo válido para importar"]
                }
                return validation_result

            # Validate file format
            format_type = data.get("data_format", "").lower()
            supported_formats = ["csv", "excel", "xlsx", "xls", "json", "pos"]

            if format_type not in supported_formats:
                errors.append({
                    "type": "unsupported_format",
                    "message": f"Formato no soportado: {format_type}",
                    "field": "data_format",
                    "row": None,
                    "code": "UNSUPPORTED_FORMAT"
                })

            # Validate data size
            data_content = data.get("data", "")
            data_size = len(data_content)

            if data_size == 0:
                errors.append({
                    "type": "empty_file",
                    "message": "El archivo está vacío",
                    "field": "data",
                    "row": None,
                    "code": "EMPTY_FILE"
                })
            elif data_size > 10 * 1024 * 1024:  # 10MB limit
                errors.append({
                    "type": "file_too_large",
                    "message": "Archivo demasiado grande (máximo 10MB)",
                    "field": "data",
                    "row": None,
                    "code": "FILE_TOO_LARGE"
                })
            elif data_size > 1024 * 1024:  # 1MB warning
                warnings.append({
                    "type": "large_file",
                    "message": "Archivo grande detectado. El procesamiento puede tomar más tiempo.",
                    "field": "data",
                    "row": None,
                    "code": "LARGE_FILE_WARNING"
                })

            # Analyze CSV content if format is CSV
            if format_type == "csv" and data_content and not errors:
                try:
                    reader = csv.DictReader(io.StringIO(data_content))
                    rows = list(reader)

                    validation_result.total_records = len(rows)

                    if not rows:
                        errors.append({
                            "type": "empty_content",
                            "message": "El archivo CSV no contiene datos",
                            "field": "data",
                            "row": None,
                            "code": "NO_CONTENT"
                        })
                    else:
                        # Enhanced column analysis
                        headers = list(rows[0].keys()) if rows else []
                        column_mapping = self._detect_columns(headers)

                        # Check for required columns
                        if not column_mapping.get('date'):
                            errors.append({
                                "type": "missing_column",
                                "message": "Columna de fecha no encontrada",
                                "field": "date",
                                "row": None,
                                "code": "MISSING_DATE_COLUMN"
                            })

                        if not column_mapping.get('product'):
                            errors.append({
                                "type": "missing_column",
                                "message": "Columna de producto no encontrada",
                                "field": "product",
                                "row": None,
                                "code": "MISSING_PRODUCT_COLUMN"
                            })

                        if not column_mapping.get('quantity'):
                            warnings.append({
                                "type": "missing_column",
                                "message": "Columna de cantidad no encontrada, se usará 1 por defecto",
                                "field": "quantity",
                                "row": None,
                                "code": "MISSING_QUANTITY_COLUMN"
                            })

                        # Enhanced data quality estimation
                        if not errors:
                            sample_size = min(10, len(rows))
                            sample_rows = rows[:sample_size]
                            quality_issues = 0

                            for i, row in enumerate(sample_rows):
                                parsed_data = await self._parse_row_data(row, column_mapping, i + 1)
                                if parsed_data.get("skip") or parsed_data.get("errors"):
                                    quality_issues += 1

                            estimated_error_rate = (quality_issues / sample_size) * 100 if sample_size > 0 else 0
                            estimated_invalid = int(validation_result.total_records * estimated_error_rate / 100)

                            validation_result.valid_records = validation_result.total_records - estimated_invalid
                            validation_result.invalid_records = estimated_invalid

                            # STRICT: Any data quality issues should fail validation for production
                            if estimated_error_rate > 0:
                                errors.append({
                                    "type": "data_quality_error",
                                    "message": f"Falló la validación de calidad: {estimated_error_rate:.0f}% de los datos tienen errores críticos",
                                    "field": "data",
                                    "row": None,
                                    "code": "DATA_QUALITY_FAILED"
                                })

                                # Add specific error details
                                if estimated_error_rate > 50:
                                    errors.append({
                                        "type": "data_quality_critical",
                                        "message": f"Calidad de datos crítica: más del 50% de los registros tienen errores",
                                        "field": "data",
                                        "row": None,
                                        "code": "DATA_QUALITY_CRITICAL"
                                    })
                                elif estimated_error_rate > 20:
                                    errors.append({
                                        "type": "data_quality_high",
                                        "message": f"Alta tasa de errores detectada: {estimated_error_rate:.0f}% de los datos requieren corrección",
                                        "field": "data",
                                        "row": None,
                                        "code": "DATA_QUALITY_HIGH_ERROR_RATE"
                                    })
                                else:
                                    # Even small error rates are now treated as validation failures
                                    errors.append({
                                        "type": "data_quality_detected",
                                        "message": f"Se detectaron errores de validación en {estimated_error_rate:.0f}% de los datos",
                                        "field": "data",
                                        "row": None,
                                        "code": "DATA_QUALITY_ERRORS_FOUND"
                                    })
                        else:
                            validation_result.valid_records = 0
                            validation_result.invalid_records = validation_result.total_records

                except Exception as csv_error:
                    logger.warning("Enhanced CSV analysis failed", error=str(csv_error))
                    warnings.append({
                        "type": "analysis_warning",
                        "message": f"No se pudo analizar completamente el CSV: {str(csv_error)}",
                        "field": "data",
                        "row": None,
                        "code": "CSV_ANALYSIS_WARNING"
                    })

            # Set validation result
            validation_result.is_valid = len(errors) == 0
            validation_result.errors = errors
            validation_result.warnings = warnings

            # Enhanced summary generation
            validation_result.summary = {
                "status": "valid" if validation_result.is_valid else "invalid",
                "file_format": format_type,
                "file_size_bytes": data_size,
                "file_size_mb": round(data_size / (1024 * 1024), 2),
                "estimated_processing_time_seconds": max(1, validation_result.total_records // 100),
                "validation_timestamp": datetime.utcnow().isoformat(),
                "detected_columns": list(self._detect_columns(list(csv.DictReader(io.StringIO(data_content)).fieldnames or [])).keys()) if format_type == "csv" and data_content else [],
                "suggestions": self._generate_suggestions(validation_result, format_type, len(warnings))
            }

            logger.info("Enhanced import validation completed",
                        is_valid=validation_result.is_valid,
                        total_records=validation_result.total_records,
                        error_count=len(errors),
                        warning_count=len(warnings))

            return validation_result

        except Exception as e:
            logger.error("Enhanced validation process failed", error=str(e))

            return SalesValidationResult(
                is_valid=False,
                total_records=0,
                valid_records=0,
                invalid_records=0,
                errors=[{
                    "type": "system_error",
                    "message": f"Error en el proceso de validación: {str(e)}",
                    "field": None,
                    "row": None,
                    "code": "SYSTEM_ERROR"
                }],
                warnings=[],
                summary={
                    "status": "error",
                    "file_format": data.get("data_format", "unknown"),
                    "error_type": "system_error",
                    "suggestions": [
                        "Intenta de nuevo con un archivo diferente",
                        "Contacta soporte si el problema persiste"
                    ]
                }
            )

    async def process_import(
        self,
        tenant_id: str,
        content: str,
        file_format: str,
        filename: Optional[str] = None
    ) -> SalesImportResult:
        """Enhanced data import processing with better error handling"""
        start_time = datetime.utcnow()

        try:
            logger.info("Starting enhanced data import",
                        filename=filename,
                        format=file_format,
                        tenant_id=tenant_id)

            async with get_db_transaction() as db:
                repository = SalesRepository(db)

                # Process data based on format
                if file_format.lower() == 'csv':
                    result = await self._process_csv_data(tenant_id, content, repository, filename)
                elif file_format.lower() == 'json':
                    result = await self._process_json_data(tenant_id, content, repository, filename)
                elif file_format.lower() in ['excel', 'xlsx']:
                    result = await self._process_excel_data(tenant_id, content, repository, filename)
                else:
                    raise ValueError(f"Unsupported format: {file_format}")

            # Calculate processing time
            end_time = datetime.utcnow()
            processing_time = (end_time - start_time).total_seconds()

            # Build enhanced final result
            final_result = SalesImportResult(
                success=result.get("success", False),
                records_processed=result.get("total_rows", 0),
                records_created=result.get("records_created", 0),
                records_updated=0,  # We don't update, only create
                records_failed=result.get("total_rows", 0) - result.get("records_created", 0),
                errors=self._structure_messages(result.get("errors", [])),
                warnings=self._structure_messages(result.get("warnings", [])),
                processing_time_seconds=processing_time
            )

            logger.info("Enhanced data import completed successfully",
                        records_created=final_result.records_created,
                        processing_time=processing_time)

            return final_result

        except Exception as e:
            end_time = datetime.utcnow()
            processing_time = (end_time - start_time).total_seconds()

            logger.error("Enhanced data import failed", error=str(e), tenant_id=tenant_id)

            return SalesImportResult(
                success=False,
                records_processed=0,
                records_created=0,
                records_updated=0,
                records_failed=0,
                errors=[{
                    "type": "import_error",
                    "message": f"Import failed: {str(e)}",
                    "field": None,
                    "row": None,
                    "code": "IMPORT_FAILURE"
                }],
                warnings=[],
                processing_time_seconds=processing_time
            )

    async def _process_csv_data(
        self,
        tenant_id: str,
        csv_content: str,
        repository: SalesRepository,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Enhanced CSV processing with better data handling"""
        try:
            reader = csv.DictReader(io.StringIO(csv_content))
            rows = list(reader)

            if not rows:
                return {
                    "success": False,
                    "total_rows": 0,
                    "records_created": 0,
                    "errors": ["CSV file is empty"],
                    "warnings": []
                }

            # Enhanced column mapping
            column_mapping = self._detect_columns(list(rows[0].keys()))

            records_created = 0
            errors = []
            warnings = []

            logger.info(f"Processing {len(rows)} records from CSV with enhanced mapping")

            for index, row in enumerate(rows):
                try:
                    # Enhanced data parsing and validation
                    parsed_data = await self._parse_row_data(row, column_mapping, index + 1)
                    if parsed_data.get("skip"):
                        errors.extend(parsed_data.get("errors", []))
                        warnings.extend(parsed_data.get("warnings", []))
                        continue

                    # Create sales record with enhanced data
                    sales_data = SalesDataCreate(
                        tenant_id=tenant_id,
                        date=parsed_data["date"],
                        product_name=parsed_data["product_name"],
                        product_category=parsed_data.get("product_category"),
                        quantity_sold=parsed_data["quantity_sold"],
                        unit_price=parsed_data.get("unit_price"),
                        revenue=parsed_data.get("revenue"),
                        location_id=parsed_data.get("location_id"),
                        source="csv"
                    )

                    created_record = await repository.create_sales_record(sales_data, tenant_id)
                    records_created += 1

                    # Enhanced progress logging
                    if records_created % 100 == 0:
                        logger.info(f"Enhanced processing: {records_created}/{len(rows)} records completed...")

                except Exception as e:
                    error_msg = f"Row {index + 1}: {str(e)}"
                    errors.append(error_msg)
                    logger.warning("Enhanced record processing failed", error=error_msg)

            success_rate = (records_created / len(rows)) * 100 if rows else 0

            return {
                "success": records_created > 0,
                "total_rows": len(rows),
                "records_created": records_created,
                "success_rate": success_rate,
                "errors": errors,
                "warnings": warnings
            }

        except Exception as e:
            logger.error("Enhanced CSV processing failed", error=str(e))
            raise

    async def _process_json_data(
        self,
        tenant_id: str,
        json_content: str,
        repository: SalesRepository,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Enhanced JSON processing with pandas integration"""
        try:
            # Parse JSON with base64 support
            if json_content.startswith('data:'):
                json_content = base64.b64decode(json_content.split(',')[1]).decode('utf-8')

            data = json.loads(json_content)

            # Handle different JSON structures
            if isinstance(data, dict):
                if 'data' in data:
                    records = data['data']
                elif 'records' in data:
                    records = data['records']
                elif 'sales' in data:
                    records = data['sales']
                else:
                    records = [data]  # Single record
            elif isinstance(data, list):
                records = data
            else:
                raise ValueError("Invalid JSON format")

            # Convert to DataFrame for enhanced processing
            if records:
                df = pd.DataFrame(records)
                df.columns = df.columns.str.strip().str.lower()

                return await self._process_dataframe(tenant_id, df, repository, "json", filename)
            else:
                return {
                    "success": False,
                    "total_rows": 0,
                    "records_created": 0,
                    "errors": ["No records found in JSON"],
                    "warnings": []
                }

        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON: {str(e)}")
        except Exception as e:
            logger.error("Enhanced JSON processing failed", error=str(e))
            raise

    async def _process_excel_data(
        self,
        tenant_id: str,
        excel_content: str,
        repository: SalesRepository,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Enhanced Excel processing with base64 support"""
        try:
            # Decode base64 content
            if excel_content.startswith('data:'):
                excel_bytes = base64.b64decode(excel_content.split(',')[1])
            else:
                excel_bytes = base64.b64decode(excel_content)

            # Read Excel file with pandas
            df = pd.read_excel(io.BytesIO(excel_bytes), sheet_name=0)

            # Enhanced column cleaning
            df.columns = df.columns.str.strip().str.lower()

            # Remove empty rows
            df = df.dropna(how='all')

            return await self._process_dataframe(tenant_id, df, repository, "excel", filename)

        except Exception as e:
            logger.error("Enhanced Excel processing failed", error=str(e))
            raise

    async def _process_dataframe(
        self,
        tenant_id: str,
        df: pd.DataFrame,
        repository: SalesRepository,
        source: str,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Enhanced DataFrame processing with better error handling"""
        try:
            # Enhanced column mapping
            column_mapping = self._detect_columns(df.columns.tolist())

            if not column_mapping.get('date') or not column_mapping.get('product'):
                required_missing = []
                if not column_mapping.get('date'):
                    required_missing.append("date")
                if not column_mapping.get('product'):
                    required_missing.append("product")

                raise ValueError(f"Required columns missing: {', '.join(required_missing)}")

            records_created = 0
            errors = []
            warnings = []

            logger.info(f"Enhanced processing of {len(df)} records from {source}")

            for index, row in df.iterrows():
                try:
                    # Convert pandas row to dict
                    row_dict = {}
                    for col in df.columns:
                        val = row[col]
                        # Handle pandas NaN values
                        if pd.isna(val):
                            row_dict[col] = None
                        else:
                            row_dict[col] = val

                    # Enhanced data parsing
                    parsed_data = await self._parse_row_data(row_dict, column_mapping, index + 1)
                    if parsed_data.get("skip"):
                        errors.extend(parsed_data.get("errors", []))
                        warnings.extend(parsed_data.get("warnings", []))
                        continue

                    # Create enhanced sales record
                    sales_data = SalesDataCreate(
                        tenant_id=tenant_id,
                        date=parsed_data["date"],
                        product_name=parsed_data["product_name"],
                        product_category=parsed_data.get("product_category"),
                        quantity_sold=parsed_data["quantity_sold"],
                        unit_price=parsed_data.get("unit_price"),
                        revenue=parsed_data.get("revenue"),
                        location_id=parsed_data.get("location_id"),
                        source=source
                    )

                    created_record = await repository.create_sales_record(sales_data, tenant_id)
                    records_created += 1

                    # Progress logging for large datasets
                    if records_created % 100 == 0:
                        logger.info(f"Enhanced DataFrame processing: {records_created}/{len(df)} records completed...")

                except Exception as e:
                    error_msg = f"Row {index + 1}: {str(e)}"
                    errors.append(error_msg)
                    logger.warning("Enhanced record processing failed", error=error_msg)

            success_rate = (records_created / len(df)) * 100 if len(df) > 0 else 0

            return {
                "success": records_created > 0,
                "total_rows": len(df),
                "records_created": records_created,
                "success_rate": success_rate,
                "errors": errors[:10],  # Limit errors for performance
                "warnings": warnings[:10]  # Limit warnings
            }

        except ValueError:
            raise
        except Exception as e:
            logger.error("Enhanced DataFrame processing failed", error=str(e))
            raise

    async def _parse_row_data(
        self,
        row: Dict[str, Any],
        column_mapping: Dict[str, str],
        row_number: int
    ) -> Dict[str, Any]:
        """Enhanced row data parsing with better validation"""
        errors = []
        warnings = []

        try:
            # Enhanced date extraction and validation
            date_str = str(row.get(column_mapping.get('date', ''), '')).strip()
            if not date_str or date_str.lower() in ['nan', 'null', 'none', '']:
                errors.append(f"Row {row_number}: Missing date")
                return {"skip": True, "errors": errors, "warnings": warnings}

            parsed_date = self._parse_date(date_str)
            if not parsed_date:
                errors.append(f"Row {row_number}: Invalid date format: {date_str}")
                return {"skip": True, "errors": errors, "warnings": warnings}

            # Enhanced product name extraction and cleaning
            product_name = str(row.get(column_mapping.get('product', ''), '')).strip()
            if not product_name or product_name.lower() in ['nan', 'null', 'none', '']:
                errors.append(f"Row {row_number}: Missing product name")
                return {"skip": True, "errors": errors, "warnings": warnings}

            product_name = self._clean_product_name(product_name)

            # STRICT quantity validation for production data quality
            quantity_raw = row.get(column_mapping.get('quantity', 'quantity'), 1)
            try:
                if pd.isna(quantity_raw):
                    # Allow default quantity of 1 for missing values
                    quantity = 1
                else:
                    quantity = int(float(str(quantity_raw).replace(',', '.')))
                    if quantity <= 0:
                        # STRICT: Treat invalid quantities as ERRORS, not warnings
                        errors.append(f"Row {row_number}: Invalid quantity ({quantity}) - quantities must be positive")
                        return {"skip": True, "errors": errors, "warnings": warnings}
                    elif self.STRICT_VALIDATION and quantity > self.MAX_QUANTITY_PER_DAY:
                        # STRICT: Check for unrealistic quantities
                        errors.append(f"Row {row_number}: Unrealistic quantity ({quantity}) - exceeds maximum expected daily sales ({self.MAX_QUANTITY_PER_DAY})")
                        return {"skip": True, "errors": errors, "warnings": warnings}
            except (ValueError, TypeError):
                # STRICT: Treat non-numeric quantities as ERRORS
                errors.append(f"Row {row_number}: Invalid quantity format ({quantity_raw}) - must be a positive number")
                return {"skip": True, "errors": errors, "warnings": warnings}

            # Enhanced revenue extraction
            revenue = None
            unit_price = None
            if 'revenue' in column_mapping and column_mapping['revenue'] in row:
                revenue_raw = row.get(column_mapping['revenue'])
                if revenue_raw and not pd.isna(revenue_raw) and str(revenue_raw).lower() not in ['nan', 'null', 'none', '']:
                    try:
                        revenue = float(str(revenue_raw).replace(',', '.').replace('€', '').replace('$', '').strip())
                        if revenue < 0:
                            # STRICT: Treat negative revenue as ERROR, not warning
                            errors.append(f"Row {row_number}: Negative revenue ({revenue}) - revenue must be positive or zero")
                            return {"skip": True, "errors": errors, "warnings": warnings}
                        else:
                            # STRICT: Check for unrealistic revenue values
                            if self.STRICT_VALIDATION and revenue > self.MAX_REVENUE_PER_ITEM:
                                errors.append(f"Row {row_number}: Unrealistic revenue ({revenue}) - exceeds maximum expected value ({self.MAX_REVENUE_PER_ITEM})")
                                return {"skip": True, "errors": errors, "warnings": warnings}

                            # Calculate unit price if we have both revenue and quantity
                            unit_price = revenue / quantity if quantity > 0 else None

                            # STRICT: Validate unit price reasonableness
                            if unit_price and unit_price > 10000:  # More than €10,000 per unit seems unrealistic for bakery
                                errors.append(f"Row {row_number}: Unrealistic unit price ({unit_price:.2f}) - check quantity and revenue values")
                                return {"skip": True, "errors": errors, "warnings": warnings}
                    except (ValueError, TypeError):
                        # STRICT: Treat invalid revenue format as ERROR
                        errors.append(f"Row {row_number}: Invalid revenue format ({revenue_raw}) - must be a valid number")
                        return {"skip": True, "errors": errors, "warnings": warnings}

            # Enhanced location extraction
            location_id = None
            if 'location' in column_mapping and column_mapping['location'] in row:
                location_raw = row.get(column_mapping['location'])
                if location_raw and not pd.isna(location_raw) and str(location_raw).lower() not in ['nan', 'null', 'none', '']:
                    location_id = str(location_raw).strip()

            # Enhanced product category extraction
            product_category = None
            if 'category' in column_mapping and column_mapping['category'] in row:
                category_raw = row.get(column_mapping['category'])
                if category_raw and not pd.isna(category_raw) and str(category_raw).lower() not in ['nan', 'null', 'none', '']:
                    product_category = str(category_raw).strip()

            return {
                "skip": False,
                "date": parsed_date,
                "product_name": product_name,
                "product_category": product_category,
                "quantity_sold": quantity,
                "unit_price": unit_price,
                "revenue": revenue,
                "location_id": location_id,
                "errors": errors,
                "warnings": warnings
            }

        except Exception as e:
            errors.append(f"Row {row_number}: Enhanced parsing error: {str(e)}")
            return {"skip": True, "errors": errors, "warnings": warnings}

    def _detect_columns(self, columns: List[str]) -> Dict[str, str]:
        """Enhanced column detection with fuzzy matching"""
        mapping = {}
        columns_lower = [col.lower().strip() for col in columns]

        for standard_name, possible_names in self.COLUMN_MAPPINGS.items():
            best_match = None
            best_score = 0

            for col_idx, col in enumerate(columns_lower):
                for possible in possible_names:
                    # Exact match (highest priority)
                    if possible == col:
                        best_match = columns[col_idx]
                        best_score = 100
                        break
                    # Contains match
                    elif possible in col or col in possible:
                        score = len(possible) / len(col) * 90
                        if score > best_score:
                            best_match = columns[col_idx]
                            best_score = score

                if best_score == 100:  # Found exact match
                    break

            if best_match and best_score > 70:  # Threshold for matches
                mapping[standard_name] = best_match

        # Enhanced alias mapping
        if 'product' not in mapping and 'product_name' in mapping:
            mapping['product'] = mapping['product_name']
        if 'quantity' not in mapping and 'quantity_sold' in mapping:
            mapping['quantity'] = mapping['quantity_sold']
        if 'location' not in mapping and 'location_id' in mapping:
            mapping['location'] = mapping['location_id']

        return mapping

    def _parse_date(self, date_str: str) -> Optional[datetime]:
        """Enhanced date parsing with pandas and multiple format support"""
        if not date_str or str(date_str).lower() in ['nan', 'null', 'none']:
            return None

        date_str = str(date_str).strip()

        # Try pandas first (most robust)
        try:
            parsed_dt = pd.to_datetime(date_str, dayfirst=True)
            if hasattr(parsed_dt, 'to_pydatetime'):
                parsed_dt = parsed_dt.to_pydatetime()

            if parsed_dt.tzinfo is None:
                parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)

            return parsed_dt
        except Exception:
            pass

        # Try specific formats as fallback
        for fmt in self.DATE_FORMATS:
            try:
                parsed_dt = datetime.strptime(date_str, fmt)
                if parsed_dt.tzinfo is None:
                    parsed_dt = parsed_dt.replace(tzinfo=timezone.utc)
                return parsed_dt
            except ValueError:
                continue

        logger.warning(f"Could not parse date: {date_str}")
        return None

    def _clean_product_name(self, product_name: str) -> str:
        """Enhanced product name cleaning and standardization"""
        if not product_name:
            return "Producto sin nombre"

        # Remove extra whitespace
        cleaned = re.sub(r'\s+', ' ', str(product_name).strip())

        # Remove special characters but keep Spanish characters
        cleaned = re.sub(r'[^\w\s\-áéíóúñçüÁÉÍÓÚÑÇÜ]', '', cleaned)

        # Capitalize first letter of each word
        cleaned = cleaned.title()

        # Enhanced corrections for Spanish bakeries
        replacements = {
            'Pan De': 'Pan de',
            'Café Con': 'Café con',
            'Te ': 'Té ',
            'Bocadillo De': 'Bocadillo de',
            'Dulce De': 'Dulce de',
            'Tarta De': 'Tarta de',
        }

        for old, new in replacements.items():
            cleaned = cleaned.replace(old, new)

        return cleaned if cleaned else "Producto sin nombre"

    def _structure_messages(self, messages: List[Union[str, Dict]]) -> List[Dict[str, Any]]:
        """Convert string messages to structured format"""
        structured = []
        for msg in messages:
            if isinstance(msg, str):
                structured.append({
                    "type": "general_message",
                    "message": msg,
                    "field": None,
                    "row": None,
                    "code": "GENERAL_MESSAGE"
                })
            else:
                structured.append(msg)
        return structured

    def _generate_suggestions(
        self,
        validation_result: SalesValidationResult,
        format_type: str,
        warning_count: int
    ) -> List[str]:
        """Generate enhanced contextual suggestions"""
        suggestions = []

        if validation_result.is_valid:
            suggestions.append("El archivo está listo para procesamiento")
            suggestions.append(f"Se procesarán aproximadamente {validation_result.total_records} registros")

            if validation_result.total_records > 1000:
                suggestions.append("Archivo grande: el procesamiento puede tomar varios minutos")
                suggestions.append("Considera dividir archivos muy grandes para mejor rendimiento")

            if warning_count > 0:
                suggestions.append("Revisa las advertencias antes de continuar")
                suggestions.append("Los datos con advertencias se procesarán con valores por defecto")

            # Format-specific suggestions
            if format_type == "csv":
                suggestions.append("Asegúrate de que las fechas estén en formato DD/MM/YYYY")
                suggestions.append("Verifica que los números usen punto decimal (no coma)")
            elif format_type in ["excel", "xlsx"]:
                suggestions.append("Solo se procesará la primera hoja del archivo Excel")
                suggestions.append("Evita celdas combinadas y fórmulas complejas")
        else:
            suggestions.append("Corrige los errores antes de continuar")
            suggestions.append("Verifica que el archivo tenga el formato correcto")

            if format_type not in ["csv", "excel", "xlsx", "json"]:
                suggestions.append("Usa formato CSV o Excel para mejores resultados")
                suggestions.append("El formato JSON es para usuarios avanzados")

            if validation_result.total_records == 0:
                suggestions.append("Asegúrate de que el archivo contenga datos")
                suggestions.append("Verifica que el archivo no esté corrupto")

            # Missing column suggestions
            error_codes = [error.get("code", "") for error in validation_result.errors if isinstance(error, dict)]
            if "MISSING_DATE_COLUMN" in error_codes:
                suggestions.append("Incluye una columna de fecha (fecha, date, dia)")
            if "MISSING_PRODUCT_COLUMN" in error_codes:
                suggestions.append("Incluye una columna de producto (producto, product, item)")

        return suggestions


# Main DataImportService class with enhanced functionality
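A minimal usage sketch of the import flow above: validate first, then import, mirroring the two public methods. The tenant id, filename, and CSV payload are illustrative only; the payload keys (tenant_id, data, data_format) follow what validate_import_data reads.

# Hypothetical caller, e.g. from an API route; values are illustrative only.
from app.services.data_import_service import DataImportService

async def run_example():
    service = DataImportService()
    csv_payload = "fecha,producto,cantidad,ingresos\n01/02/2024,Pan de Centeno,12,18.60\n"

    validation = await service.validate_import_data({
        "tenant_id": "tenant-123",          # illustrative tenant id
        "data": csv_payload,
        "data_format": "csv",
    })
    if validation.is_valid:                  # with STRICT_VALIDATION, the sampled rows must parse cleanly
        result = await service.process_import(
            tenant_id="tenant-123",
            content=csv_payload,
            file_format="csv",
            filename="ventas_febrero.csv",   # illustrative filename
        )
        print(result.records_created, result.records_failed)

# Run with: asyncio.run(run_example())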
services/sales/app/services/messaging.py (new file, 232 lines)
@@ -0,0 +1,232 @@
# services/sales/app/services/messaging.py
"""
Sales Service Messaging - Event Publishing using shared messaging infrastructure
"""

import structlog
from typing import Dict, Any, Optional
from uuid import UUID
from datetime import datetime

from shared.messaging.rabbitmq import RabbitMQClient
from shared.messaging.events import BaseEvent, DataImportedEvent
from app.core.config import settings

logger = structlog.get_logger()


class SalesEventPublisher:
    """Sales service event publisher using RabbitMQ"""

    def __init__(self):
        self.enabled = True
        self._rabbitmq_client = None

    async def _get_rabbitmq_client(self):
        """Get or create RabbitMQ client"""
        if not self._rabbitmq_client:
            self._rabbitmq_client = RabbitMQClient(
                connection_url=settings.RABBITMQ_URL,
                service_name="sales-service"
            )
            await self._rabbitmq_client.connect()
        return self._rabbitmq_client

    async def publish_sales_created(self, sales_data: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
        """Publish sales created event"""
        try:
            if not self.enabled:
                return True

            # Create event
            event = BaseEvent(
                service_name="sales-service",
                data={
                    "record_id": str(sales_data.get("id")),
                    "tenant_id": str(sales_data.get("tenant_id")),
                    "product_name": sales_data.get("product_name"),
                    "revenue": float(sales_data.get("revenue", 0)),
                    "quantity_sold": sales_data.get("quantity_sold", 0),
                    "timestamp": datetime.now().isoformat()
                },
                event_type="sales.created",
                correlation_id=correlation_id
            )

            # Publish via RabbitMQ
            client = await self._get_rabbitmq_client()
            success = await client.publish_event(
                exchange_name="sales.events",
                routing_key="sales.created",
                event_data=event.to_dict()
            )

            if success:
                logger.info("Sales record created event published",
                            record_id=sales_data.get("id"),
                            tenant_id=sales_data.get("tenant_id"),
                            product=sales_data.get("product_name"))

            return success

        except Exception as e:
            logger.warning("Failed to publish sales created event", error=str(e))
            return False

    async def publish_sales_updated(self, sales_data: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
        """Publish sales updated event"""
        try:
            if not self.enabled:
                return True

            event = BaseEvent(
                service_name="sales-service",
                data={
                    "record_id": str(sales_data.get("id")),
                    "tenant_id": str(sales_data.get("tenant_id")),
                    "product_name": sales_data.get("product_name"),
                    "timestamp": datetime.now().isoformat()
                },
                event_type="sales.updated",
                correlation_id=correlation_id
            )

            client = await self._get_rabbitmq_client()
            success = await client.publish_event(
                exchange_name="sales.events",
                routing_key="sales.updated",
                event_data=event.to_dict()
            )

            if success:
                logger.info("Sales record updated event published",
                            record_id=sales_data.get("id"),
                            tenant_id=sales_data.get("tenant_id"))

            return success

        except Exception as e:
            logger.warning("Failed to publish sales updated event", error=str(e))
            return False

    async def publish_sales_deleted(self, record_id: UUID, tenant_id: UUID, correlation_id: Optional[str] = None) -> bool:
        """Publish sales deleted event"""
        try:
            if not self.enabled:
                return True

            event = BaseEvent(
                service_name="sales-service",
                data={
                    "record_id": str(record_id),
                    "tenant_id": str(tenant_id),
                    "timestamp": datetime.now().isoformat()
                },
                event_type="sales.deleted",
                correlation_id=correlation_id
            )

            client = await self._get_rabbitmq_client()
            success = await client.publish_event(
                exchange_name="sales.events",
                routing_key="sales.deleted",
                event_data=event.to_dict()
            )

            if success:
                logger.info("Sales record deleted event published",
                            record_id=record_id,
                            tenant_id=tenant_id)

            return success

        except Exception as e:
            logger.warning("Failed to publish sales deleted event", error=str(e))
            return False

    async def publish_data_imported(self, import_result: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
        """Publish data imported event"""
        try:
            if not self.enabled:
                return True

            event = DataImportedEvent(
                service_name="sales-service",
                data={
                    "records_created": import_result.get("records_created", 0),
                    "records_updated": import_result.get("records_updated", 0),
                    "records_failed": import_result.get("records_failed", 0),
                    "tenant_id": str(import_result.get("tenant_id")),
                    "success": import_result.get("success", False),
                    "file_name": import_result.get("file_name"),
                    "timestamp": datetime.now().isoformat()
                },
                correlation_id=correlation_id
            )

            client = await self._get_rabbitmq_client()
            success = await client.publish_event(
                exchange_name="data.events",
                routing_key="data.imported",
                event_data=event.to_dict()
            )

            if success:
                logger.info("Sales data imported event published",
                            records_created=import_result.get("records_created"),
                            tenant_id=import_result.get("tenant_id"),
                            success=import_result.get("success"))

            return success

        except Exception as e:
            logger.warning("Failed to publish data imported event", error=str(e))
            return False

    async def publish_analytics_generated(self, analytics_data: Dict[str, Any], correlation_id: Optional[str] = None) -> bool:
        """Publish analytics generated event"""
        try:
            if not self.enabled:
                return True

            event = BaseEvent(
                service_name="sales-service",
                data={
                    "tenant_id": str(analytics_data.get("tenant_id")),
                    "total_revenue": float(analytics_data.get("total_revenue", 0)),
                    "total_quantity": analytics_data.get("total_quantity", 0),
                    "total_transactions": analytics_data.get("total_transactions", 0),
                    "period_start": analytics_data.get("period_start"),
                    "period_end": analytics_data.get("period_end"),
                    "timestamp": datetime.now().isoformat()
                },
                event_type="analytics.generated",
                correlation_id=correlation_id
            )

            client = await self._get_rabbitmq_client()
            success = await client.publish_event(
                exchange_name="analytics.events",
                routing_key="analytics.generated",
                event_data=event.to_dict()
            )

            if success:
                logger.info("Sales analytics generated event published",
                            tenant_id=analytics_data.get("tenant_id"),
                            total_revenue=analytics_data.get("total_revenue"))

            return success

        except Exception as e:
            logger.warning("Failed to publish analytics generated event", error=str(e))
            return False

    async def cleanup(self):
        """Cleanup RabbitMQ connections"""
        if self._rabbitmq_client:
            await self._rabbitmq_client.disconnect()


# Global instance
sales_publisher = SalesEventPublisher()
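A short sketch of how the global publisher could be called after an import completes. The dict keys mirror what publish_data_imported reads above; the call site, correlation id, and file name are illustrative.

# Hypothetical call site after DataImportService.process_import finishes.
from app.services.messaging import sales_publisher

async def announce_import(tenant_id: str, result) -> None:
    # result is a SalesImportResult; keys below match publish_data_imported.
    await sales_publisher.publish_data_imported(
        {
            "tenant_id": tenant_id,
            "records_created": result.records_created,
            "records_updated": result.records_updated,
            "records_failed": result.records_failed,
            "success": result.success,
            "file_name": "ventas_febrero.csv",  # illustrative
        },
        correlation_id="req-42",  # illustrative correlation id
    )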
services/sales/app/services/product_service.py (new file, 171 lines)
@@ -0,0 +1,171 @@
# services/sales/app/services/product_service.py
"""
Product Service - Business Logic Layer
"""

from typing import List, Optional, Dict, Any
from uuid import UUID
from datetime import datetime
import structlog

from app.models.sales import Product
from app.repositories.product_repository import ProductRepository
from app.schemas.sales import ProductCreate, ProductUpdate
from app.core.database import get_db_transaction

logger = structlog.get_logger()


class ProductService:
    """Service layer for product operations"""

    def __init__(self):
        pass

    async def create_product(
        self,
        product_data: ProductCreate,
        tenant_id: UUID,
        user_id: Optional[UUID] = None
    ) -> Product:
        """Create a new product with business validation"""
        try:
            # Business validation
            await self._validate_product_data(product_data, tenant_id)

            async with get_db_transaction() as db:
                repository = ProductRepository(db)
                product = await repository.create_product(product_data, tenant_id)

                logger.info("Created product", product_id=product.id, tenant_id=tenant_id)
                return product

        except Exception as e:
            logger.error("Failed to create product", error=str(e), tenant_id=tenant_id)
            raise

    async def update_product(
        self,
        product_id: UUID,
        update_data: ProductUpdate,
        tenant_id: UUID
    ) -> Product:
        """Update a product"""
        try:
            async with get_db_transaction() as db:
                repository = ProductRepository(db)

                # Verify product belongs to tenant
                existing_product = await repository.get_by_id(product_id)
                if not existing_product or existing_product.tenant_id != tenant_id:
                    raise ValueError(f"Product {product_id} not found for tenant {tenant_id}")

                # Update the product
                updated_product = await repository.update(product_id, update_data.model_dump(exclude_unset=True))

                logger.info("Updated product", product_id=product_id, tenant_id=tenant_id)
                return updated_product

        except Exception as e:
            logger.error("Failed to update product", error=str(e), product_id=product_id, tenant_id=tenant_id)
            raise

    async def get_products(self, tenant_id: UUID) -> List[Product]:
        """Get all products for a tenant"""
        try:
            async with get_db_transaction() as db:
                repository = ProductRepository(db)
                products = await repository.get_by_tenant(tenant_id)

                logger.info("Retrieved products", count=len(products), tenant_id=tenant_id)
                return products

        except Exception as e:
            logger.error("Failed to get products", error=str(e), tenant_id=tenant_id)
            raise

    async def get_product(self, product_id: UUID, tenant_id: UUID) -> Optional[Product]:
        """Get a specific product"""
        try:
            async with get_db_transaction() as db:
                repository = ProductRepository(db)
                product = await repository.get_by_id(product_id)

                # Verify product belongs to tenant
                if product and product.tenant_id != tenant_id:
                    return None

                return product

        except Exception as e:
            logger.error("Failed to get product", error=str(e), product_id=product_id, tenant_id=tenant_id)
            raise

    async def delete_product(self, product_id: UUID, tenant_id: UUID) -> bool:
        """Delete a product"""
        try:
            async with get_db_transaction() as db:
                repository = ProductRepository(db)

                # Verify product belongs to tenant
                existing_product = await repository.get_by_id(product_id)
                if not existing_product or existing_product.tenant_id != tenant_id:
                    raise ValueError(f"Product {product_id} not found for tenant {tenant_id}")

                success = await repository.delete(product_id)

                if success:
                    logger.info("Deleted product", product_id=product_id, tenant_id=tenant_id)

                return success

        except Exception as e:
            logger.error("Failed to delete product", error=str(e), product_id=product_id, tenant_id=tenant_id)
            raise

    async def get_products_by_category(self, tenant_id: UUID, category: str) -> List[Product]:
        """Get products by category"""
        try:
            async with get_db_transaction() as db:
                repository = ProductRepository(db)
                products = await repository.get_by_category(tenant_id, category)

                logger.info("Retrieved products by category", count=len(products), category=category, tenant_id=tenant_id)
                return products

        except Exception as e:
            logger.error("Failed to get products by category", error=str(e), category=category, tenant_id=tenant_id)
            raise

    async def search_products(self, tenant_id: UUID, search_term: str) -> List[Product]:
        """Search products by name or SKU"""
        try:
            async with get_db_transaction() as db:
                repository = ProductRepository(db)
                products = await repository.search_products(tenant_id, search_term)

                logger.info("Searched products", count=len(products), search_term=search_term, tenant_id=tenant_id)
                return products

        except Exception as e:
            logger.error("Failed to search products", error=str(e), search_term=search_term, tenant_id=tenant_id)
            raise

    async def _validate_product_data(self, product_data: ProductCreate, tenant_id: UUID):
        """Validate product data according to business rules"""
        # Check if product with same SKU already exists
        if product_data.sku:
            async with get_db_transaction() as db:
                repository = ProductRepository(db)
                existing_product = await repository.get_by_sku(tenant_id, product_data.sku)
                if existing_product:
                    raise ValueError(f"Product with SKU {product_data.sku} already exists for tenant {tenant_id}")

        # Validate seasonal dates
        if product_data.is_seasonal:
            if not product_data.seasonal_start or not product_data.seasonal_end:
                raise ValueError("Seasonal products must have start and end dates")
            if product_data.seasonal_start >= product_data.seasonal_end:
                raise ValueError("Seasonal start date must be before end date")

        logger.info("Product data validation passed", tenant_id=tenant_id)
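A small sketch of the business validation path above. Only sku, is_seasonal, seasonal_start, and seasonal_end are taken from _validate_product_data; the name field, the concrete values, and the date types are assumptions for illustration.

# Hypothetical call site; values are illustrative only.
from datetime import datetime
from uuid import uuid4

from app.schemas.sales import ProductCreate
from app.services.product_service import ProductService

async def create_seasonal_product():
    service = ProductService()
    payload = ProductCreate(
        name="Roscón de Reyes",               # assumed field name, not from the commit
        sku="ROSCON-001",                     # a duplicate SKU for the tenant raises ValueError
        is_seasonal=True,
        seasonal_start=datetime(2024, 12, 1),
        seasonal_end=datetime(2025, 1, 7),    # must be strictly after seasonal_start
    )
    return await service.create_product(payload, tenant_id=uuid4())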
services/sales/app/services/sales_service.py (new file, 282 lines)
@@ -0,0 +1,282 @@
# services/sales/app/services/sales_service.py
"""
Sales Service - Business Logic Layer
"""

from typing import List, Optional, Dict, Any
from uuid import UUID
from datetime import datetime
import structlog

from app.models.sales import SalesData
from app.repositories.sales_repository import SalesRepository
from app.schemas.sales import SalesDataCreate, SalesDataUpdate, SalesDataQuery, SalesAnalytics
from app.core.database import get_db_transaction
from shared.database.exceptions import DatabaseError

logger = structlog.get_logger()


class SalesService:
    """Service layer for sales operations"""

    def __init__(self):
        pass

    async def create_sales_record(
        self,
        sales_data: SalesDataCreate,
        tenant_id: UUID,
        user_id: Optional[UUID] = None
    ) -> SalesData:
        """Create a new sales record with business validation"""
        try:
            # Business validation
            await self._validate_sales_data(sales_data, tenant_id)

            # Set user who created the record
            if user_id:
                sales_data_dict = sales_data.model_dump()
                sales_data_dict['created_by'] = user_id
                sales_data = SalesDataCreate(**sales_data_dict)

            async with get_db_transaction() as db:
                repository = SalesRepository(db)
                record = await repository.create_sales_record(sales_data, tenant_id)

                # Additional business logic (e.g., notifications, analytics updates)
                await self._post_create_actions(record)

                return record

        except Exception as e:
            logger.error("Failed to create sales record in service", error=str(e), tenant_id=tenant_id)
            raise

    async def update_sales_record(
        self,
        record_id: UUID,
        update_data: SalesDataUpdate,
        tenant_id: UUID
    ) -> SalesData:
        """Update a sales record"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)

                # Verify record belongs to tenant
                existing_record = await repository.get_by_id(record_id)
                if not existing_record or existing_record.tenant_id != tenant_id:
                    raise ValueError(f"Sales record {record_id} not found for tenant {tenant_id}")

                # Update the record
                updated_record = await repository.update(record_id, update_data.model_dump(exclude_unset=True))

                logger.info("Updated sales record", record_id=record_id, tenant_id=tenant_id)
                return updated_record

        except Exception as e:
            logger.error("Failed to update sales record", error=str(e), record_id=record_id, tenant_id=tenant_id)
            raise

    async def get_sales_records(
        self,
        tenant_id: UUID,
        query_params: Optional[SalesDataQuery] = None
    ) -> List[SalesData]:
        """Get sales records for a tenant"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)
                records = await repository.get_by_tenant(tenant_id, query_params)

                logger.info("Retrieved sales records", count=len(records), tenant_id=tenant_id)
                return records

        except Exception as e:
            logger.error("Failed to get sales records", error=str(e), tenant_id=tenant_id)
            raise

    async def get_sales_record(self, record_id: UUID, tenant_id: UUID) -> Optional[SalesData]:
        """Get a specific sales record"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)
                record = await repository.get_by_id(record_id)

                # Verify record belongs to tenant
                if record and record.tenant_id != tenant_id:
                    return None

                return record

        except Exception as e:
            logger.error("Failed to get sales record", error=str(e), record_id=record_id, tenant_id=tenant_id)
            raise

    async def delete_sales_record(self, record_id: UUID, tenant_id: UUID) -> bool:
        """Delete a sales record"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)

                # Verify record belongs to tenant
                existing_record = await repository.get_by_id(record_id)
                if not existing_record or existing_record.tenant_id != tenant_id:
                    raise ValueError(f"Sales record {record_id} not found for tenant {tenant_id}")

                success = await repository.delete(record_id)

                if success:
                    logger.info("Deleted sales record", record_id=record_id, tenant_id=tenant_id)

                return success

        except Exception as e:
            logger.error("Failed to delete sales record", error=str(e), record_id=record_id, tenant_id=tenant_id)
            raise

    async def get_product_sales(
        self,
        tenant_id: UUID,
        product_name: str,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None
    ) -> List[SalesData]:
        """Get sales records for a specific product"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)
                records = await repository.get_by_product(tenant_id, product_name, start_date, end_date)

                logger.info(
                    "Retrieved product sales",
                    count=len(records),
                    product=product_name,
                    tenant_id=tenant_id
                )
                return records

        except Exception as e:
            logger.error("Failed to get product sales", error=str(e), tenant_id=tenant_id, product=product_name)
            raise

    async def get_sales_analytics(
        self,
        tenant_id: UUID,
        start_date: Optional[datetime] = None,
        end_date: Optional[datetime] = None
    ) -> Dict[str, Any]:
        """Get sales analytics for a tenant"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)
                analytics = await repository.get_analytics(tenant_id, start_date, end_date)

                logger.info("Retrieved sales analytics", tenant_id=tenant_id)
                return analytics

        except Exception as e:
            logger.error("Failed to get sales analytics", error=str(e), tenant_id=tenant_id)
            raise

    async def get_product_categories(self, tenant_id: UUID) -> List[str]:
        """Get distinct product categories"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)
                categories = await repository.get_product_categories(tenant_id)

                return categories

        except Exception as e:
            logger.error("Failed to get product categories", error=str(e), tenant_id=tenant_id)
            raise

    async def validate_sales_record(
        self,
        record_id: UUID,
        tenant_id: UUID,
        validation_notes: Optional[str] = None
    ) -> SalesData:
        """Validate a sales record"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)

                # Verify record belongs to tenant
                existing_record = await repository.get_by_id(record_id)
                if not existing_record or existing_record.tenant_id != tenant_id:
                    raise ValueError(f"Sales record {record_id} not found for tenant {tenant_id}")

                validated_record = await repository.validate_record(record_id, validation_notes)

                logger.info("Validated sales record", record_id=record_id, tenant_id=tenant_id)
                return validated_record

        except Exception as e:
            logger.error("Failed to validate sales record", error=str(e), record_id=record_id, tenant_id=tenant_id)
            raise

    async def _validate_sales_data(self, sales_data: SalesDataCreate, tenant_id: UUID):
        """Validate sales data according to business rules"""
        # Example business validations

        # Check if revenue matches quantity * unit_price (if unit_price provided)
        if sales_data.unit_price and sales_data.quantity_sold:
            expected_revenue = sales_data.unit_price * sales_data.quantity_sold
            # Apply discount if any
            if sales_data.discount_applied:
                expected_revenue *= (1 - sales_data.discount_applied / 100)

            # Allow for small rounding differences
            if abs(float(sales_data.revenue) - float(expected_revenue)) > 0.01:
                logger.warning(
                    "Revenue mismatch detected",
                    expected=float(expected_revenue),
                    actual=float(sales_data.revenue),
                    tenant_id=tenant_id
                )

        # Check date validity (not in future)
        if sales_data.date > datetime.utcnow():
            raise ValueError("Sales date cannot be in the future")

        # Additional business rules can be added here
        logger.info("Sales data validation passed", tenant_id=tenant_id)

    async def _post_create_actions(self, record: SalesData):
        """Actions to perform after creating a sales record"""
        try:
            # Here you could:
            # - Send notifications
            # - Update analytics caches
            # - Trigger ML model updates
            # - Update inventory levels (future integration)

            logger.info("Post-create actions completed", record_id=record.id)

        except Exception as e:
            # Don't fail the main operation for auxiliary actions
            logger.warning("Failed to execute post-create actions", error=str(e), record_id=record.id)

    async def get_products_list(self, tenant_id: str) -> List[Dict[str, Any]]:
        """Get list of all products with sales data for tenant using repository pattern"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)

                # Use repository method for product statistics
                products = await repository.get_product_statistics(tenant_id)

                logger.debug("Products list retrieved successfully",
                             tenant_id=tenant_id,
                             product_count=len(products))

                return products

        except Exception as e:
            logger.error("Failed to get products list",
                         error=str(e),
                         tenant_id=tenant_id)
            raise DatabaseError(f"Failed to get products list: {str(e)}")
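The revenue check in _validate_sales_data above amounts to expected = unit_price * quantity_sold * (1 - discount_applied / 100), with a 0.01 tolerance before a mismatch warning is logged. A worked sketch with illustrative numbers:

# Worked example of the consistency rule enforced by _validate_sales_data.
unit_price = 1.55          # illustrative price per unit
quantity_sold = 12
discount_applied = 10      # percent; the method divides by 100

expected_revenue = unit_price * quantity_sold        # 18.60
expected_revenue *= (1 - discount_applied / 100)     # 16.74

reported_revenue = 16.74
assert abs(reported_revenue - expected_revenue) <= 0.01  # within tolerance, no warning logged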