Fix new services implementation 5
@@ -236,11 +236,19 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
try:
    # Process by year and month to avoid memory issues
    current_date = start_date.replace(day=1)  # Start from beginning of month
    now = datetime.now()

    while current_date <= end_date:
        year = current_date.year
        month = current_date.month

        # Skip current month and future months (no historical data available yet)
        if (year == now.year and month >= now.month) or year > now.year:
            self.logger.info("Skipping current/future month - no historical data available",
                             year=year, month=month)
            current_date = self._next_month(current_date)
            continue

        # Build historical URL
        zip_url = self.api_client._build_historical_url(year, month)
@@ -251,7 +259,7 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
        zip_content = await self.api_client.fetch_historical_zip(zip_url)
        if not zip_content:
            self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
            current_date = current_date.replace(month=current_date.month + 1) if current_date.month < 12 else current_date.replace(year=current_date.year + 1, month=1)
            current_date = self._next_month(current_date)
            continue

        # Process ZIP content with enhanced parsing
@@ -286,11 +294,8 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
                         filtered_records=len(filtered_records),
                         total_records=len(historical_records))

        # Move to next month
        if current_date.month == 12:
            current_date = current_date.replace(year=current_date.year + 1, month=1)
        else:
            current_date = current_date.replace(month=current_date.month + 1)
        # Move to next month - extracted to helper method
        current_date = self._next_month(current_date)

    return historical_records
@@ -347,4 +352,10 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
                          zip_url=zip_url, error=str(e))
        return []

    def _next_month(self, current_date: datetime) -> datetime:
        """Helper method to move to next month"""
        if current_date.month == 12:
            return current_date.replace(year=current_date.year + 1, month=1)
        else:
            return current_date.replace(month=current_date.month + 1)
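A minimal behavioural sketch of the extracted helper (illustrative, not part of the commit): because the fetch loop above normalizes current_date to the first day of the month, the replace() calls can never land on an invalid day.

# Illustrative only: behaviour for the normalized dates used by the loop
#   _next_month(datetime(2024, 3, 1))  -> datetime(2024, 4, 1)
#   _next_month(datetime(2024, 12, 1)) -> datetime(2025, 1, 1)   # year rollover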
@@ -42,22 +42,9 @@ class MadridTrafficAPIClient(BaseAPIClient):

    def _build_historical_url(self, year: int, month: int) -> str:
        """Build historical ZIP URL for given year and month"""
        # Madrid historical data URL pattern
        base_url = "https://datos.madrid.es/egob/catalogo/208627"

        # URL numbering pattern (this may need adjustment based on actual URLs)
        # Note: Historical data is only available for past periods, not current/future
        if year == 2023:
            url_number = 116 + (month - 1)  # 116-127 for 2023
        elif year == 2024:
            url_number = 128 + (month - 1)  # 128-139 for 2024
        elif year == 2025:
            # For 2025, use the continuing numbering from 2024
            url_number = 140 + (month - 1)  # Starting from 140 for January 2025
        else:
            url_number = 116  # Fallback to 2023 data

        return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"
        # Madrid uses a direct file pattern now: https://datos.madrid.es/egobfiles/MANUAL/208627/MM-YYYY.zip
        # Only historical data is available (not current month)
        return f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"

    async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
        """Fetch current traffic XML data"""
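For reference, a sketch of the URLs the new direct file pattern produces (illustrative, derived from the f-string above):

# Illustrative only:
#   _build_historical_url(2024, 3)  -> "https://datos.madrid.es/egobfiles/MANUAL/208627/03-2024.zip"
#   _build_historical_url(2023, 12) -> "https://datos.madrid.es/egobfiles/MANUAL/208627/12-2023.zip"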
@@ -84,18 +84,22 @@ class TrafficRepository:
        if not traffic_data_list:
            return 0

        # Check for existing records to avoid duplicates
        # Check for existing records to avoid duplicates - batch the queries to avoid parameter limit
        dates = [data.get('date') for data in traffic_data_list if data.get('date')]
        existing_dates = set()
        if dates:
            existing_stmt = select(TrafficData.date).where(
                and_(
                    TrafficData.location_id == location_id,
                    TrafficData.date.in_(dates)
            # PostgreSQL has a limit of 32767 parameters, so batch the queries
            batch_size = 30000  # Safe batch size under the limit
            for i in range(0, len(dates), batch_size):
                date_batch = dates[i:i + batch_size]
                existing_stmt = select(TrafficData.date).where(
                    and_(
                        TrafficData.location_id == location_id,
                        TrafficData.date.in_(date_batch)
                    )
                )
                )
            )
            result = await self.session.execute(existing_stmt)
            existing_dates = {row[0] for row in result.fetchall()}
            result = await self.session.execute(existing_stmt)
            existing_dates.update({row[0] for row in result.fetchall()})
        logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")

        batch_records = []
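A standalone sketch of the batching idea (illustrative; chunked, find_existing and stmt_builder are placeholder names, not from the commit): the PostgreSQL wire protocol encodes the bind-parameter count in a 16-bit field, so drivers such as asyncpg cap a single statement at 32767 parameters and large IN (...) lists have to be split across queries.

from itertools import islice

def chunked(values, size):
    """Yield successive chunks of at most `size` items."""
    it = iter(values)
    while chunk := list(islice(it, size)):
        yield chunk

async def find_existing(session, stmt_builder, values, batch_size=30_000):
    """Run one query per chunk and union the results, staying under the parameter limit."""
    found = set()
    for chunk in chunked(values, batch_size):
        result = await session.execute(stmt_builder(chunk))
        found.update(row[0] for row in result.fetchall())
    return found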
@@ -420,20 +420,20 @@ class EnhancedForecastingService:
            if prediction['prediction'] > 100:  # Threshold for high demand
                alerts_to_create.append({
                    "tenant_id": str(forecast.tenant_id),
                    "forecast_id": forecast.id,
                    "forecast_id": str(forecast.id),  # Convert UUID to string
                    "alert_type": "high_demand",
                    "severity": "high" if prediction['prediction'] > 200 else "medium",
                    "message": f"High demand predicted for inventory product {forecast.inventory_product_id}: {prediction['prediction']:.1f} units"
                    "message": f"High demand predicted for inventory product {str(forecast.inventory_product_id)}: {prediction['prediction']:.1f} units"
                })

            # Check for low demand alert
            elif prediction['prediction'] < 10:  # Threshold for low demand
                alerts_to_create.append({
                    "tenant_id": str(forecast.tenant_id),
                    "forecast_id": forecast.id,
                    "forecast_id": str(forecast.id),  # Convert UUID to string
                    "alert_type": "low_demand",
                    "severity": "low",
                    "message": f"Low demand predicted for inventory product {forecast.inventory_product_id}: {prediction['prediction']:.1f} units"
                    "message": f"Low demand predicted for inventory product {str(forecast.inventory_product_id)}: {prediction['prediction']:.1f} units"
                })

            # Check for stockout risk (very low prediction with narrow confidence interval)
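Context for the str() conversions above (illustrative, not from the commit): a raw uuid.UUID survives inside a Python dict but breaks as soon as the alert payload is serialized, for example by json.dumps, so converting at build time keeps the payload serializable.

import json
import uuid

alert = {"forecast_id": uuid.uuid4()}
# json.dumps(alert) raises: TypeError: Object of type UUID is not JSON serializable
alert["forecast_id"] = str(alert["forecast_id"])
json.dumps(alert)  # now fine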
@@ -441,10 +441,10 @@ class EnhancedForecastingService:
            if prediction['prediction'] < 5 and confidence_interval < 10:
                alerts_to_create.append({
                    "tenant_id": str(forecast.tenant_id),
                    "forecast_id": forecast.id,
                    "forecast_id": str(forecast.id),  # Convert UUID to string
                    "alert_type": "stockout_risk",
                    "severity": "critical",
                    "message": f"Stockout risk for inventory product {forecast.inventory_product_id}: predicted {prediction['prediction']:.1f} units with high confidence"
                    "message": f"Stockout risk for inventory product {str(forecast.inventory_product_id)}: predicted {prediction['prediction']:.1f} units with high confidence"
                })

        # Create alerts
@@ -462,7 +462,7 @@ class EnhancedForecastingService:
        return ForecastResponse(
            id=str(cache_entry.id),
            tenant_id=str(cache_entry.tenant_id),
            inventory_product_id=cache_entry.inventory_product_id,
            inventory_product_id=str(cache_entry.inventory_product_id),  # Convert UUID to string
            location=cache_entry.location,
            forecast_date=cache_entry.forecast_date,
            predicted_demand=cache_entry.predicted_demand,
@@ -486,7 +486,7 @@ class EnhancedForecastingService:
        return ForecastResponse(
            id=str(forecast.id),
            tenant_id=str(forecast.tenant_id),
            inventory_product_id=forecast.inventory_product_id,
            inventory_product_id=str(forecast.inventory_product_id),  # Convert UUID to string
            location=forecast.location,
            forecast_date=forecast.forecast_date,
            predicted_demand=forecast.predicted_demand,
@@ -514,7 +514,7 @@ class EnhancedForecastingService:
        return {
            "id": str(forecast.id),
            "tenant_id": str(forecast.tenant_id),
            "inventory_product_id": forecast.inventory_product_id,
            "inventory_product_id": str(forecast.inventory_product_id),  # Convert UUID to string
            "location": forecast.location,
            "forecast_date": forecast.forecast_date.isoformat(),
            "predicted_demand": forecast.predicted_demand,
@@ -90,6 +90,13 @@ class IngredientRepository(BaseRepository[Ingredient, IngredientCreate, Ingredie
    ) -> List[Ingredient]:
        """Get ingredients for a tenant with filtering"""
        try:
            # Handle search filter separately since it requires special query logic
            if filters and filters.get('search'):
                search_term = filters['search']
                logger.info(f"Searching ingredients with term: '{search_term}'", tenant_id=tenant_id)
                return await self.search_ingredients(tenant_id, search_term, skip, limit)

            # Handle other filters with standard multi-get
            query_filters = {'tenant_id': tenant_id}
            if filters:
                if filters.get('category'):
@@ -26,28 +26,6 @@ def get_sales_service():
    """Dependency injection for SalesService"""
    return SalesService()

@router.get("/tenants/{tenant_id}/sales/products")
async def get_products_list(
    tenant_id: UUID = Path(..., description="Tenant ID"),
    current_user: Dict[str, Any] = Depends(get_current_user_dep),
    sales_service: SalesService = Depends(get_sales_service)
):
    """Get list of products using repository pattern"""
    try:
        logger.debug("Getting products list with repository pattern", tenant_id=tenant_id)

        products = await sales_service.get_products_list(str(tenant_id))

        logger.debug("Products list retrieved using repository",
                     count=len(products),
                     tenant_id=tenant_id)
        return products

    except Exception as e:
        logger.error("Failed to get products list",
                     error=str(e),
                     tenant_id=tenant_id)
        raise HTTPException(status_code=500, detail=f"Failed to get products list: {str(e)}")

@router.post("/tenants/{tenant_id}/sales", response_model=SalesDataResponse)
async def create_sales_record(
@@ -97,8 +97,9 @@ class SalesRepository(BaseRepository[SalesData, SalesDataCreate, SalesDataUpdate
            # Apply pagination
            stmt = stmt.offset(query_params.offset).limit(query_params.limit)
        else:
            # Default ordering
            stmt = stmt.order_by(desc(SalesData.date)).limit(50)
            # Default ordering with safety limit for direct repository calls
            # Note: API calls always provide query_params, so this only applies to direct usage
            stmt = stmt.order_by(desc(SalesData.date)).limit(10000)

        result = await self.session.execute(stmt)
        records = result.scalars().all()
@@ -279,24 +280,3 @@ class SalesRepository(BaseRepository[SalesData, SalesDataCreate, SalesDataUpdate
            logger.error("Failed to validate sales record", error=str(e), record_id=record_id)
            raise

    async def get_product_statistics(self, tenant_id: str) -> List[Dict[str, Any]]:
        """Get product statistics for tenant"""
        try:
            # Note: product_name field was removed - product info now managed via inventory service
            # This method should be updated to query products from inventory service
            # For now, return inventory_product_ids to avoid breaking existing code
            stmt = select(SalesData.inventory_product_id).where(
                and_(
                    SalesData.tenant_id == tenant_id,
                    SalesData.inventory_product_id.is_not(None)
                )
            ).distinct()

            result = await self.session.execute(stmt)
            products = [str(row[0]) for row in result if row[0]]

            return sorted(products)

        except Exception as e:
            logger.error("Failed to get product categories", error=str(e), tenant_id=tenant_id)
            raise
@@ -286,14 +286,67 @@ class AIOnboardingService:
            )
            suggestions.append(suggestion)

        business_model = BusinessModelAnalysis(
            model=business_model_raw.get("model", "unknown"),
            confidence=business_model_raw.get("confidence", 0.0),
            ingredient_count=business_model_raw.get("ingredient_count", 0),
            finished_product_count=business_model_raw.get("finished_product_count", 0),
            ingredient_ratio=business_model_raw.get("ingredient_ratio", 0.0),
            recommendations=business_model_raw.get("recommendations", [])
        )
        # Check if enhanced business intelligence data is available
        bi_data = product_analysis.get('__business_intelligence__')

        if bi_data and bi_data.get('confidence_score', 0) > 0.6:
            # Use enhanced business intelligence analysis
            business_type = bi_data.get('business_type', 'bakery')
            business_model_detected = bi_data.get('business_model', 'individual')

            # Map business intelligence results to existing model format
            model_mapping = {
                'individual': 'individual_bakery',
                'central_distribution': 'central_baker_satellite',
                'central_bakery': 'central_baker_satellite',
                'hybrid': 'hybrid_bakery'
            }

            mapped_model = model_mapping.get(business_model_detected, 'individual_bakery')

            # Count ingredients vs finished products from suggestions
            ingredient_count = sum(1 for s in suggestions if s.product_type == 'ingredient')
            finished_product_count = sum(1 for s in suggestions if s.product_type == 'finished_product')
            total_products = len(suggestions)
            ingredient_ratio = ingredient_count / total_products if total_products > 0 else 0.0

            # Enhanced recommendations based on BI analysis
            enhanced_recommendations = bi_data.get('recommendations', [])

            # Add business type specific recommendations
            if business_type == 'coffee_shop':
                enhanced_recommendations.extend([
                    "Configure beverage inventory management",
                    "Set up quick-service item tracking",
                    "Enable all-day service optimization"
                ])

            business_model = BusinessModelAnalysis(
                model=mapped_model,
                confidence=bi_data.get('confidence_score', 0.0),
                ingredient_count=ingredient_count,
                finished_product_count=finished_product_count,
                ingredient_ratio=ingredient_ratio,
                recommendations=enhanced_recommendations[:6]  # Limit to top 6 recommendations
            )

            logger.info("Using enhanced business intelligence for model analysis",
                        detected_type=business_type,
                        detected_model=business_model_detected,
                        mapped_model=mapped_model,
                        confidence=bi_data.get('confidence_score'))
        else:
            # Fallback to basic inventory service analysis
            business_model = BusinessModelAnalysis(
                model=business_model_raw.get("model", "unknown"),
                confidence=business_model_raw.get("confidence", 0.0),
                ingredient_count=business_model_raw.get("ingredient_count", 0),
                finished_product_count=business_model_raw.get("finished_product_count", 0),
                ingredient_ratio=business_model_raw.get("ingredient_ratio", 0.0),
                recommendations=business_model_raw.get("recommendations", [])
            )

            logger.info("Using basic inventory service business model analysis")

        # Calculate confidence metrics
        high_confidence_count = sum(1 for s in suggestions if s.confidence_score >= 0.7)
@@ -674,6 +727,85 @@ class AIOnboardingService:
                "avg_unit_price": avg_unit_price
            }

            # Add enhanced business intelligence analysis
            try:
                from app.services.business_intelligence_service import BusinessIntelligenceService

                bi_service = BusinessIntelligenceService()

                # Convert parsed data to format expected by BI service
                sales_data = []
                product_data = []

                for row in rows:
                    # Create sales record from CSV row
                    sales_record = {
                        'date': row.get(date_column, ''),
                        'product_name': row.get(product_column, ''),
                        'name': row.get(product_column, ''),
                        'quantity_sold': 0,
                        'revenue': 0,
                        'location_id': row.get('location', 'main'),
                        'sales_channel': row.get('channel', 'in_store'),
                        'supplier_name': row.get('supplier', ''),
                        'brand': row.get('brand', '')
                    }

                    # Parse quantity
                    if quantity_column:
                        try:
                            qty_raw = row.get(quantity_column, 1)
                            if qty_raw and str(qty_raw).strip():
                                sales_record['quantity_sold'] = int(float(str(qty_raw).replace(',', '.')))
                        except:
                            sales_record['quantity_sold'] = 1

                    # Parse revenue
                    if revenue_column:
                        try:
                            rev_raw = row.get(revenue_column)
                            if rev_raw and str(rev_raw).strip():
                                sales_record['revenue'] = float(str(rev_raw).replace(',', '.').replace('€', '').replace('$', '').strip())
                        except:
                            pass

                    sales_data.append(sales_record)

                    # Create product data entry
                    product_data.append({
                        'name': sales_record['product_name'],
                        'supplier_name': sales_record.get('supplier_name', ''),
                        'brand': sales_record.get('brand', '')
                    })

                # Run business intelligence analysis
                if sales_data:
                    detection_result = await bi_service.analyze_business_from_sales_data(
                        sales_data=sales_data,
                        product_data=product_data
                    )

                    # Store business intelligence results in product_analysis
                    product_analysis['__business_intelligence__'] = {
                        "business_type": detection_result.business_type,
                        "business_model": detection_result.business_model,
                        "confidence_score": detection_result.confidence_score,
                        "indicators": detection_result.indicators,
                        "recommendations": detection_result.recommendations,
                        "analysis_summary": f"{detection_result.business_type.title()} - {detection_result.business_model.replace('_', ' ').title()}"
                    }

                    logger.info("Enhanced business intelligence analysis completed",
                                business_type=detection_result.business_type,
                                business_model=detection_result.business_model,
                                confidence=detection_result.confidence_score)
                else:
                    logger.warning("No sales data available for business intelligence analysis")

            except Exception as bi_error:
                logger.warning("Business intelligence analysis failed", error=str(bi_error))
                # Continue with basic analysis even if BI fails

            return product_analysis

        except Exception as e:
@@ -428,7 +428,7 @@ class DataImportService:
        repository: SalesRepository,
        filename: Optional[str] = None
    ) -> Dict[str, Any]:
        """Enhanced CSV processing with better data handling"""
        """Enhanced CSV processing with batch product resolution for better reliability"""
        try:
            reader = csv.DictReader(io.StringIO(csv_content))
            rows = list(reader)
@@ -445,22 +445,41 @@ class DataImportService:
            # Enhanced column mapping
            column_mapping = self._detect_columns(list(rows[0].keys()))

            records_created = 0
            errors = []
            warnings = []
            # Pre-process to extract unique products for batch creation
            unique_products = set()
            parsed_rows = []

            logger.info(f"Processing {len(rows)} records from CSV with enhanced mapping")
            logger.info(f"Pre-processing {len(rows)} records to identify unique products")

            for index, row in enumerate(rows):
                try:
                    # Enhanced data parsing and validation
                    parsed_data = await self._parse_row_data(row, column_mapping, index + 1)
                    if parsed_data.get("skip"):
                        errors.extend(parsed_data.get("errors", []))
                        warnings.extend(parsed_data.get("warnings", []))
                        continue

                    # Resolve product name to inventory_product_id
                    if not parsed_data.get("skip"):
                        unique_products.add((
                            parsed_data["product_name"],
                            parsed_data.get("product_category", "general")
                        ))
                        parsed_rows.append((index, parsed_data))
                except Exception as e:
                    logger.warning(f"Failed to parse row {index + 1}: {e}")
                    continue

            logger.info(f"Found {len(unique_products)} unique products, attempting batch resolution")

            # Try to resolve/create all unique products in batch
            await self._batch_resolve_products(unique_products, tenant_id)

            # Now process the actual sales records
            records_created = 0
            errors = []
            warnings = []

            logger.info(f"Processing {len(parsed_rows)} validated records for sales creation")

            for index, parsed_data in parsed_rows:
                try:
                    # Resolve product name to inventory_product_id (should be cached now)
                    inventory_product_id = await self._resolve_product_to_inventory_id(
                        parsed_data["product_name"],
                        parsed_data.get("product_category"),
@@ -914,47 +933,57 @@ class DataImportService:
        logger.info("Import cache cleared for new session")

    async def _resolve_product_to_inventory_id(self, product_name: str, product_category: Optional[str], tenant_id: UUID) -> Optional[UUID]:
        """Resolve a product name to an inventory_product_id via the inventory service with caching and rate limiting"""
        """Resolve a product name to an inventory_product_id via the inventory service with improved error handling and fallback"""

        # Check cache first
        if product_name in self.product_cache:
            logger.debug("Product resolved from cache", product_name=product_name, tenant_id=tenant_id)
            return self.product_cache[product_name]

        # Skip if this product already failed to resolve
        # Skip if this product already failed to resolve after all attempts
        if product_name in self.failed_products:
            logger.debug("Skipping previously failed product", product_name=product_name, tenant_id=tenant_id)
            return None

        max_retries = 3
        base_delay = 1.0  # Start with 1 second delay
        max_retries = 5  # Increased retries
        base_delay = 2.0  # Increased base delay
        fallback_retry_delay = 10.0  # Longer delay for fallback attempts

        for attempt in range(max_retries):
            try:
                # Add delay before API calls to avoid rate limiting
                # Add progressive delay to avoid rate limiting
                if attempt > 0:
                    delay = base_delay * (2 ** (attempt - 1))  # Exponential backoff
                    # Use longer delays for later attempts
                    if attempt >= 3:
                        delay = fallback_retry_delay  # Use fallback delay for later attempts
                    else:
                        delay = base_delay * (2 ** (attempt - 1))  # Exponential backoff

                    logger.info(f"Retrying product resolution after {delay}s delay",
                                product_name=product_name, attempt=attempt, tenant_id=tenant_id)
                    await asyncio.sleep(delay)

                # First try to search for existing product by name
                products = await self.inventory_client.search_products(product_name, tenant_id)
                try:
                    products = await self.inventory_client.search_products(product_name, tenant_id)

                    if products:
                        # Return the first matching product's ID
                        product_id = products[0].get('id')
                        if product_id:
                            uuid_id = UUID(str(product_id))
                            self.product_cache[product_name] = uuid_id  # Cache for future use
                            logger.info("Resolved product to existing inventory ID",
                                        product_name=product_name, product_id=product_id, tenant_id=tenant_id)
                            return uuid_id
                except Exception as search_error:
                    logger.warning("Product search failed, trying direct creation",
                                   product_name=product_name, error=str(search_error), tenant_id=tenant_id)

                if products:
                    # Return the first matching product's ID
                    product_id = products[0].get('id')
                    if product_id:
                        uuid_id = UUID(str(product_id))
                        self.product_cache[product_name] = uuid_id  # Cache for future use
                        logger.info("Resolved product to existing inventory ID",
                                    product_name=product_name, product_id=product_id, tenant_id=tenant_id)
                        return uuid_id
                # Add delay before creation attempt to avoid hitting rate limits
                await asyncio.sleep(1.0)

                # Add small delay before creation attempt to avoid hitting rate limits
                await asyncio.sleep(0.5)

                # If not found, create a new ingredient/product in inventory
                # If not found or search failed, create a new ingredient/product in inventory
                ingredient_data = {
                    'name': product_name,
                    'type': 'finished_product',  # Assuming sales are of finished products
@@ -965,36 +994,133 @@ class DataImportService:
                    'category': product_category or 'general'
                }

                created_product = await self.inventory_client.create_ingredient(ingredient_data, str(tenant_id))
                if created_product and created_product.get('id'):
                    product_id = created_product['id']
                    uuid_id = UUID(str(product_id))
                    self.product_cache[product_name] = uuid_id  # Cache for future use
                    logger.info("Created new inventory product for sales data",
                                product_name=product_name, product_id=product_id, tenant_id=tenant_id)
                    return uuid_id
                try:
                    created_product = await self.inventory_client.create_ingredient(ingredient_data, str(tenant_id))
                    if created_product and created_product.get('id'):
                        product_id = created_product['id']
                        uuid_id = UUID(str(product_id))
                        self.product_cache[product_name] = uuid_id  # Cache for future use
                        logger.info("Created new inventory product for sales data",
                                    product_name=product_name, product_id=product_id, tenant_id=tenant_id)
                        return uuid_id
                except Exception as creation_error:
                    logger.warning("Product creation failed",
                                   product_name=product_name, error=str(creation_error), tenant_id=tenant_id)

                logger.warning("Failed to resolve or create product in inventory",
                               product_name=product_name, tenant_id=tenant_id, attempt=attempt)

            except Exception as e:
                error_str = str(e)
                if "429" in error_str or "rate limit" in error_str.lower():
                    logger.warning("Rate limit hit, retrying",
                if "429" in error_str or "rate limit" in error_str.lower() or "too many requests" in error_str.lower():
                    logger.warning("Rate limit or service overload detected, retrying with longer delay",
                                   product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
                    if attempt < max_retries - 1:
                        continue  # Retry with exponential backoff
                elif "503" in error_str or "502" in error_str or "service unavailable" in error_str.lower():
                    logger.warning("Service unavailable, retrying with backoff",
                                   product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
                    if attempt < max_retries - 1:
                        continue  # Retry for service unavailable errors
                elif "timeout" in error_str.lower() or "connection" in error_str.lower():
                    logger.warning("Network issue detected, retrying",
                                   product_name=product_name, attempt=attempt, error=error_str, tenant_id=tenant_id)
                    if attempt < max_retries - 1:
                        continue  # Retry for network issues
                else:
                    logger.error("Error resolving product to inventory ID",
                    logger.error("Non-retryable error resolving product to inventory ID",
                                 error=error_str, product_name=product_name, tenant_id=tenant_id)
                    break  # Don't retry for non-rate-limit errors
                    if attempt < max_retries - 1:
                        # Still retry even for other errors, in case it's transient
                        continue
                    else:
                        break  # Don't retry on final attempt

        # If all retries failed, mark as failed and return None
        # If all retries failed, log detailed error but don't mark as permanently failed yet
        # Instead, we'll implement a fallback mechanism
        logger.error("Failed to resolve product after all retries, attempting fallback",
                     product_name=product_name, tenant_id=tenant_id)

        # FALLBACK: Try to create a temporary product with minimal data
        try:
            # Use a simplified approach with minimal data
            fallback_data = {
                'name': product_name,
                'type': 'finished_product',
                'unit': 'unit',
                'current_stock': 0,
                'cost_per_unit': 0
            }

            logger.info("Attempting fallback product creation with minimal data",
                        product_name=product_name, tenant_id=tenant_id)

            created_product = await self.inventory_client.create_ingredient(fallback_data, str(tenant_id))
            if created_product and created_product.get('id'):
                product_id = created_product['id']
                uuid_id = UUID(str(product_id))
                self.product_cache[product_name] = uuid_id
                logger.info("SUCCESS: Fallback product creation succeeded",
                            product_name=product_name, product_id=product_id, tenant_id=tenant_id)
                return uuid_id
        except Exception as fallback_error:
            logger.error("Fallback product creation also failed",
                         product_name=product_name, error=str(fallback_error), tenant_id=tenant_id)

        # Only mark as permanently failed after all attempts including fallback
        self.failed_products.add(product_name)
        logger.error("Failed to resolve product after all retries",
        logger.error("CRITICAL: Permanently failed to resolve product - this will result in missing training data",
                     product_name=product_name, tenant_id=tenant_id)
        return None
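A condensed sketch of the retry policy used above (illustrative; retry_call is a placeholder name, not from the commit): exponential backoff for the early attempts, then a flat longer delay, mirroring the max_retries, base_delay and fallback_retry_delay values.

import asyncio

async def retry_call(call, max_retries=5, base_delay=2.0, fallback_delay=10.0):
    """Retry an async callable with exponential, then flat, backoff."""
    for attempt in range(max_retries):
        if attempt > 0:
            # attempts 1-2 back off exponentially, attempts 3+ wait a flat 10s
            delay = fallback_delay if attempt >= 3 else base_delay * (2 ** (attempt - 1))
            await asyncio.sleep(delay)
        try:
            return await call()
        except Exception:
            # the real code inspects the error (429, 5xx, timeouts) before deciding to retry
            continue
    return None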
    async def _batch_resolve_products(self, unique_products: set, tenant_id: str) -> None:
        """Batch resolve/create products to reduce API calls and improve success rate"""

        if not unique_products:
            return

        logger.info(f"Starting batch product resolution for {len(unique_products)} unique products")

        # Convert set to list for easier handling
        products_list = list(unique_products)
        batch_size = 5  # Process in smaller batches to avoid overwhelming the inventory service

        for i in range(0, len(products_list), batch_size):
            batch = products_list[i:i + batch_size]
            logger.info(f"Processing batch {i//batch_size + 1}/{(len(products_list) + batch_size - 1)//batch_size}")

            # Process each product in the batch with retry logic
            for product_name, product_category in batch:
                try:
                    # Skip if already in cache or failed list
                    if product_name in self.product_cache or product_name in self.failed_products:
                        continue

                    # Try to resolve the product
                    await self._resolve_product_to_inventory_id(product_name, product_category, tenant_id)

                    # Add small delay between products to be gentle on the API
                    await asyncio.sleep(0.5)

                except Exception as e:
                    logger.warning(f"Failed to batch process product {product_name}: {e}")
                    continue

            # Add delay between batches
            if i + batch_size < len(products_list):
                logger.info("Waiting between batches to avoid rate limiting...")
                await asyncio.sleep(2.0)

        successful_resolutions = len([p for p, _ in products_list if p in self.product_cache])
        failed_resolutions = len([p for p, _ in products_list if p in self.failed_products])

        logger.info(f"Batch product resolution completed: {successful_resolutions} successful, {failed_resolutions} failed")

        if failed_resolutions > 0:
            logger.warning(f"ATTENTION: {failed_resolutions} products failed to resolve - these will be missing from training data")

        return

    def _structure_messages(self, messages: List[Union[str, Dict]]) -> List[Dict[str, Any]]:
        """Convert string messages to structured format"""
        structured = []
@@ -285,26 +285,6 @@ class SalesService:
            # Don't fail the main operation for auxiliary actions
            logger.warning("Failed to execute post-create actions", error=str(e), record_id=record.id)

    async def get_products_list(self, tenant_id: str) -> List[Dict[str, Any]]:
        """Get list of all products with sales data for tenant using repository pattern"""
        try:
            async with get_db_transaction() as db:
                repository = SalesRepository(db)

                # Use repository method for product statistics
                products = await repository.get_product_statistics(tenant_id)

                logger.debug("Products list retrieved successfully",
                             tenant_id=tenant_id,
                             product_count=len(products))

                return products

        except Exception as e:
            logger.error("Failed to get products list",
                         error=str(e),
                         tenant_id=tenant_id)
            raise DatabaseError(f"Failed to get products list: {str(e)}")

    # New inventory integration methods
    async def search_inventory_products(self, search_term: str, tenant_id: UUID,
@@ -186,6 +186,15 @@ async def execute_enhanced_training_job_background(
    enhanced_training_service = EnhancedTrainingService(database_manager)

    try:
        # Create initial training log entry first
        await enhanced_training_service._update_job_status_repository(
            job_id=job_id,
            status="pending",
            progress=0,
            current_step="Starting enhanced training job",
            tenant_id=tenant_id
        )

        # Publish job started event
        await publish_job_started(job_id, tenant_id, {
            "enhanced_features": True,
@@ -214,7 +223,8 @@ async def execute_enhanced_training_job_background(
            job_id=job_id,
            status="running",
            progress=0,
            current_step="Initializing enhanced training pipeline"
            current_step="Initializing enhanced training pipeline",
            tenant_id=tenant_id
        )

        # Execute the enhanced training pipeline with repository pattern
@@ -232,7 +242,8 @@ async def execute_enhanced_training_job_background(
            status="completed",
            progress=100,
            current_step="Enhanced training completed successfully",
            results=result
            results=result,
            tenant_id=tenant_id
        )

        # Publish enhanced completion event
@@ -262,7 +273,8 @@ async def execute_enhanced_training_job_background(
                status="failed",
                progress=0,
                current_step="Enhanced training failed",
                error_message=str(training_error)
                error_message=str(training_error),
                tenant_id=tenant_id
            )
        except Exception as status_error:
            logger.error("Failed to update job status after training error",
@@ -92,9 +92,27 @@ class EnhancedBakeryMLTrainer:

        # Get unique products from the sales data
        products = sales_df['inventory_product_id'].unique().tolist()
        logger.info("Training enhanced models",

        # Debug: Log sales data details to understand why only one product is found
        total_sales_records = len(sales_df)
        sales_by_product = sales_df.groupby('inventory_product_id').size().to_dict()

        logger.info("Enhanced training pipeline - Sales data analysis",
                    total_sales_records=total_sales_records,
                    products_count=len(products),
                    products=products)
                    products=products,
                    sales_by_product=sales_by_product)

        if len(products) == 1:
            logger.warning("Only ONE product found in sales data - this may indicate a data fetching issue",
                           tenant_id=tenant_id,
                           single_product_id=products[0],
                           total_sales_records=total_sales_records)
        elif len(products) == 0:
            raise ValueError("No products found in sales data")
        else:
            logger.info("Multiple products detected for training",
                        products_count=len(products))

        self.status_publisher.products_total = len(products)
@@ -512,7 +530,7 @@ class EnhancedBakeryMLTrainer:
            from_column='quantity_sold',
            to_column='quantity')

        required_columns = ['date', 'product_name', 'quantity']
        required_columns = ['date', 'inventory_product_id', 'quantity']
        missing_columns = [col for col in required_columns if col not in sales_df.columns]
        if missing_columns:
            raise ValueError(f"Missing required columns: {missing_columns}")
@@ -541,7 +559,7 @@ class EnhancedBakeryMLTrainer:
        try:
            logger.info("Enhanced model evaluation starting",
                        tenant_id=tenant_id,
                        product_name=product_name)
                        inventory_product_id=inventory_product_id)

            # Get database session and repositories
            async with self.database_manager.get_session() as db_session:
@@ -574,13 +574,14 @@ class TrainingDataOrchestrator:
            if city_count >= 1:  # At least some city awareness
                city_aware_records += 1

            # Record is valid if it has basic requirements
            if record_score >= 2:
            # Record is valid if it has basic requirements (date + any traffic field)
            # Lowered requirement from >= 2 to >= 1 to accept records with just date or traffic data
            if record_score >= 1:
                valid_records += 1

        total_records = len(traffic_data)
        validity_threshold = 0.3
        enhancement_threshold = 0.2  # Lower threshold for enhanced features
        validity_threshold = 0.1  # Reduced from 0.3 to 0.1 - accept if 10% of records are valid
        enhancement_threshold = 0.1  # Reduced threshold for enhanced features

        basic_validity = (valid_records / total_records) >= validity_threshold
        has_enhancements = (enhanced_records / total_records) >= enhancement_threshold
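A quick worked example of the relaxed acceptance rule (illustrative numbers, not from the commit): with 4000 traffic records of which 500 score at least 1, valid_records / total_records = 500 / 4000 = 0.125, which now clears the 0.1 validity_threshold; under the previous 0.3 threshold the same batch would have been rejected.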
@@ -141,6 +141,30 @@ class EnhancedTrainingService:
            logger.error("Training aborted - no sales data", tenant_id=tenant_id, job_id=job_id)
            raise ValueError(error_msg)

        # Debug: Analyze the sales data structure to understand product distribution
        sales_df_debug = pd.DataFrame(sales_data)
        if 'inventory_product_id' in sales_df_debug.columns:
            unique_products_found = sales_df_debug['inventory_product_id'].unique()
            product_counts = sales_df_debug['inventory_product_id'].value_counts().to_dict()

            logger.info("Pre-flight sales data analysis",
                        tenant_id=tenant_id,
                        job_id=job_id,
                        total_sales_records=len(sales_data),
                        unique_products_count=len(unique_products_found),
                        unique_products=unique_products_found.tolist(),
                        records_per_product=product_counts)

            if len(unique_products_found) == 1:
                logger.warning("POTENTIAL ISSUE: Only ONE unique product found in all sales data",
                               tenant_id=tenant_id,
                               single_product=unique_products_found[0],
                               record_count=len(sales_data))
        else:
            logger.warning("No 'inventory_product_id' column found in sales data",
                           tenant_id=tenant_id,
                           columns=list(sales_df_debug.columns))

        logger.info(f"Pre-flight check passed: {len(sales_data)} sales records found",
                    tenant_id=tenant_id, job_id=job_id)
@@ -536,18 +560,69 @@ class EnhancedTrainingService:
            progress: int = None,
            current_step: str = None,
            error_message: str = None,
            results: Dict = None):
            results: Dict = None,
            tenant_id: str = None):
        """Update job status using repository pattern"""
        try:
            async with self.database_manager.get_session() as session:
                await self._init_repositories(session)

                await self.training_log_repo.update_log_progress(
                    job_id=job_id,
                    progress=progress,
                    current_step=current_step,
                    status=status
                )
                # Check if log exists, create if not
                existing_log = await self.training_log_repo.get_log_by_job_id(job_id)

                if not existing_log:
                    # Create initial log entry
                    if not tenant_id:
                        # Extract tenant_id from job_id if not provided
                        # Format: enhanced_training_{tenant_id}_{job_suffix}
                        try:
                            parts = job_id.split('_')
                            if len(parts) >= 3 and parts[0] == 'enhanced' and parts[1] == 'training':
                                tenant_id = parts[2]
                        except Exception:
                            logger.warning(f"Could not extract tenant_id from job_id {job_id}")

                    if tenant_id:
                        log_data = {
                            "job_id": job_id,
                            "tenant_id": tenant_id,
                            "status": status or "pending",
                            "progress": progress or 0,
                            "current_step": current_step or "initializing",
                            "start_time": datetime.utcnow()
                        }

                        if error_message:
                            log_data["error_message"] = error_message
                        if results:
                            log_data["results"] = results

                        await self.training_log_repo.create_training_log(log_data)
                        logger.info("Created initial training log", job_id=job_id, tenant_id=tenant_id)
                    else:
                        logger.error("Cannot create training log without tenant_id", job_id=job_id)
                        return
                else:
                    # Update existing log
                    await self.training_log_repo.update_log_progress(
                        job_id=job_id,
                        progress=progress,
                        current_step=current_step,
                        status=status
                    )

                    # Update additional fields if provided
                    if error_message or results:
                        update_data = {}
                        if error_message:
                            update_data["error_message"] = error_message
                        if results:
                            update_data["results"] = results
                        if status in ["completed", "failed"]:
                            update_data["end_time"] = datetime.utcnow()

                        if update_data:
                            await self.training_log_repo.update(existing_log.id, update_data)

        except Exception as e:
            logger.error("Failed to update job status using repository",