Fix new services implementation 5
This commit is contained in:
@@ -186,6 +186,15 @@ async def execute_enhanced_training_job_background(
|
||||
enhanced_training_service = EnhancedTrainingService(database_manager)
|
||||
|
||||
try:
|
||||
# Create initial training log entry first
|
||||
await enhanced_training_service._update_job_status_repository(
|
||||
job_id=job_id,
|
||||
status="pending",
|
||||
progress=0,
|
||||
current_step="Starting enhanced training job",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
# Publish job started event
|
||||
await publish_job_started(job_id, tenant_id, {
|
||||
"enhanced_features": True,
|
||||
@@ -214,7 +223,8 @@ async def execute_enhanced_training_job_background(
|
||||
job_id=job_id,
|
||||
status="running",
|
||||
progress=0,
|
||||
current_step="Initializing enhanced training pipeline"
|
||||
current_step="Initializing enhanced training pipeline",
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
# Execute the enhanced training pipeline with repository pattern
|
||||
@@ -232,7 +242,8 @@ async def execute_enhanced_training_job_background(
|
||||
status="completed",
|
||||
progress=100,
|
||||
current_step="Enhanced training completed successfully",
|
||||
results=result
|
||||
results=result,
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
|
||||
# Publish enhanced completion event
|
||||
@@ -262,7 +273,8 @@ async def execute_enhanced_training_job_background(
|
||||
status="failed",
|
||||
progress=0,
|
||||
current_step="Enhanced training failed",
|
||||
error_message=str(training_error)
|
||||
error_message=str(training_error),
|
||||
tenant_id=tenant_id
|
||||
)
|
||||
except Exception as status_error:
|
||||
logger.error("Failed to update job status after training error",
|
||||
|
||||
@@ -92,9 +92,27 @@ class EnhancedBakeryMLTrainer:
|
||||
|
||||
# Get unique products from the sales data
|
||||
products = sales_df['inventory_product_id'].unique().tolist()
|
||||
logger.info("Training enhanced models",
|
||||
|
||||
# Debug: Log sales data details to understand why only one product is found
|
||||
total_sales_records = len(sales_df)
|
||||
sales_by_product = sales_df.groupby('inventory_product_id').size().to_dict()
|
||||
|
||||
logger.info("Enhanced training pipeline - Sales data analysis",
|
||||
total_sales_records=total_sales_records,
|
||||
products_count=len(products),
|
||||
products=products)
|
||||
products=products,
|
||||
sales_by_product=sales_by_product)
|
||||
|
||||
if len(products) == 1:
|
||||
logger.warning("Only ONE product found in sales data - this may indicate a data fetching issue",
|
||||
tenant_id=tenant_id,
|
||||
single_product_id=products[0],
|
||||
total_sales_records=total_sales_records)
|
||||
elif len(products) == 0:
|
||||
raise ValueError("No products found in sales data")
|
||||
else:
|
||||
logger.info("Multiple products detected for training",
|
||||
products_count=len(products))
|
||||
|
||||
self.status_publisher.products_total = len(products)
|
||||
|
||||
@@ -512,7 +530,7 @@ class EnhancedBakeryMLTrainer:
|
||||
from_column='quantity_sold',
|
||||
to_column='quantity')
|
||||
|
||||
required_columns = ['date', 'product_name', 'quantity']
|
||||
required_columns = ['date', 'inventory_product_id', 'quantity']
|
||||
missing_columns = [col for col in required_columns if col not in sales_df.columns]
|
||||
if missing_columns:
|
||||
raise ValueError(f"Missing required columns: {missing_columns}")
|
||||
@@ -541,7 +559,7 @@ class EnhancedBakeryMLTrainer:
|
||||
try:
|
||||
logger.info("Enhanced model evaluation starting",
|
||||
tenant_id=tenant_id,
|
||||
product_name=product_name)
|
||||
inventory_product_id=inventory_product_id)
|
||||
|
||||
# Get database session and repositories
|
||||
async with self.database_manager.get_session() as db_session:
|
||||
|
||||
@@ -574,13 +574,14 @@ class TrainingDataOrchestrator:
|
||||
if city_count >= 1: # At least some city awareness
|
||||
city_aware_records += 1
|
||||
|
||||
# Record is valid if it has basic requirements
|
||||
if record_score >= 2:
|
||||
# Record is valid if it meets at least one basic requirement (a date or any traffic field)
|
||||
# Lowered requirement from >= 2 to >= 1 to accept records with just date or traffic data
|
||||
if record_score >= 1:
|
||||
valid_records += 1
|
||||
|
||||
total_records = len(traffic_data)
|
||||
validity_threshold = 0.3
|
||||
enhancement_threshold = 0.2 # Lower threshold for enhanced features
|
||||
validity_threshold = 0.1 # Reduced from 0.3 to 0.1 - accept if 10% of records are valid
|
||||
enhancement_threshold = 0.1 # Reduced threshold for enhanced features
|
||||
|
||||
basic_validity = (valid_records / total_records) >= validity_threshold
|
||||
has_enhancements = (enhanced_records / total_records) >= enhancement_threshold
|
||||
|
||||
@@ -141,6 +141,30 @@ class EnhancedTrainingService:
|
||||
logger.error("Training aborted - no sales data", tenant_id=tenant_id, job_id=job_id)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
# Debug: Analyze the sales data structure to understand product distribution
|
||||
sales_df_debug = pd.DataFrame(sales_data)
|
||||
if 'inventory_product_id' in sales_df_debug.columns:
|
||||
unique_products_found = sales_df_debug['inventory_product_id'].unique()
|
||||
product_counts = sales_df_debug['inventory_product_id'].value_counts().to_dict()
|
||||
|
||||
logger.info("Pre-flight sales data analysis",
|
||||
tenant_id=tenant_id,
|
||||
job_id=job_id,
|
||||
total_sales_records=len(sales_data),
|
||||
unique_products_count=len(unique_products_found),
|
||||
unique_products=unique_products_found.tolist(),
|
||||
records_per_product=product_counts)
|
||||
|
||||
if len(unique_products_found) == 1:
|
||||
logger.warning("POTENTIAL ISSUE: Only ONE unique product found in all sales data",
|
||||
tenant_id=tenant_id,
|
||||
single_product=unique_products_found[0],
|
||||
record_count=len(sales_data))
|
||||
else:
|
||||
logger.warning("No 'inventory_product_id' column found in sales data",
|
||||
tenant_id=tenant_id,
|
||||
columns=list(sales_df_debug.columns))
|
||||
|
||||
logger.info(f"Pre-flight check passed: {len(sales_data)} sales records found",
|
||||
tenant_id=tenant_id, job_id=job_id)
|
||||
|
||||
@@ -536,18 +560,69 @@ class EnhancedTrainingService:
|
||||
progress: int = None,
|
||||
current_step: str = None,
|
||||
error_message: str = None,
|
||||
results: Dict = None):
|
||||
results: Dict = None,
|
||||
tenant_id: str = None):
|
||||
"""Update job status using repository pattern"""
|
||||
try:
|
||||
async with self.database_manager.get_session() as session:
|
||||
await self._init_repositories(session)
|
||||
|
||||
await self.training_log_repo.update_log_progress(
|
||||
job_id=job_id,
|
||||
progress=progress,
|
||||
current_step=current_step,
|
||||
status=status
|
||||
)
|
||||
# Check if log exists, create if not
|
||||
existing_log = await self.training_log_repo.get_log_by_job_id(job_id)
|
||||
|
||||
if not existing_log:
|
||||
# Create initial log entry
|
||||
if not tenant_id:
|
||||
# Extract tenant_id from job_id if not provided
|
||||
# Format: enhanced_training_{tenant_id}_{job_suffix}
|
||||
try:
|
||||
parts = job_id.split('_')
|
||||
if len(parts) >= 3 and parts[0] == 'enhanced' and parts[1] == 'training':
|
||||
tenant_id = parts[2]
|
||||
except Exception:
|
||||
logger.warning(f"Could not extract tenant_id from job_id {job_id}")
|
||||
|
||||
if tenant_id:
|
||||
log_data = {
|
||||
"job_id": job_id,
|
||||
"tenant_id": tenant_id,
|
||||
"status": status or "pending",
|
||||
"progress": progress or 0,
|
||||
"current_step": current_step or "initializing",
|
||||
"start_time": datetime.utcnow()
|
||||
}
|
||||
|
||||
if error_message:
|
||||
log_data["error_message"] = error_message
|
||||
if results:
|
||||
log_data["results"] = results
|
||||
|
||||
await self.training_log_repo.create_training_log(log_data)
|
||||
logger.info("Created initial training log", job_id=job_id, tenant_id=tenant_id)
|
||||
else:
|
||||
logger.error("Cannot create training log without tenant_id", job_id=job_id)
|
||||
return
|
||||
else:
|
||||
# Update existing log
|
||||
await self.training_log_repo.update_log_progress(
|
||||
job_id=job_id,
|
||||
progress=progress,
|
||||
current_step=current_step,
|
||||
status=status
|
||||
)
|
||||
|
||||
# Update additional fields if provided
|
||||
if error_message or results:
|
||||
update_data = {}
|
||||
if error_message:
|
||||
update_data["error_message"] = error_message
|
||||
if results:
|
||||
update_data["results"] = results
|
||||
if status in ["completed", "failed"]:
|
||||
update_data["end_time"] = datetime.utcnow()
|
||||
|
||||
if update_data:
|
||||
await self.training_log_repo.update(existing_log.id, update_data)
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Failed to update job status using repository",
|
||||
|
||||
Reference in New Issue
Block a user