REFACTOR external service and improve websocket training

This commit is contained in:
Urtzi Alfaro
2025-10-09 14:11:02 +02:00
parent 7c72f83c51
commit 3c689b4f98
111 changed files with 13289 additions and 2374 deletions

View File

@@ -16,13 +16,7 @@ import pandas as pd
from app.services.data_client import DataClient
from app.services.date_alignment_service import DateAlignmentService, DateRange, DataSourceType, AlignedDateRange
from app.services.messaging import (
publish_job_progress,
publish_data_validation_started,
publish_data_validation_completed,
publish_job_step_completed,
publish_job_failed
)
from app.services.training_events import publish_training_failed
logger = structlog.get_logger()
@@ -76,7 +70,6 @@ class TrainingDataOrchestrator:
# Step 1: Fetch and validate sales data (unified approach)
sales_data = await self.data_client.fetch_sales_data(tenant_id, fetch_all=True)
# Pre-flight validation moved here to eliminate duplicate fetching
if not sales_data or len(sales_data) == 0:
error_msg = f"No sales data available for tenant {tenant_id}. Please import sales data before starting training."
logger.error("Training aborted - no sales data", tenant_id=tenant_id, job_id=job_id)
@@ -172,7 +165,8 @@ class TrainingDataOrchestrator:
return training_dataset
except Exception as e:
publish_job_failed(job_id, tenant_id, str(e))
if job_id and tenant_id:
await publish_training_failed(job_id, tenant_id, str(e))
logger.error(f"Training data preparation failed: {str(e)}")
raise ValueError(f"Failed to prepare training data: {str(e)}")
@@ -472,30 +466,18 @@ class TrainingDataOrchestrator:
logger.warning(f"Enhanced traffic data collection failed: {e}")
return []
# Keep original method for backwards compatibility
async def _collect_traffic_data_with_timeout(
    self,
    lat: float,
    lon: float,
    aligned_range: AlignedDateRange,
    tenant_id: str
) -> List[Dict[str, Any]]:
    """Backwards-compatible wrapper around the enhanced traffic collector.

    Forwards all arguments unchanged to
    ``_collect_traffic_data_with_timeout_enhanced`` and returns its result.
    """
    return await self._collect_traffic_data_with_timeout_enhanced(
        lat, lon, aligned_range, tenant_id
    )
def _log_enhanced_traffic_data_storage(self,
lat: float,
lon: float,
aligned_range: AlignedDateRange,
def _log_enhanced_traffic_data_storage(self,
lat: float,
lon: float,
aligned_range: AlignedDateRange,
record_count: int,
traffic_data: List[Dict[str, Any]]):
"""Enhanced logging for traffic data storage with detailed metadata"""
# Analyze the stored data for additional insights
cities_detected = set()
has_pedestrian_data = 0
data_sources = set()
districts_covered = set()
for record in traffic_data:
if 'city' in record and record['city']:
cities_detected.add(record['city'])
@@ -505,7 +487,7 @@ class TrainingDataOrchestrator:
data_sources.add(record['source'])
if 'district' in record and record['district']:
districts_covered.add(record['district'])
logger.info(
"Enhanced traffic data stored for re-training",
location=f"{lat:.4f},{lon:.4f}",
@@ -516,20 +498,9 @@ class TrainingDataOrchestrator:
data_sources=list(data_sources),
districts_covered=list(districts_covered),
storage_timestamp=datetime.now().isoformat(),
purpose="enhanced_model_training_and_retraining",
architecture_version="2.0_abstracted"
purpose="model_training_and_retraining"
)
def _log_traffic_data_storage(self,
                              lat: float,
                              lon: float,
                              aligned_range: AlignedDateRange,
                              record_count: int):
    """Legacy logging shim kept for backwards compatibility.

    Builds a minimal placeholder record list (at most one entry, empty when
    ``record_count`` is zero) and delegates to the enhanced logging method.
    """
    # min(record_count, 1) caps the placeholder list at a single entry while
    # still yielding an empty list for a zero record count.
    placeholder_records = min(record_count, 1) * [{"city": "madrid", "source": "legacy"}]
    self._log_enhanced_traffic_data_storage(
        lat, lon, aligned_range, record_count, placeholder_records
    )
def _validate_weather_data(self, weather_data: List[Dict[str, Any]]) -> bool:
"""Validate weather data quality"""
if not weather_data: