Improve the traffic fetching system
This commit is contained in:
@@ -24,6 +24,13 @@ class DataClient:
|
||||
# Get the shared data client configured for this service
|
||||
self.data_client = get_data_client(settings, "training")
|
||||
|
||||
# Check if the new method is available for stored traffic data
|
||||
if hasattr(self.data_client, 'get_stored_traffic_data_for_training'):
|
||||
self.supports_stored_traffic_data = True
|
||||
else:
|
||||
self.supports_stored_traffic_data = False
|
||||
logger.warning("Stored traffic data method not available in data client")
|
||||
|
||||
# Or alternatively, get all clients at once:
|
||||
# self.clients = get_service_clients(settings, "training")
|
||||
# Then use: self.clients.data.get_sales_data(...)
|
||||
@@ -147,6 +154,51 @@ class DataClient:
|
||||
logger.error(f"Error fetching traffic data: {e}", tenant_id=tenant_id)
|
||||
return []
|
||||
|
||||
async def fetch_stored_traffic_data_for_training(
    self,
    tenant_id: str,
    start_date: str,
    end_date: str,
    latitude: Optional[float] = None,
    longitude: Optional[float] = None
) -> List[Dict[str, Any]]:
    """
    Fetch traffic data for training/re-training, preferring stored records.

    When ``self.supports_stored_traffic_data`` is true, the request is sent
    to ``self.data_client.get_stored_traffic_data_for_training``. Otherwise
    the call is delegated to ``self.fetch_traffic_data`` with the same
    arguments.

    NOTE(review): the fallback uses the regular fetch path, so it may not be
    limited to previously stored data -- confirm against ``fetch_traffic_data``.

    Args:
        tenant_id: Tenant whose traffic data is requested.
        start_date: Start of the requested range, forwarded unchanged.
        end_date: End of the requested range, forwarded unchanged.
        latitude: Optional latitude, forwarded unchanged.
        longitude: Optional longitude, forwarded unchanged.

    Returns:
        The records returned by the client, or an empty list when the client
        returns nothing or any exception is raised (the error is logged, not
        propagated).
    """
    try:
        if not self.supports_stored_traffic_data:
            # Dedicated endpoint is unavailable: fall back to the regular
            # traffic fetch. tenant_id is logged so this line can be
            # correlated with the other log lines of this method.
            logger.info("Using fallback traffic data method for training",
                        tenant_id=tenant_id)
            return await self.fetch_traffic_data(
                tenant_id=tenant_id,
                start_date=start_date,
                end_date=end_date,
                latitude=latitude,
                longitude=longitude
            )

        # Use the dedicated stored traffic data method
        records = await self.data_client.get_stored_traffic_data_for_training(
            tenant_id=tenant_id,
            start_date=start_date,
            end_date=end_date,
            latitude=latitude,
            longitude=longitude
        )

        if not records:
            # Normalise any falsy result (None, empty list) to an empty list.
            logger.warning("No stored traffic data available for training", tenant_id=tenant_id)
            return []

        logger.info(f"Retrieved {len(records)} stored traffic records for training",
                    tenant_id=tenant_id)
        return records

    except Exception as e:
        # Best-effort boundary: callers always receive a list.
        logger.error(f"Error fetching stored traffic data for training: {e}", tenant_id=tenant_id)
        return []
|
||||
|
||||
async def validate_data_quality(
|
||||
self,
|
||||
tenant_id: str,
|
||||
|
||||
@@ -360,7 +360,7 @@ class TrainingDataOrchestrator:
|
||||
aligned_range: AlignedDateRange,
|
||||
tenant_id: str
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Collect traffic data with timeout and Madrid constraint validation"""
|
||||
"""Collect traffic data with enhanced storage and retrieval for re-training"""
|
||||
try:
|
||||
|
||||
# Double-check Madrid constraint before making request
|
||||
@@ -374,6 +374,7 @@ class TrainingDataOrchestrator:
|
||||
start_date_str = aligned_range.start.isoformat()
|
||||
end_date_str = aligned_range.end.isoformat()
|
||||
|
||||
# Fetch traffic data - this will automatically store it for future re-training
|
||||
traffic_data = await self.data_client.fetch_traffic_data(
|
||||
tenant_id=tenant_id,
|
||||
start_date=start_date_str,
|
||||
@@ -383,7 +384,11 @@ class TrainingDataOrchestrator:
|
||||
|
||||
# Validate traffic data
|
||||
if self._validate_traffic_data(traffic_data):
|
||||
logger.info(f"Collected {len(traffic_data)} valid traffic records")
|
||||
logger.info(f"Collected and stored {len(traffic_data)} valid traffic records for re-training")
|
||||
|
||||
# Log storage success for audit purposes
|
||||
self._log_traffic_data_storage(lat, lon, aligned_range, len(traffic_data))
|
||||
|
||||
return traffic_data
|
||||
else:
|
||||
logger.warning("Invalid traffic data received")
|
||||
@@ -396,6 +401,69 @@ class TrainingDataOrchestrator:
|
||||
logger.warning(f"Traffic data collection failed: {e}")
|
||||
return []
|
||||
|
||||
def _log_traffic_data_storage(self,
                              lat: float,
                              lon: float,
                              aligned_range: AlignedDateRange,
                              record_count: int):
    """
    Emit one structured info log entry describing a traffic-data collection.

    This method only writes a log line; it performs no storage itself.

    Args:
        lat: Latitude, logged with four decimal places.
        lon: Longitude, logged with four decimal places.
        aligned_range: Object whose ``start`` and ``end`` are rendered with
            ``isoformat()`` into the logged date range.
        record_count: Number of records reported in the entry.
    """
    # Build the fields first, in the same order the log call passes them.
    audit_fields = {
        "location": f"{lat:.4f},{lon:.4f}",
        "date_range": f"{aligned_range.start.isoformat()} to {aligned_range.end.isoformat()}",
        "records_stored": record_count,
        # Timestamp taken when the entry is built (naive, from datetime.now()).
        "storage_timestamp": datetime.now().isoformat(),
        "purpose": "model_training_and_retraining",
    }
    logger.info("Traffic data stored for re-training", **audit_fields)
|
||||
|
||||
async def retrieve_stored_traffic_for_retraining(
    self,
    bakery_location: Tuple[float, float],
    start_date: datetime,
    end_date: datetime,
    tenant_id: str
) -> List[Dict[str, Any]]:
    """
    Retrieve traffic data for model re-training through the data client.

    Delegates to ``self.data_client.fetch_stored_traffic_data_for_training``
    with the ISO-formatted date range and the unpacked coordinates.

    NOTE(review): whether this avoids new external API calls depends on the
    data client's implementation -- confirm there.

    Args:
        bakery_location: ``(latitude, longitude)`` pair.
        start_date: Start of the range; sent as ``start_date.isoformat()``.
        end_date: End of the range; sent as ``end_date.isoformat()``.
        tenant_id: Tenant whose traffic data is requested.

    Returns:
        The records returned by the data client, or an empty list when it
        returns nothing or any exception is raised inside the request
        (the error is logged, not propagated).
    """
    lat, lon = bakery_location
    # Formatted once and reused by every log call, including the error path.
    location = f"{lat:.4f},{lon:.4f}"

    try:
        # isoformat() stays inside the try so a bad date argument is logged
        # and yields [] rather than propagating.
        start_iso = start_date.isoformat()
        end_iso = end_date.isoformat()
        date_range = f"{start_iso} to {end_iso}"

        # Use the dedicated stored traffic data method for training
        records = await self.data_client.fetch_stored_traffic_data_for_training(
            tenant_id=tenant_id,
            start_date=start_iso,
            end_date=end_iso,
            latitude=lat,
            longitude=lon
        )

        if not records:
            # tenant_id is included so this warning carries the same context
            # as the success and error log lines.
            logger.warning(
                "No stored traffic data found for re-training",
                location=location,
                date_range=date_range,
                tenant_id=tenant_id
            )
            return []

        logger.info(
            f"Retrieved {len(records)} stored traffic records for re-training",
            location=location,
            date_range=date_range,
            tenant_id=tenant_id
        )
        return records

    except Exception as e:
        logger.error(
            f"Failed to retrieve stored traffic data for re-training: {e}",
            location=location,
            tenant_id=tenant_id
        )
        return []
|
||||
|
||||
def _validate_weather_data(self, weather_data: List[Dict[str, Any]]) -> bool:
|
||||
"""Validate weather data quality"""
|
||||
if not weather_data:
|
||||
|
||||
Reference in New Issue
Block a user