Improve the traffic fetching system

This commit is contained in:
Urtzi Alfaro
2025-08-08 23:29:48 +02:00
parent 8af17f1433
commit 312fdc8ef3
8 changed files with 680 additions and 51 deletions

View File

@@ -360,7 +360,7 @@ class TrainingDataOrchestrator:
aligned_range: AlignedDateRange,
tenant_id: str
) -> List[Dict[str, Any]]:
"""Collect traffic data with timeout and Madrid constraint validation"""
"""Collect traffic data with enhanced storage and retrieval for re-training"""
try:
# Double-check Madrid constraint before making request
@@ -374,6 +374,7 @@ class TrainingDataOrchestrator:
start_date_str = aligned_range.start.isoformat()
end_date_str = aligned_range.end.isoformat()
# Fetch traffic data - this will automatically store it for future re-training
traffic_data = await self.data_client.fetch_traffic_data(
tenant_id=tenant_id,
start_date=start_date_str,
@@ -383,7 +384,11 @@ class TrainingDataOrchestrator:
# Validate traffic data
if self._validate_traffic_data(traffic_data):
logger.info(f"Collected {len(traffic_data)} valid traffic records")
logger.info(f"Collected and stored {len(traffic_data)} valid traffic records for re-training")
# Log storage success for audit purposes
self._log_traffic_data_storage(lat, lon, aligned_range, len(traffic_data))
return traffic_data
else:
logger.warning("Invalid traffic data received")
@@ -396,6 +401,69 @@ class TrainingDataOrchestrator:
logger.warning(f"Traffic data collection failed: {e}")
return []
def _log_traffic_data_storage(self,
                              lat: float,
                              lon: float,
                              aligned_range: AlignedDateRange,
                              record_count: int):
    """
    Emit a structured audit log entry for a batch of traffic records.

    This method only writes the log entry; it performs no storage itself.

    Args:
        lat: Latitude of the location, logged with 4 decimal places.
        lon: Longitude of the location, logged with 4 decimal places.
        aligned_range: Date range whose ``start`` and ``end`` are logged
            in ISO format.
        record_count: Number of traffic records being reported.
    """
    # Key order mirrors the order in which the fields are passed to the logger.
    audit_fields = {
        "location": f"{lat:.4f},{lon:.4f}",
        "date_range": (
            f"{aligned_range.start.isoformat()} to "
            f"{aligned_range.end.isoformat()}"
        ),
        "records_stored": record_count,
        # Wall-clock time of this log call (naive datetime, no timezone info).
        "storage_timestamp": datetime.now().isoformat(),
        "purpose": "model_training_and_retraining",
    }
    logger.info("Traffic data stored for re-training", **audit_fields)
async def retrieve_stored_traffic_for_retraining(
    self,
    bakery_location: Tuple[float, float],
    start_date: datetime,
    end_date: datetime,
    tenant_id: str
) -> List[Dict[str, Any]]:
    """
    Retrieve traffic records for model re-training.

    Delegates to ``self.data_client.fetch_stored_traffic_data_for_training``.
    NOTE(review): that client method is presumably backed by previously
    stored data rather than a fresh external fetch -- confirm against the
    data client implementation.

    Args:
        bakery_location: ``(latitude, longitude)`` pair of the bakery.
        start_date: Start of the requested period (sent in ISO format).
        end_date: End of the requested period (sent in ISO format).
        tenant_id: Tenant identifier forwarded to the data client.

    Returns:
        The records returned by the data client, or an empty list when the
        client returns nothing or any exception is raised while retrieving.
    """
    lat, lon = bakery_location

    try:
        records = await self.data_client.fetch_stored_traffic_data_for_training(
            tenant_id=tenant_id,
            start_date=start_date.isoformat(),
            end_date=end_date.isoformat(),
            latitude=lat,
            longitude=lon,
        )

        # Context fields shared by both outcome log entries below.
        where = f"{lat:.4f},{lon:.4f}"
        period = f"{start_date.isoformat()} to {end_date.isoformat()}"

        if not records:
            logger.warning(
                "No stored traffic data found for re-training",
                location=where,
                date_range=period,
            )
            return []

        logger.info(
            f"Retrieved {len(records)} stored traffic records for re-training",
            location=where,
            date_range=period,
            tenant_id=tenant_id,
        )
        return records

    except Exception as exc:
        # Best-effort: a retrieval failure is logged and reported as "no data".
        logger.error(
            f"Failed to retrieve stored traffic data for re-training: {exc}",
            location=f"{lat:.4f},{lon:.4f}",
            tenant_id=tenant_id,
        )
        return []
def _validate_weather_data(self, weather_data: List[Dict[str, Any]]) -> bool:
"""Validate weather data quality"""
if not weather_data: