Fix new services implementation 5

This commit is contained in:
Urtzi Alfaro
2025-08-15 17:53:59 +02:00
parent 03b4d4185d
commit f7de9115d1
43 changed files with 1714 additions and 891 deletions

View File

@@ -236,11 +236,19 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
try:
# Process by year and month to avoid memory issues
current_date = start_date.replace(day=1) # Start from beginning of month
now = datetime.now()
while current_date <= end_date:
year = current_date.year
month = current_date.month
# Skip current month and future months (no historical data available yet)
if (year == now.year and month >= now.month) or year > now.year:
self.logger.info("Skipping current/future month - no historical data available",
year=year, month=month)
current_date = self._next_month(current_date)
continue
# Build historical URL
zip_url = self.api_client._build_historical_url(year, month)
@@ -251,7 +259,7 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
zip_content = await self.api_client.fetch_historical_zip(zip_url)
if not zip_content:
self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
current_date = current_date.replace(month=current_date.month + 1) if current_date.month < 12 else current_date.replace(year=current_date.year + 1, month=1)
current_date = self._next_month(current_date)
continue
# Process ZIP content with enhanced parsing
@@ -286,11 +294,8 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
filtered_records=len(filtered_records),
total_records=len(historical_records))
# Move to next month
if current_date.month == 12:
current_date = current_date.replace(year=current_date.year + 1, month=1)
else:
current_date = current_date.replace(month=current_date.month + 1)
# Move to next month - extracted to helper method
current_date = self._next_month(current_date)
return historical_records
@@ -347,4 +352,10 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
zip_url=zip_url, error=str(e))
return []
def _next_month(self, current_date: datetime) -> datetime:
"""Helper method to move to next month"""
if current_date.month == 12:
return current_date.replace(year=current_date.year + 1, month=1)
else:
return current_date.replace(month=current_date.month + 1)

View File

@@ -42,22 +42,9 @@ class MadridTrafficAPIClient(BaseAPIClient):
def _build_historical_url(self, year: int, month: int) -> str:
"""Build historical ZIP URL for given year and month"""
# Madrid historical data URL pattern
base_url = "https://datos.madrid.es/egob/catalogo/208627"
# URL numbering pattern (this may need adjustment based on actual URLs)
# Note: Historical data is only available for past periods, not current/future
if year == 2023:
url_number = 116 + (month - 1) # 116-127 for 2023
elif year == 2024:
url_number = 128 + (month - 1) # 128-139 for 2024
elif year == 2025:
# For 2025, use the continuing numbering from 2024
url_number = 140 + (month - 1) # Starting from 140 for January 2025
else:
url_number = 116 # Fallback to 2023 data
return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"
# Madrid uses a direct file pattern now: https://datos.madrid.es/egobfiles/MANUAL/208627/MM-YYYY.zip
# Only historical data is available (not current month)
return f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"
async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
"""Fetch current traffic XML data"""

View File

@@ -84,18 +84,22 @@ class TrafficRepository:
if not traffic_data_list:
return 0
# Check for existing records to avoid duplicates
# Check for existing records to avoid duplicates - batch the queries to avoid parameter limit
dates = [data.get('date') for data in traffic_data_list if data.get('date')]
existing_dates = set()
if dates:
existing_stmt = select(TrafficData.date).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date.in_(dates)
# PostgreSQL has a limit of 32767 parameters, so batch the queries
batch_size = 30000 # Safe batch size under the limit
for i in range(0, len(dates), batch_size):
date_batch = dates[i:i + batch_size]
existing_stmt = select(TrafficData.date).where(
and_(
TrafficData.location_id == location_id,
TrafficData.date.in_(date_batch)
)
)
)
result = await self.session.execute(existing_stmt)
existing_dates = {row[0] for row in result.fetchall()}
result = await self.session.execute(existing_stmt)
existing_dates.update({row[0] for row in result.fetchall()})
logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")
batch_records = []