Fix new services implementation 5
This commit is contained in:
@@ -236,11 +236,19 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
|
||||
try:
|
||||
# Process by year and month to avoid memory issues
|
||||
current_date = start_date.replace(day=1) # Start from beginning of month
|
||||
now = datetime.now()
|
||||
|
||||
while current_date <= end_date:
|
||||
year = current_date.year
|
||||
month = current_date.month
|
||||
|
||||
# Skip current month and future months (no historical data available yet)
|
||||
if (year == now.year and month >= now.month) or year > now.year:
|
||||
self.logger.info("Skipping current/future month - no historical data available",
|
||||
year=year, month=month)
|
||||
current_date = self._next_month(current_date)
|
||||
continue
|
||||
|
||||
# Build historical URL
|
||||
zip_url = self.api_client._build_historical_url(year, month)
|
||||
|
||||
@@ -251,7 +259,7 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
|
||||
zip_content = await self.api_client.fetch_historical_zip(zip_url)
|
||||
if not zip_content:
|
||||
self.logger.warning("Failed to fetch historical ZIP", url=zip_url)
|
||||
current_date = current_date.replace(month=current_date.month + 1) if current_date.month < 12 else current_date.replace(year=current_date.year + 1, month=1)
|
||||
current_date = self._next_month(current_date)
|
||||
continue
|
||||
|
||||
# Process ZIP content with enhanced parsing
|
||||
@@ -286,11 +294,8 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
|
||||
filtered_records=len(filtered_records),
|
||||
total_records=len(historical_records))
|
||||
|
||||
# Move to next month
|
||||
if current_date.month == 12:
|
||||
current_date = current_date.replace(year=current_date.year + 1, month=1)
|
||||
else:
|
||||
current_date = current_date.replace(month=current_date.month + 1)
|
||||
# Move to next month - extracted to helper method
|
||||
current_date = self._next_month(current_date)
|
||||
|
||||
return historical_records
|
||||
|
||||
@@ -347,4 +352,10 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
|
||||
zip_url=zip_url, error=str(e))
|
||||
return []
|
||||
|
||||
def _next_month(self, current_date: datetime) -> datetime:
|
||||
"""Helper method to move to next month"""
|
||||
if current_date.month == 12:
|
||||
return current_date.replace(year=current_date.year + 1, month=1)
|
||||
else:
|
||||
return current_date.replace(month=current_date.month + 1)
|
||||
|
||||
|
||||
@@ -42,22 +42,9 @@ class MadridTrafficAPIClient(BaseAPIClient):
|
||||
|
||||
def _build_historical_url(self, year: int, month: int) -> str:
|
||||
"""Build historical ZIP URL for given year and month"""
|
||||
# Madrid historical data URL pattern
|
||||
base_url = "https://datos.madrid.es/egob/catalogo/208627"
|
||||
|
||||
# URL numbering pattern (this may need adjustment based on actual URLs)
|
||||
# Note: Historical data is only available for past periods, not current/future
|
||||
if year == 2023:
|
||||
url_number = 116 + (month - 1) # 116-127 for 2023
|
||||
elif year == 2024:
|
||||
url_number = 128 + (month - 1) # 128-139 for 2024
|
||||
elif year == 2025:
|
||||
# For 2025, use the continuing numbering from 2024
|
||||
url_number = 140 + (month - 1) # Starting from 140 for January 2025
|
||||
else:
|
||||
url_number = 116 # Fallback to 2023 data
|
||||
|
||||
return f"{base_url}-{url_number}-transporte-ptomedida-historico.zip"
|
||||
# Madrid uses a direct file pattern now: https://datos.madrid.es/egobfiles/MANUAL/208627/MM-YYYY.zip
|
||||
# Only historical data is available (not current month)
|
||||
return f"https://datos.madrid.es/egobfiles/MANUAL/208627/{month:02d}-{year}.zip"
|
||||
|
||||
async def fetch_current_traffic_xml(self, endpoint: Optional[str] = None) -> Optional[str]:
|
||||
"""Fetch current traffic XML data"""
|
||||
|
||||
@@ -84,18 +84,22 @@ class TrafficRepository:
|
||||
if not traffic_data_list:
|
||||
return 0
|
||||
|
||||
# Check for existing records to avoid duplicates
|
||||
# Check for existing records to avoid duplicates - batch the queries to avoid parameter limit
|
||||
dates = [data.get('date') for data in traffic_data_list if data.get('date')]
|
||||
existing_dates = set()
|
||||
if dates:
|
||||
existing_stmt = select(TrafficData.date).where(
|
||||
and_(
|
||||
TrafficData.location_id == location_id,
|
||||
TrafficData.date.in_(dates)
|
||||
# PostgreSQL has a limit of 32767 parameters, so batch the queries
|
||||
batch_size = 30000 # Safe batch size under the limit
|
||||
for i in range(0, len(dates), batch_size):
|
||||
date_batch = dates[i:i + batch_size]
|
||||
existing_stmt = select(TrafficData.date).where(
|
||||
and_(
|
||||
TrafficData.location_id == location_id,
|
||||
TrafficData.date.in_(date_batch)
|
||||
)
|
||||
)
|
||||
)
|
||||
result = await self.session.execute(existing_stmt)
|
||||
existing_dates = {row[0] for row in result.fetchall()}
|
||||
result = await self.session.execute(existing_stmt)
|
||||
existing_dates.update({row[0] for row in result.fetchall()})
|
||||
logger.debug(f"Found {len(existing_dates)} existing records for location {location_id}")
|
||||
|
||||
batch_records = []
|
||||
|
||||
Reference in New Issue
Block a user