REFACTOR external service and improve websocket training
services/external/app/external/aemet.py
@@ -842,10 +842,19 @@ class AEMETClient(BaseAPIClient):
        """Fetch forecast data from AEMET API"""
        endpoint = f"/prediccion/especifica/municipio/diaria/{municipality_code}"
        initial_response = await self._get(endpoint)

        # Check for AEMET error responses
        if initial_response and isinstance(initial_response, dict):
            aemet_estado = initial_response.get("estado")
            if aemet_estado == 404 or aemet_estado == "404":
                logger.warning("AEMET API returned 404 error",
                               mensaje=initial_response.get("descripcion"),
                               municipality=municipality_code)
                return None

        if not self._is_valid_initial_response(initial_response):
            return None

        datos_url = initial_response.get("datos")
        return await self._fetch_from_url(datos_url)
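For context, AEMET's OpenData endpoints answer with a small JSON envelope that points at the real payload, which is why the client makes two requests. A minimal sketch of that two-step flow under those assumptions (the helper name fetch_aemet is hypothetical and not part of the client):

# Typical first response (envelope) from an AEMET endpoint:
#   {"descripcion": "exito", "estado": 200, "datos": "https://opendata.aemet.es/...", "metadatos": "..."}
import httpx

async def fetch_aemet(endpoint_url: str, api_key: str):
    async with httpx.AsyncClient(timeout=30) as client:
        # Step 1: request the endpoint and read the envelope
        envelope = (await client.get(endpoint_url, params={"api_key": api_key})).json()
        if envelope.get("estado") != 200:      # error envelopes carry estado/descripcion
            return None
        # Step 2: follow the "datos" URL for the actual records (usually a list)
        return (await client.get(envelope["datos"])).json()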
@@ -854,42 +863,65 @@ class AEMETClient(BaseAPIClient):
        # Note: AEMET hourly forecast API endpoint
        endpoint = f"/prediccion/especifica/municipio/horaria/{municipality_code}"
        logger.info("Requesting AEMET hourly forecast", endpoint=endpoint, municipality=municipality_code)

        initial_response = await self._get(endpoint)

        # Check for AEMET error responses
        if initial_response and isinstance(initial_response, dict):
            aemet_estado = initial_response.get("estado")
            if aemet_estado == 404 or aemet_estado == "404":
                logger.warning("AEMET API returned 404 error for hourly forecast",
                               mensaje=initial_response.get("descripcion"),
                               municipality=municipality_code)
                return None

        if not self._is_valid_initial_response(initial_response):
            logger.warning("Invalid initial response from AEMET hourly API",
                           response=initial_response, municipality=municipality_code)
            return None

        datos_url = initial_response.get("datos")
        logger.info("Fetching hourly data from AEMET datos URL", url=datos_url)

        return await self._fetch_from_url(datos_url)

    async def _fetch_historical_data_in_chunks(self,
                                               station_id: str,
                                               start_date: datetime,
                                               end_date: datetime) -> List[Dict[str, Any]]:
        """Fetch historical data in chunks due to AEMET API limitations"""
        import asyncio
        historical_data = []
        current_date = start_date

        chunk_count = 0

        while current_date <= end_date:
            chunk_end_date = min(
                current_date + timedelta(days=AEMETConstants.MAX_DAYS_PER_REQUEST),
                end_date
            )

            # Add delay to respect rate limits (AEMET allows ~60 requests/minute)
            # Wait 2 seconds between requests to stay well under the limit
            if chunk_count > 0:
                await asyncio.sleep(2)

            chunk_data = await self._fetch_historical_chunk(
                station_id, current_date, chunk_end_date
            )

            if chunk_data:
                historical_data.extend(chunk_data)

            current_date = chunk_end_date + timedelta(days=1)

            chunk_count += 1

            # Log progress every 5 chunks
            if chunk_count % 5 == 0:
                logger.info("Historical data fetch progress",
                            chunks_fetched=chunk_count,
                            records_so_far=len(historical_data))

        return historical_data

    async def _fetch_historical_chunk(self,
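The chunking loop walks the requested date range in MAX_DAYS_PER_REQUEST-sized windows and steps one day past each window's end, so chunks never overlap. A small standalone sketch of that walk (the 30-day limit is an assumed value; the real one lives in AEMETConstants.MAX_DAYS_PER_REQUEST):

from datetime import datetime, timedelta

MAX_DAYS_PER_REQUEST = 30  # assumed for illustration

def chunk_ranges(start: datetime, end: datetime):
    """Yield (chunk_start, chunk_end) windows covering [start, end] without overlap."""
    current = start
    while current <= end:
        chunk_end = min(current + timedelta(days=MAX_DAYS_PER_REQUEST), end)
        yield current, chunk_end
        current = chunk_end + timedelta(days=1)  # step past the chunk, as the client does

# e.g. a 75-day range splits into three non-overlapping windows
for a, b in chunk_ranges(datetime(2024, 1, 1), datetime(2024, 3, 16)):
    print(a.date(), "→", b.date())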
@@ -930,13 +962,37 @@ class AEMETClient(BaseAPIClient):
        """Fetch data from AEMET datos URL"""
        try:
            data = await self._fetch_url_directly(url)

            if data is None:
                logger.warning("No data received from datos URL", url=url)
                return None

            # Check if we got an AEMET error response (dict with estado/descripcion)
            if isinstance(data, dict):
                aemet_estado = data.get("estado")
                aemet_mensaje = data.get("descripcion")

                if aemet_estado or aemet_mensaje:
                    logger.warning("AEMET datos URL returned error response",
                                   estado=aemet_estado,
                                   mensaje=aemet_mensaje,
                                   url=url)
                    return None
                else:
                    # It's a dict but not an error response - unexpected format
                    logger.warning("Expected list from datos URL but got dict",
                                   data_type=type(data),
                                   keys=list(data.keys())[:5],
                                   url=url)
                    return None

            if isinstance(data, list):
                return data

            logger.warning("Unexpected data type from datos URL",
                           data_type=type(data), url=url)
            return None

        except Exception as e:
            logger.error("Failed to fetch from datos URL", url=url, error=str(e))
            return None
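The type checks above exist because a datos URL can return either the expected list of records or, on server-side failure, a small JSON error object. Roughly the two shapes being distinguished, with illustrative values only:

# Success: the datos URL returns a list of records (field names here are illustrative)
ok_payload = [
    {"fecha": "2024-05-01", "temperatura": {"maxima": 24, "minima": 12}},
    {"fecha": "2024-05-02", "temperatura": {"maxima": 26, "minima": 13}},
]

# Failure: the datos URL answers with an AEMET-style error envelope
error_payload = {"estado": 404, "descripcion": "No hay datos que satisfagan esos criterios"}

def classify(data):
    """Mirror the dispatch in _fetch_from_url: list -> records, dict -> error/unknown."""
    if isinstance(data, list):
        return "records"
    if isinstance(data, dict) and (data.get("estado") or data.get("descripcion")):
        return "aemet-error"
    return "unexpected"

assert classify(ok_payload) == "records" and classify(error_payload) == "aemet-error"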
@@ -318,49 +318,86 @@ class MadridTrafficClient(BaseTrafficClient, BaseAPIClient):
    async def _process_historical_zip_enhanced(self, zip_content: bytes, zip_url: str,
                                               latitude: float, longitude: float,
                                               nearest_points: List[Tuple[str, Dict[str, Any], float]]) -> List[Dict[str, Any]]:
        """Process historical ZIP file with memory-efficient streaming"""
        try:
            import zipfile
            import io
            import csv
            import gc

            historical_records = []
            nearest_ids = {p[0] for p in nearest_points}

            with zipfile.ZipFile(io.BytesIO(zip_content)) as zip_file:
                csv_files = [f for f in zip_file.namelist() if f.lower().endswith('.csv')]

                for csv_filename in csv_files:
                    try:
                        # Stream CSV file line-by-line to avoid loading entire file into memory
                        with zip_file.open(csv_filename) as csv_file:
                            # Decode lazily with codecs.iterdecode for line-by-line reading
                            import codecs
                            text_wrapper = codecs.iterdecode(csv_file, 'utf-8', errors='ignore')
                            csv_reader = csv.DictReader(text_wrapper, delimiter=';')

                            # Process in small batches
                            batch_size = 5000
                            batch_records = []
                            row_count = 0

                            for row in csv_reader:
                                row_count += 1
                                measurement_point_id = row.get('id', '').strip()

                                # Skip rows we don't need
                                if measurement_point_id not in nearest_ids:
                                    continue

                                try:
                                    record_data = await self.processor.parse_historical_csv_row(row, nearest_points)
                                    if record_data:
                                        batch_records.append(record_data)

                                    # Store and clear batch when full
                                    if len(batch_records) >= batch_size:
                                        historical_records.extend(batch_records)
                                        batch_records = []
                                        gc.collect()

                                except Exception:
                                    continue

                            # Store remaining records
                            if batch_records:
                                historical_records.extend(batch_records)
                                batch_records = []

                            self.logger.info("CSV file processed",
                                             filename=csv_filename,
                                             rows_scanned=row_count,
                                             records_extracted=len(historical_records))

                            # Aggressive garbage collection after each CSV
                            gc.collect()

                    except Exception as csv_error:
                        self.logger.warning("Error processing CSV file",
                                            filename=csv_filename,
                                            error=str(csv_error))
                        continue

            self.logger.info("Historical ZIP processing completed",
                             zip_url=zip_url,
                             total_records=len(historical_records))

            # Final cleanup
            del zip_content
            gc.collect()

            return historical_records

        except Exception as e:
            self.logger.error("Error processing historical ZIP file",
                              zip_url=zip_url, error=str(e))
            return []
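The streaming version trades one large decode per file for lazy, per-line decoding straight out of the ZIP member. A minimal standalone sketch of the same pattern under assumed inputs (the 'id' column name and the semicolon delimiter follow the code above; everything else is placeholder):

import codecs
import csv
import io
import zipfile

def stream_matching_rows(zip_bytes: bytes, wanted_ids: set):
    """Yield CSV rows whose 'id' column is in wanted_ids, without materialising whole files."""
    with zipfile.ZipFile(io.BytesIO(zip_bytes)) as zf:
        for name in zf.namelist():
            if not name.lower().endswith('.csv'):
                continue
            with zf.open(name) as member:  # binary file object, iterable by lines
                lines = codecs.iterdecode(member, 'utf-8', errors='ignore')
                for row in csv.DictReader(lines, delimiter=';'):
                    if row.get('id', '').strip() in wanted_ids:
                        yield row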
services/external/app/external/base_client.py
@@ -50,8 +50,20 @@ class BaseAPIClient:
                return response_data

            except httpx.HTTPStatusError as e:
                logger.error("HTTP error", status_code=e.response.status_code, url=url,
                             response_text=e.response.text[:200], attempt=attempt + 1)

                # Handle rate limiting (429) with longer backoff
                if e.response.status_code == 429:
                    import asyncio
                    # Exponential backoff: 5s, 15s, 45s for rate limits
                    wait_time = 5 * (3 ** attempt)
                    logger.warning(f"Rate limit hit, waiting {wait_time}s before retry",
                                   attempt=attempt + 1, max_attempts=self.retries)
                    await asyncio.sleep(wait_time)
                    if attempt < self.retries - 1:
                        continue

                if attempt == self.retries - 1:  # Last attempt
                    return None
            except httpx.RequestError as e:
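Assuming three retry attempts (the actual count comes from the client configuration), the 429 branch backs off for 5s, then 15s, then 45s, i.e. wait_time = 5 * 3**attempt. A quick check of that schedule:

# Back-off schedule used for HTTP 429 responses
retries = 3  # assumed default; the client reads this from its configuration
schedule = [5 * (3 ** attempt) for attempt in range(retries)]
assert schedule == [5, 15, 45]
print(sum(schedule))  # worst-case total wait on a persistently rate-limited endpoint: 65s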
@@ -72,51 +84,87 @@ class BaseAPIClient:
        return None

    async def _fetch_url_directly(self, url: str, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
        """Fetch data directly from a full URL (for AEMET datos URLs) with retry logic"""
        request_headers = headers or {}

        logger.debug("Making direct URL request", url=url)

        # Retry logic for unstable AEMET datos URLs
        for attempt in range(self.retries):
            try:
                async with httpx.AsyncClient(timeout=self.timeout) as client:
                    response = await client.get(url, headers=request_headers)
                    response.raise_for_status()

                    # Handle encoding issues common with Spanish data sources
                    try:
                        response_data = response.json()
                    except UnicodeDecodeError:
                        logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
                        # Try common Spanish encodings
                        for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
                            try:
                                text_content = response.content.decode(encoding)
                                import json
                                response_data = json.loads(text_content)
                                logger.info("Successfully decoded with encoding", encoding=encoding)
                                break
                            except (UnicodeDecodeError, json.JSONDecodeError):
                                continue
                        else:
                            logger.error("Failed to decode response with any encoding", url=url)
                            if attempt < self.retries - 1:
                                continue
                            return None

                    logger.debug("Direct URL response received",
                                 status_code=response.status_code,
                                 data_type=type(response_data),
                                 data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")

                    return response_data

            except httpx.HTTPStatusError as e:
                logger.error("HTTP error in direct fetch",
                             status_code=e.response.status_code,
                             url=url,
                             attempt=attempt + 1)

                # On last attempt, return None
                if attempt == self.retries - 1:
                    return None

                # Wait before retry
                import asyncio
                wait_time = 2 ** attempt  # 1s, 2s, 4s
                logger.info(f"Retrying datos URL in {wait_time}s",
                            attempt=attempt + 1, max_attempts=self.retries)
                await asyncio.sleep(wait_time)

            except httpx.RequestError as e:
                logger.error("Request error in direct fetch",
                             error=str(e), url=url, attempt=attempt + 1)

                # On last attempt, return None
                if attempt == self.retries - 1:
                    return None

                # Wait before retry
                import asyncio
                wait_time = 2 ** attempt  # 1s, 2s, 4s
                logger.info(f"Retrying datos URL in {wait_time}s",
                            attempt=attempt + 1, max_attempts=self.retries)
                await asyncio.sleep(wait_time)

            except Exception as e:
                logger.error("Unexpected error in direct fetch",
                             error=str(e), url=url, attempt=attempt + 1)

                # On last attempt, return None
                if attempt == self.retries - 1:
                    return None

        return None

    async def _post(self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
        """Make POST request"""