Files
bakery-ia/services/external/app/external/base_client.py

204 lines
9.1 KiB
Python

# ================================================================
# services/external/app/external/base_client.py
# ================================================================
"""Base HTTP client for external APIs - Enhanced for AEMET"""
import asyncio
import json
from datetime import datetime
from typing import Dict, Any, Optional

import httpx
import structlog
logger = structlog.get_logger()
class BaseAPIClient:
    """Async HTTP client base class with retry handling for unstable external APIs.

    The protected helpers (``_get``, ``_fetch_url_directly``, ``_post``) log
    failures and return ``None`` instead of raising. ``get_direct`` returns
    the raw response and lets httpx errors propagate to the caller.

    Attributes:
        base_url: Prefix concatenated verbatim with every ``endpoint``.
        api_key: Optional credential. ``_get`` sends it as the ``api_key``
            query parameter; ``_post`` sends it as a bearer token.
        timeout: ``httpx.Timeout`` used by the protected helpers (60 seconds).
        retries: Maximum number of attempts made by the retrying helpers.
    """

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """Store connection settings; no network I/O happens here."""
        self.base_url = base_url
        self.api_key = api_key
        # Generous timeout and several attempts because the AEMET API is unstable.
        self.timeout = httpx.Timeout(60.0)  # Increased from 30s
        self.retries = 3

    async def _sleep_before_retry(self, attempt: int, target: str) -> None:
        """Log the pending retry of *target* and sleep ``2 ** attempt`` seconds.

        Args:
            attempt: Zero-based index of the attempt that just failed.
            target: Human-readable name interpolated into the log message.
        """
        wait_time = 2 ** attempt  # 1s, 2s, 4s
        logger.info(f"Retrying {target} in {wait_time}s",
                    attempt=attempt + 1, max_attempts=self.retries)
        await asyncio.sleep(wait_time)

    async def _get(self, endpoint: str, params: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
        """Make a GET request with retry logic for unstable APIs.

        Args:
            endpoint: Path appended to ``base_url``.
            params: Optional query parameters. The dict is copied, never mutated.
            headers: Optional request headers. The dict is copied, never mutated.

        Returns:
            The decoded JSON body, or ``None`` once every attempt has failed.
        """
        url = f"{self.base_url}{endpoint}"

        # Work on copies so the caller's dicts never receive the API key.
        request_params = dict(params) if params else {}
        if self.api_key:
            # AEMET expects the key as a query parameter, not a header.
            request_params["api_key"] = self.api_key
        request_headers = dict(headers) if headers else {}

        # Mask the credential so it is never written to the logs.
        loggable_params = {
            key: ("***" if key == "api_key" else value)
            for key, value in request_params.items()
        }
        logger.debug("Making API request", url=url, params=loggable_params)

        for attempt in range(self.retries):
            is_last_attempt = attempt == self.retries - 1
            try:
                # A fresh client per attempt, as before.
                async with httpx.AsyncClient(timeout=self.timeout) as client:
                    response = await client.get(url, params=request_params, headers=request_headers)
                    response.raise_for_status()

                    response_data = response.json()
                    logger.debug("API response received",
                                 status_code=response.status_code,
                                 response_keys=list(response_data.keys()) if isinstance(response_data, dict) else "non-dict",
                                 attempt=attempt + 1)
                    return response_data

            except httpx.HTTPStatusError as e:
                logger.error("HTTP error", status_code=e.response.status_code, url=url,
                             response_text=e.response.text[:200], attempt=attempt + 1)
                if is_last_attempt:
                    # No retry follows, so do not waste time sleeping.
                    return None
                if e.response.status_code == 429:
                    # Rate limited: longer exponential backoff (5s, 15s, ...).
                    wait_time = 5 * (3 ** attempt)
                    logger.warning(f"Rate limit hit, waiting {wait_time}s before retry",
                                   attempt=attempt + 1, max_attempts=self.retries)
                    await asyncio.sleep(wait_time)
                # Any other HTTP status is retried immediately.

            except httpx.RequestError as e:
                logger.error("Request error", error=str(e), url=url, attempt=attempt + 1)
                if is_last_attempt:
                    return None
                await self._sleep_before_retry(attempt, "AEMET API")

            except Exception as e:
                # Includes invalid JSON bodies; retried immediately.
                logger.error("Unexpected error", error=str(e), url=url, attempt=attempt + 1)
                if is_last_attempt:
                    return None

        return None

    async def _fetch_url_directly(self, url: str, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
        """Fetch JSON from a full URL (for AEMET datos URLs) with retry logic.

        If the body cannot be decoded as UTF-8, common Spanish single-byte
        encodings are tried before the attempt counts as failed.

        Args:
            url: Absolute URL to fetch; ``base_url`` and ``api_key`` are not used.
            headers: Optional request headers.

        Returns:
            The decoded JSON payload, or ``None`` once every attempt has failed.
        """
        request_headers = headers or {}
        logger.debug("Making direct URL request", url=url)

        for attempt in range(self.retries):
            is_last_attempt = attempt == self.retries - 1
            try:
                async with httpx.AsyncClient(timeout=self.timeout) as client:
                    response = await client.get(url, headers=request_headers)
                    response.raise_for_status()

                    try:
                        response_data = response.json()
                    except UnicodeDecodeError:
                        logger.warning("UTF-8 decode failed, trying alternative encodings", url=url)
                        # Try common Spanish encodings in order.
                        for encoding in ['latin-1', 'windows-1252', 'iso-8859-1']:
                            try:
                                response_data = json.loads(response.content.decode(encoding))
                            except (UnicodeDecodeError, json.JSONDecodeError):
                                continue
                            logger.info("Successfully decoded with encoding", encoding=encoding)
                            break
                        else:
                            # No encoding produced valid JSON.
                            logger.error("Failed to decode response with any encoding", url=url)
                            if is_last_attempt:
                                return None
                            continue  # next attempt of the outer retry loop

                    logger.debug("Direct URL response received",
                                 status_code=response.status_code,
                                 data_type=type(response_data),
                                 data_length=len(response_data) if isinstance(response_data, (list, dict)) else "unknown")
                    return response_data

            except httpx.HTTPStatusError as e:
                logger.error("HTTP error in direct fetch",
                             status_code=e.response.status_code,
                             url=url,
                             attempt=attempt + 1)
                if is_last_attempt:
                    return None
                await self._sleep_before_retry(attempt, "datos URL")

            except httpx.RequestError as e:
                logger.error("Request error in direct fetch",
                             error=str(e), url=url, attempt=attempt + 1)
                if is_last_attempt:
                    return None
                await self._sleep_before_retry(attempt, "datos URL")

            except Exception as e:
                # Includes invalid JSON bodies; retried immediately.
                logger.error("Unexpected error in direct fetch",
                             error=str(e), url=url, attempt=attempt + 1)
                if is_last_attempt:
                    return None

        return None

    async def _post(self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None) -> Optional[Dict[str, Any]]:
        """Make a single POST request with a JSON body (no retries).

        Args:
            endpoint: Path appended to ``base_url``.
            data: Optional payload sent as the JSON request body.
            headers: Optional request headers. The dict is copied, never mutated.

        Returns:
            The decoded JSON response, or ``None`` on any error.
        """
        # Built before the ``try`` so the error handlers can always log it.
        url = f"{self.base_url}{endpoint}"

        # Copy so the caller's dict never receives the Authorization header.
        request_headers = dict(headers) if headers else {}
        if self.api_key:
            request_headers["Authorization"] = f"Bearer {self.api_key}"

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.post(url, json=data, headers=request_headers)
                response.raise_for_status()
                return response.json()

        except httpx.HTTPStatusError as e:
            logger.error("HTTP error", status_code=e.response.status_code, url=url)
            return None
        except httpx.RequestError as e:
            logger.error("Request error", error=str(e), url=url)
            return None
        except Exception as e:
            logger.error("Unexpected error", error=str(e), url=url)
            return None

    async def get_direct(self, url: str, headers: Optional[Dict] = None, timeout: Optional[int] = None) -> httpx.Response:
        """Public GET method for direct HTTP requests.

        Returns the raw httpx Response object for maximum flexibility.
        Redirects are followed. Unlike the protected helpers, this method
        does not retry and does not swallow errors.

        Args:
            url: Absolute URL to fetch.
            headers: Optional request headers.
            timeout: Timeout in seconds; a falsy value (``None`` or ``0``)
                selects the 30 second default.

        Returns:
            The ``httpx.Response`` for a successful request.

        Raises:
            httpx.HTTPStatusError: If ``raise_for_status()`` rejects the response.
            httpx.RequestError: If the request itself fails; not caught here.
        """
        request_headers = headers or {}
        request_timeout = httpx.Timeout(timeout if timeout else 30.0)

        async with httpx.AsyncClient(timeout=request_timeout, follow_redirects=True) as client:
            response = await client.get(url, headers=request_headers)
            response.raise_for_status()
            return response