REFACTOR external service and improve websocket training

Urtzi Alfaro
2025-10-09 14:11:02 +02:00
parent 7c72f83c51
commit 3c689b4f98
111 changed files with 13289 additions and 2374 deletions

View File

@@ -36,70 +36,102 @@ class ExternalServiceClient(BaseServiceClient):
         longitude: Optional[float] = None
     ) -> Optional[List[Dict[str, Any]]]:
         """
-        Get weather data for a date range and location
-        Uses POST request as per original implementation
+        Get historical weather data using NEW v2.0 optimized city-based endpoint
+        This uses pre-loaded data from the database with Redis caching for <100ms response times
         """
-        # Prepare request payload with proper date handling
-        payload = {
-            "start_date": start_date,  # Already in ISO format from calling code
-            "end_date": end_date,  # Already in ISO format from calling code
+        # Prepare query parameters
+        params = {
             "latitude": latitude or 40.4168,  # Default Madrid coordinates
-            "longitude": longitude or -3.7038
+            "longitude": longitude or -3.7038,
+            "start_date": start_date,  # ISO format datetime
+            "end_date": end_date  # ISO format datetime
         }
-        logger.info(f"Weather request payload: {payload}", tenant_id=tenant_id)
-        # Use POST request with extended timeout
+        logger.info(f"Weather request (v2.0 optimized): {params}", tenant_id=tenant_id)
+        # Use GET request to new optimized endpoint with short timeout (data is cached)
         result = await self._make_request(
-            "POST",
-            "weather/historical",
+            "GET",
+            "external/operations/historical-weather-optimized",
             tenant_id=tenant_id,
-            data=payload,
-            timeout=2000.0  # Match original timeout
+            params=params,
+            timeout=10.0  # Much shorter - data is pre-loaded and cached
         )
         if result:
-            logger.info(f"Successfully fetched {len(result)} weather records")
+            logger.info(f"Successfully fetched {len(result)} weather records from v2.0 endpoint")
             return result
         else:
-            logger.error("Failed to fetch weather data")
+            logger.warning("No weather data returned from v2.0 endpoint")
             return []
+
+    async def get_current_weather(
+        self,
+        tenant_id: str,
+        latitude: Optional[float] = None,
+        longitude: Optional[float] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get current weather for a location (real-time data)
+        Uses new v2.0 endpoint
+        """
+        params = {
+            "latitude": latitude or 40.4168,
+            "longitude": longitude or -3.7038
+        }
+        logger.info(f"Current weather request (v2.0): {params}", tenant_id=tenant_id)
+        result = await self._make_request(
+            "GET",
+            "external/operations/weather/current",
+            tenant_id=tenant_id,
+            params=params,
+            timeout=10.0
+        )
+        if result:
+            logger.info("Successfully fetched current weather")
+            return result
+        else:
+            logger.warning("No current weather data available")
+            return None
+
     async def get_weather_forecast(
         self,
         tenant_id: str,
-        days: int = 1,
+        days: int = 7,
         latitude: Optional[float] = None,
         longitude: Optional[float] = None
     ) -> Optional[List[Dict[str, Any]]]:
         """
-        Get weather forecast for location
-        FIXED: Uses GET request with query parameters as expected by the weather API
+        Get weather forecast for location (from AEMET)
+        Uses new v2.0 endpoint
         """
-        payload = {
-            "latitude": latitude or 40.4168,  # Default Madrid coordinates
+        params = {
+            "latitude": latitude or 40.4168,
             "longitude": longitude or -3.7038,
             "days": days
         }
-        logger.info(f"Weather forecast request params: {payload}", tenant_id=tenant_id)
+        logger.info(f"Weather forecast request (v2.0): {params}", tenant_id=tenant_id)
         result = await self._make_request(
-            "POST",
-            "weather/forecast",
+            "GET",
+            "external/operations/weather/forecast",
             tenant_id=tenant_id,
-            data=payload,
-            timeout=200.0
+            params=params,
+            timeout=10.0
         )
         if result:
             logger.info(f"Successfully fetched weather forecast for {days} days")
             return result
         else:
-            logger.error("Failed to fetch weather forecast")
+            logger.warning("No forecast data available")
             return []

     # ================================================================
     # TRAFFIC DATA
     # ================================================================
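Before the traffic hunk, a quick usage sketch of the reworked weather API above. The `client` instance, tenant id, and wiring are placeholders; only `get_current_weather`, `get_weather_forecast`, and their new defaults come from this diff:

```python
import asyncio

async def fetch_weather(client, tenant_id: str):
    # Real-time conditions via the new v2.0 endpoint (falls back to Madrid coords)
    current = await client.get_current_weather(tenant_id=tenant_id)
    # Forecast now defaults to 7 days (was 1) and uses GET with query params
    forecast = await client.get_weather_forecast(tenant_id=tenant_id, days=7)
    return current, forecast or []

# Hypothetical wiring:
# asyncio.run(fetch_weather(ExternalServiceClient(...), "tenant-123"))
```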
@@ -113,48 +145,34 @@ class ExternalServiceClient(BaseServiceClient):
         longitude: Optional[float] = None
     ) -> Optional[List[Dict[str, Any]]]:
         """
-        Get traffic data for a date range and location
-        Uses POST request with extended timeout for Madrid traffic data processing
+        Get historical traffic data using NEW v2.0 optimized city-based endpoint
+        This uses pre-loaded data from the database with Redis caching for <100ms response times
         """
-        # Prepare request payload
-        payload = {
-            "start_date": start_date,  # Already in ISO format from calling code
-            "end_date": end_date,  # Already in ISO format from calling code
+        # Prepare query parameters
+        params = {
             "latitude": latitude or 40.4168,  # Default Madrid coordinates
-            "longitude": longitude or -3.7038
+            "longitude": longitude or -3.7038,
+            "start_date": start_date,  # ISO format datetime
+            "end_date": end_date  # ISO format datetime
         }
-        logger.info(f"Traffic request payload: {payload}", tenant_id=tenant_id)
-        # Madrid traffic data can take 5-10 minutes to download and process
-        traffic_timeout = httpx.Timeout(
-            connect=30.0,  # Connection timeout
-            read=600.0,  # Read timeout: 10 minutes (was 30s)
-            write=30.0,  # Write timeout
-            pool=30.0  # Pool timeout
-        )
-        # Use POST request with extended timeout
-        logger.info("Making traffic data request",
-                    url="traffic/historical",
-                    tenant_id=tenant_id,
-                    timeout=traffic_timeout.read)
+        logger.info(f"Traffic request (v2.0 optimized): {params}", tenant_id=tenant_id)
+        # Use GET request to new optimized endpoint with short timeout (data is cached)
         result = await self._make_request(
-            "POST",
-            "traffic/historical",
+            "GET",
+            "external/operations/historical-traffic-optimized",
             tenant_id=tenant_id,
-            data=payload,
-            timeout=traffic_timeout
+            params=params,
+            timeout=10.0  # Much shorter - data is pre-loaded and cached
         )
         if result:
-            logger.info(f"Successfully fetched {len(result)} traffic records")
+            logger.info(f"Successfully fetched {len(result)} traffic records from v2.0 endpoint")
             return result
         else:
-            logger.error("Failed to fetch traffic data - _make_request returned None")
-            logger.error("This could be due to: network timeout, HTTP error, authentication failure, or service unavailable")
-            return None
+            logger.warning("No traffic data returned from v2.0 endpoint")
+            return []

     async def get_stored_traffic_data_for_training(
         self,
@@ -165,39 +183,49 @@ class ExternalServiceClient(BaseServiceClient):
         longitude: Optional[float] = None
     ) -> Optional[List[Dict[str, Any]]]:
         """
-        Get stored traffic data specifically for model training/re-training
-        This method prioritizes database-stored data over API calls
+        Get stored traffic data for model training/re-training
+        In v2.0, this uses the same optimized endpoint as get_traffic_data
+        since all data is pre-loaded and cached
         """
-        # Prepare request payload
-        payload = {
-            "start_date": start_date,
-            "end_date": end_date,
-            "latitude": latitude or 40.4168,  # Default Madrid coordinates
-            "longitude": longitude or -3.7038,
-            "stored_only": True  # Flag to indicate we want stored data only
-        }
-        logger.info(f"Training traffic data request: {payload}", tenant_id=tenant_id)
-        # Standard timeout since we're only querying the database
-        training_timeout = httpx.Timeout(
-            connect=30.0,
-            read=120.0,  # 2 minutes should be enough for database query
-            write=30.0,
-            pool=30.0
-        )
-        result = await self._make_request(
-            "POST",
-            "traffic/stored",  # New endpoint for stored traffic data
+        logger.info("Training traffic data request - delegating to optimized endpoint", tenant_id=tenant_id)
+        # Delegate to the same optimized endpoint
+        return await self.get_traffic_data(
             tenant_id=tenant_id,
-            data=payload,
-            timeout=training_timeout
+            start_date=start_date,
+            end_date=end_date,
+            latitude=latitude,
+            longitude=longitude
         )
+
+    async def get_current_traffic(
+        self,
+        tenant_id: str,
+        latitude: Optional[float] = None,
+        longitude: Optional[float] = None
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Get current traffic conditions for a location (real-time data)
+        Uses new v2.0 endpoint
+        """
+        params = {
+            "latitude": latitude or 40.4168,
+            "longitude": longitude or -3.7038
+        }
+        logger.info(f"Current traffic request (v2.0): {params}", tenant_id=tenant_id)
+        result = await self._make_request(
+            "GET",
+            "external/operations/traffic/current",
+            tenant_id=tenant_id,
+            params=params,
+            timeout=10.0
+        )
         if result:
-            logger.info(f"Successfully retrieved {len(result)} stored traffic records for training")
+            logger.info("Successfully fetched current traffic")
             return result
         else:
-            logger.warning("No stored traffic data available for training")
+            logger.warning("No current traffic data available")
             return None
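The training path is now a thin wrapper: since all traffic data is pre-loaded and cached, `get_stored_traffic_data_for_training` simply delegates to `get_traffic_data`. A minimal caller sketch (the `client` and the ISO datetimes are placeholders; the empty-list-on-miss behavior comes from the delegated method above):

```python
async def load_training_traffic(client, tenant_id: str):
    # Delegates to get_traffic_data internally, so this hits the same
    # optimized endpoint and returns [] (not None) when nothing is found.
    rows = await client.get_stored_traffic_data_for_training(
        tenant_id=tenant_id,
        start_date="2025-01-01T00:00:00",  # placeholder ISO datetimes
        end_date="2025-03-31T23:59:59",
    )
    return rows or []
```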

View File

@@ -49,6 +49,7 @@ class BaseServiceSettings(BaseSettings):
     DB_MAX_OVERFLOW: int = int(os.getenv("DB_MAX_OVERFLOW", "20"))
     DB_POOL_TIMEOUT: int = int(os.getenv("DB_POOL_TIMEOUT", "30"))
     DB_POOL_RECYCLE: int = int(os.getenv("DB_POOL_RECYCLE", "3600"))
+    DB_POOL_PRE_PING: bool = os.getenv("DB_POOL_PRE_PING", "true").lower() == "true"
     DB_ECHO: bool = os.getenv("DB_ECHO", "false").lower() == "true"

     # ================================================================
@@ -399,6 +400,7 @@ class BaseServiceSettings(BaseSettings):
             "max_overflow": self.DB_MAX_OVERFLOW,
             "pool_timeout": self.DB_POOL_TIMEOUT,
             "pool_recycle": self.DB_POOL_RECYCLE,
+            "pool_pre_ping": self.DB_POOL_PRE_PING,
             "echo": self.DB_ECHO,
         }
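For context on the new `pool_pre_ping` flag: SQLAlchemy's pessimistic disconnect handling pings each pooled connection on checkout and transparently replaces stale ones instead of failing mid-request. A minimal sketch of how the kwargs dict assembled above would feed an engine (the DSN, `pool_size`, and the `create_engine` call site are illustrative, not from this commit):

```python
from sqlalchemy import create_engine

# Illustrative only: mirrors the engine kwargs dict built above.
engine = create_engine(
    "postgresql://user:pass@db:5432/app",  # placeholder DSN
    pool_size=10,
    max_overflow=20,
    pool_timeout=30,
    pool_recycle=3600,
    pool_pre_ping=True,  # lightweight ping on checkout; stale connections are recycled
    echo=False,
)
```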

View File

@@ -7,6 +7,7 @@ from typing import Dict, Any, Callable, Optional
 from datetime import datetime, date
 import uuid
 import structlog
+from contextlib import suppress

 try:
     import aio_pika
@@ -17,6 +18,50 @@ except ImportError:
 logger = structlog.get_logger()

+class HeartbeatMonitor:
+    """Monitor to ensure heartbeats are processed during heavy operations"""
+
+    def __init__(self, client):
+        self.client = client
+        self._monitor_task = None
+        self._should_monitor = False
+
+    async def start_monitoring(self):
+        """Start heartbeat monitoring task"""
+        if self._monitor_task and not self._monitor_task.done():
+            return
+        self._should_monitor = True
+        self._monitor_task = asyncio.create_task(self._monitor_loop())
+
+    async def stop_monitoring(self):
+        """Stop heartbeat monitoring task"""
+        self._should_monitor = False
+        if self._monitor_task and not self._monitor_task.done():
+            self._monitor_task.cancel()
+            with suppress(asyncio.CancelledError):
+                await self._monitor_task
+
+    async def _monitor_loop(self):
+        """Monitor loop that periodically yields control for heartbeat processing"""
+        while self._should_monitor:
+            # Yield control to allow heartbeat processing
+            await asyncio.sleep(0.1)
+            # Verify connection is still alive
+            if self.client.connection and not self.client.connection.is_closed:
+                # Check if connection is still responsive
+                try:
+                    # This is a lightweight check to ensure the connection is responsive
+                    pass  # The heartbeat mechanism in aio_pika handles this internally
+                except Exception as e:
+                    logger.warning("Connection check failed", error=str(e))
+                    self.client.connected = False
+                    break
+            else:
+                logger.warning("Connection is closed, stopping monitor")
+                break

 def json_serializer(obj):
     """JSON serializer for objects not serializable by default json code"""
     if isinstance(obj, (datetime, date)):
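A minimal sketch of the start/stop contract for the new `HeartbeatMonitor`, assuming the class above is importable. The fake client is a stand-in; the monitor only reads `.connection`, `.connection.is_closed`, and `.connected`:

```python
import asyncio

class _FakeConnection:
    is_closed = False

class _FakeClient:
    connection = _FakeConnection()
    connected = True

async def demo():
    monitor = HeartbeatMonitor(_FakeClient())
    await monitor.start_monitoring()  # spawns the 0.1s monitor loop
    await asyncio.sleep(1.0)          # stand-in for a heavy, long-running operation
    await monitor.stop_monitoring()   # cancels the task and awaits it cleanly

asyncio.run(demo())
```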
@@ -42,6 +87,7 @@ class RabbitMQClient:
         self.connected = False
         self._reconnect_attempts = 0
         self._max_reconnect_attempts = 5
+        self.heartbeat_monitor = HeartbeatMonitor(self)

     async def connect(self):
         """Connect to RabbitMQ with retry logic"""
@@ -52,14 +98,17 @@ class RabbitMQClient:
         try:
             self.connection = await connect_robust(
                 self.connection_url,
-                heartbeat=30,
-                connection_attempts=3
+                heartbeat=600  # Increase heartbeat to 600 seconds (10 minutes) to prevent timeouts
             )
             self.channel = await self.connection.channel()
             await self.channel.set_qos(prefetch_count=100)  # Performance optimization
             self.connected = True
             self._reconnect_attempts = 0
+            # Start heartbeat monitoring
+            await self.heartbeat_monitor.start_monitoring()
             logger.info("Connected to RabbitMQ", service=self.service_name)
             return True
@@ -75,11 +124,28 @@ class RabbitMQClient:
         return False

     async def disconnect(self):
-        """Disconnect from RabbitMQ"""
-        if self.connection and not self.connection.is_closed:
-            await self.connection.close()
-        self.connected = False
-        logger.info("Disconnected from RabbitMQ", service=self.service_name)
+        """Disconnect from RabbitMQ with proper channel cleanup"""
+        try:
+            # Stop heartbeat monitoring first
+            await self.heartbeat_monitor.stop_monitoring()
+            # Close channel before connection to avoid "unexpected close" warnings
+            if self.channel and not self.channel.is_closed:
+                await self.channel.close()
+                logger.debug("RabbitMQ channel closed", service=self.service_name)
+            # Then close connection
+            if self.connection and not self.connection.is_closed:
+                await self.connection.close()
+            logger.info("Disconnected from RabbitMQ", service=self.service_name)
+            self.connected = False
+        except Exception as e:
+            logger.warning("Error during RabbitMQ disconnect",
+                           service=self.service_name,
+                           error=str(e))
+            self.connected = False

     async def ensure_connected(self) -> bool:
         """Ensure connection is active, reconnect if needed"""