122 lines
4.3 KiB
Python
122 lines
4.3 KiB
Python
"""
|
|
Service discovery for microservices
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
from typing import Dict, List, Optional
|
|
import httpx
|
|
import redis.asyncio as redis
|
|
from datetime import datetime, timedelta
|
|
|
|
from app.core.config import settings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class ServiceDiscovery:
    """Service discovery and health checking.

    Service URLs are read from ``settings.SERVICES`` (used as a mapping of
    service name to base URL).  Each health-check pass issues
    ``GET <base-url>/health`` against every service and records the outcome
    in a Redis hash named ``service_health:<name>`` that expires after five
    minutes.  ``get_healthy_services`` answers from those Redis records, not
    from live probes.
    """

    _HEALTH_KEY_PREFIX = "service_health:"
    _HEALTH_TTL_SECONDS = 300  # 5 minutes TTL
    _REQUEST_TIMEOUT_SECONDS = 5.0

    def __init__(self):
        """Create the Redis client and read the configured service map."""
        self.redis_client = redis.from_url(settings.REDIS_URL)
        self.services = settings.SERVICES
        self.health_check_interval = 30  # seconds
        self.health_check_task: Optional[asyncio.Task] = None

    async def initialize(self):
        """Run one health-check pass, then start the periodic background loop.

        The first pass is awaited so that health data is available as soon as
        this coroutine returns.  The background loop sleeps before each pass,
        so the initial check is not duplicated.  Calling this again while the
        loop is still running does not start a second loop.
        """
        logger.info("Initializing service discovery")

        # Initial health check
        await self._check_all_services()

        # Start health check task (only if one is not already running)
        task = self.health_check_task
        if task is None or task.done():
            self.health_check_task = asyncio.create_task(self._health_check_loop())

    async def cleanup(self):
        """Stop the background loop (if any) and close the Redis client."""
        task, self.health_check_task = self.health_check_task, None
        if task is not None:
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass

        # redis-py 5 deprecates close() on asyncio clients in favour of
        # aclose(); fall back to close() for older releases.
        close = getattr(self.redis_client, "aclose", None)
        if close is None:
            close = self.redis_client.close
        await close()

    async def get_service_url(self, service_name: str) -> Optional[str]:
        """Return the configured URL for ``service_name``, or ``None`` if unknown."""
        return self.services.get(service_name)

    async def get_healthy_services(self) -> List[str]:
        """Return the names of services whose Redis record marks them healthy.

        Services are returned in the iteration order of ``self.services``.
        """
        return [
            service_name
            for service_name in self.services
            if await self._is_service_healthy(service_name)
        ]

    async def _health_check_loop(self):
        """Re-check every service each ``health_check_interval`` seconds.

        Runs until cancelled.  Cancellation propagates to the awaiting caller
        (``cleanup`` handles it); any other error is logged and the loop
        continues with the next interval.
        """
        while True:
            await asyncio.sleep(self.health_check_interval)
            try:
                await self._check_all_services()
            except asyncio.CancelledError:
                raise
            except Exception as e:
                logger.error(f"Health check error: {e}")

    async def _check_all_services(self):
        """Probe all configured services concurrently and record the results."""
        # Snapshot the items so the mapping may change while probes are in flight.
        await asyncio.gather(
            *(
                self._check_and_record(service_name, service_url)
                for service_name, service_url in list(self.services.items())
            )
        )

    async def _check_and_record(self, service_name: str, service_url: str):
        """Probe one service and store the outcome; a failed probe counts as unhealthy."""
        try:
            is_healthy = await self._check_service_health(service_url)
        except Exception as e:
            logger.error(f"Health check failed for {service_name}: {e}")
            is_healthy = False
        await self._update_service_health(service_name, is_healthy)

    async def _check_service_health(self, service_url: str) -> bool:
        """Return True when ``GET <service_url>/health`` answers with HTTP 200.

        Any request error (timeout, connection failure, ...) is logged as a
        warning and reported as unhealthy.
        """
        try:
            # Strip a trailing slash so the probe never targets "//health".
            health_url = f"{str(service_url).rstrip('/')}/health"
            async with httpx.AsyncClient(timeout=self._REQUEST_TIMEOUT_SECONDS) as client:
                response = await client.get(health_url)
                return response.status_code == 200
        except Exception as e:
            logger.warning(f"Service health check failed: {e}")
            return False

    async def _update_service_health(self, service_name: str, is_healthy: bool):
        """Update service health status in Redis.

        Writes the hash ``service_health:<service_name>`` with the fields
        ``healthy`` ("True"/"False"), ``last_check`` (naive UTC ISO-8601
        timestamp) and ``url``, then refreshes the key's TTL.  Failures are
        logged and never raised.
        """
        try:
            key = f"{self._HEALTH_KEY_PREFIX}{service_name}"
            value = {
                # redis-py refuses bool values (DataError), so the flag is
                # stored as text; _is_service_healthy parses it back.
                "healthy": str(bool(is_healthy)),
                "last_check": datetime.utcnow().isoformat(),
                "url": str(self.services[service_name]),
            }

            await self.redis_client.hset(key, mapping=value)
            await self.redis_client.expire(key, self._HEALTH_TTL_SECONDS)

        except Exception as e:
            logger.error(f"Failed to update service health for {service_name}: {e}")

    async def _is_service_healthy(self, service_name: str) -> bool:
        """Check if service is healthy from the Redis record.

        Returns False when there is no record (never checked, or expired),
        when the record says unhealthy, or when Redis cannot be read.
        """
        try:
            key = f"{self._HEALTH_KEY_PREFIX}{service_name}"
            health_data = await self.redis_client.hgetall(key)

            if not health_data:
                return False

            # Field names and values are bytes by default, but str when the
            # client was created with decode_responses=True; accept both.
            flag = health_data.get(b"healthy")
            if flag is None:
                flag = health_data.get("healthy", "")
            if isinstance(flag, bytes):
                flag = flag.decode()

            return str(flag).strip().lower() in ("true", "1")

        except Exception as e:
            logger.error(f"Failed to check service health for {service_name}: {e}")
            return False