Improve the traffic fetching system
services/data/app/core/performance.py (new file, +312 lines)
@@ -0,0 +1,312 @@
# ================================================================
# services/data/app/core/performance.py
# ================================================================
"""
Performance optimization utilities for async operations
"""

import asyncio
import functools
from typing import Any, Callable, Dict, Optional, TypeVar
from datetime import datetime, timedelta, timezone
import hashlib
import json
import structlog

logger = structlog.get_logger()

T = TypeVar('T')


class AsyncCache:
    """Simple in-memory async cache with TTL"""

    def __init__(self, default_ttl: int = 300):
        self.cache: Dict[str, Dict[str, Any]] = {}
        self.default_ttl = default_ttl

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate cache key from arguments"""
        key_data = {
            'args': args,
            'kwargs': sorted(kwargs.items())
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _is_expired(self, entry: Dict[str, Any]) -> bool:
        """Check if cache entry is expired"""
        expires_at = entry.get('expires_at')
        if not expires_at:
            return True
        return datetime.now(timezone.utc) > expires_at

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache"""
        if key in self.cache:
            entry = self.cache[key]
            if not self._is_expired(entry):
                logger.debug("Cache hit", cache_key=key)
                return entry['value']
            else:
                # Clean up expired entry
                del self.cache[key]
                logger.debug("Cache expired", cache_key=key)

        logger.debug("Cache miss", cache_key=key)
        return None

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Set value in cache"""
        ttl = ttl or self.default_ttl
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl)

        self.cache[key] = {
            'value': value,
            'expires_at': expires_at,
            'created_at': datetime.now(timezone.utc)
        }

        logger.debug("Cache set", cache_key=key, ttl=ttl)

    async def clear(self) -> None:
        """Clear all cache entries"""
        self.cache.clear()
        logger.info("Cache cleared")

    async def cleanup_expired(self) -> int:
        """Clean up expired entries"""
        expired_keys = [
            key for key, entry in self.cache.items()
            if self._is_expired(entry)
        ]

        for key in expired_keys:
            del self.cache[key]

        if expired_keys:
            logger.info("Cleaned up expired cache entries", count=len(expired_keys))

        return len(expired_keys)
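
# Illustrative usage sketch (hypothetical names and values): caching a traffic
# snapshot by hand via the module-private key helper. Note that a cached None
# cannot be distinguished from a miss.
async def _example_async_cache_usage() -> dict:
    cache = AsyncCache(default_ttl=60)
    key = cache._generate_key("traffic", region="eu-west")
    snapshot = await cache.get(key)
    if snapshot is None:
        snapshot = {"region": "eu-west", "vehicles": 1250}  # stand-in fetch result
        await cache.set(key, snapshot, ttl=60)
    return snapshot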


def async_cache(ttl: int = 300, cache_instance: Optional[AsyncCache] = None):
    """Decorator for caching async function results"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _cache = cache_instance or AsyncCache(ttl)

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key
            cache_key = _cache._generate_key(func.__name__, *args, **kwargs)

            # Try to get from cache; None results look like misses,
            # so functions returning None are recomputed on every call
            cached_result = await _cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = await func(*args, **kwargs)
            await _cache.set(cache_key, result, ttl)

            return result

        # Add cache management methods
        wrapper.cache_clear = _cache.clear
        wrapper.cache_cleanup = _cache.cleanup_expired

        return wrapper

    return decorator
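
# Illustrative usage sketch: repeated calls with the same arguments within the
# TTL return the cached payload. The fetch function and payload are hypothetical.
@async_cache(ttl=120)
async def _example_fetch_congestion(region: str) -> dict:
    return {"region": region, "congestion": 0.42}  # stand-in for a real fetch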


class ConnectionPool:
    """Simple connection pool for HTTP clients"""

    def __init__(self, max_connections: int = 10):
        self.max_connections = max_connections
        self.semaphore = asyncio.Semaphore(max_connections)
        self._active_connections = 0

    async def acquire(self):
        """Acquire a connection slot"""
        await self.semaphore.acquire()
        self._active_connections += 1
        logger.debug("Connection acquired", active=self._active_connections, max=self.max_connections)

    async def release(self):
        """Release a connection slot"""
        self.semaphore.release()
        self._active_connections = max(0, self._active_connections - 1)
        logger.debug("Connection released", active=self._active_connections, max=self.max_connections)

    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.release()
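
# Illustrative usage sketch: bounding concurrent upstream requests with the
# pool's async-context-manager protocol. The sleep stands in for real I/O.
async def _example_pooled_fetch(pool: ConnectionPool) -> None:
    async with pool:
        await asyncio.sleep(0.01)  # hypothetical HTTP request happens here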


def rate_limit(calls: int, period: int):
    """Rate limiting decorator"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        call_times = []
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # The lock covers the window check, including any sleep, so
            # concurrent callers queue up in arrival order; the wrapped
            # call itself runs outside the lock
            async with lock:
                now = datetime.now(timezone.utc)

                # Remove old call times
                cutoff = now - timedelta(seconds=period)
                call_times[:] = [t for t in call_times if t > cutoff]

                # Check rate limit
                if len(call_times) >= calls:
                    sleep_time = (call_times[0] + timedelta(seconds=period) - now).total_seconds()
                    if sleep_time > 0:
                        logger.warning("Rate limit reached, sleeping", sleep_time=sleep_time)
                        await asyncio.sleep(sleep_time)

                # Record this call
                call_times.append(now)

            return await func(*args, **kwargs)

        return wrapper

    return decorator
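
# Illustrative usage sketch: at most 10 calls per rolling 60-second window;
# excess callers sleep until a slot frees up. The endpoint is hypothetical.
@rate_limit(calls=10, period=60)
async def _example_poll_upstream() -> dict:
    return {"status": "ok"}  # stand-in for an upstream traffic API response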


async def batch_process(
    items: list,
    process_func: Callable,
    batch_size: int = 10,
    max_concurrency: int = 5
) -> list:
    """Process items in batches with controlled concurrency"""

    results = []
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_batch(batch):
        async with semaphore:
            return await process_func(batch)

    # Create batches
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    logger.info("Processing items in batches",
                total_items=len(items),
                batches=len(batches),
                batch_size=batch_size,
                max_concurrency=max_concurrency)

    # Process batches concurrently
    batch_results = await asyncio.gather(
        *[process_batch(batch) for batch in batches],
        return_exceptions=True
    )

    # Flatten results
    for batch_result in batch_results:
        if isinstance(batch_result, Exception):
            logger.error("Batch processing error", error=str(batch_result))
            continue

        if isinstance(batch_result, list):
            results.extend(batch_result)
        else:
            results.append(batch_result)

    logger.info("Batch processing completed",
                processed_items=len(results),
                total_batches=len(batches))

    return results
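
# Illustrative usage sketch: the handler receives a whole batch and returns a
# list, so results flatten back into one list. Inputs are made-up integers.
async def _example_batched_doubling() -> list:
    async def double_batch(batch: list) -> list:
        return [item * 2 for item in batch]

    return await batch_process(list(range(100)), double_batch,
                               batch_size=10, max_concurrency=5)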


class PerformanceMonitor:
    """Simple performance monitoring for async functions"""

    def __init__(self):
        self.metrics = {}

    def record_execution(self, func_name: str, duration: float, success: bool = True):
        """Record function execution metrics"""
        if func_name not in self.metrics:
            self.metrics[func_name] = {
                'call_count': 0,
                'success_count': 0,
                'error_count': 0,
                'total_duration': 0.0,
                'min_duration': float('inf'),
                'max_duration': 0.0
            }

        metric = self.metrics[func_name]
        metric['call_count'] += 1
        metric['total_duration'] += duration
        metric['min_duration'] = min(metric['min_duration'], duration)
        metric['max_duration'] = max(metric['max_duration'], duration)

        if success:
            metric['success_count'] += 1
        else:
            metric['error_count'] += 1

    def get_metrics(self, func_name: Optional[str] = None) -> dict:
        """Get performance metrics"""
        if func_name:
            metric = self.metrics.get(func_name, {})
            if metric and metric['call_count'] > 0:
                metric['avg_duration'] = metric['total_duration'] / metric['call_count']
                metric['success_rate'] = metric['success_count'] / metric['call_count']
            return metric

        return self.metrics
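
# Illustrative usage sketch: recording two hand-made executions and reading
# back the derived avg_duration and success_rate. Durations are invented.
def _example_metrics() -> dict:
    monitor = PerformanceMonitor()
    monitor.record_execution("fetch_traffic", duration=0.12, success=True)
    monitor.record_execution("fetch_traffic", duration=0.30, success=False)
    return monitor.get_metrics("fetch_traffic")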


def monitor_performance(monitor: Optional[PerformanceMonitor] = None):
    """Decorator to monitor function performance"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _monitor = monitor or PerformanceMonitor()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start_time = datetime.now(timezone.utc)
            success = True

            try:
                result = await func(*args, **kwargs)
                return result
            except Exception:
                success = False
                raise
            finally:
                end_time = datetime.now(timezone.utc)
                duration = (end_time - start_time).total_seconds()
                _monitor.record_execution(func.__name__, duration, success)

                logger.debug("Function performance",
                             function=func.__name__,
                             duration=duration,
                             success=success)

        # Add metrics access
        wrapper.get_metrics = lambda: _monitor.get_metrics(func.__name__)

        return wrapper

    return decorator
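
# Illustrative usage sketch: the decorator times each call and exposes the
# per-function stats via the attached get_metrics(). The body is hypothetical.
@monitor_performance()
async def _example_monitored_fetch() -> dict:
    await asyncio.sleep(0.01)  # stand-in for real work
    return {"rows": 42}  # call _example_monitored_fetch.get_metrics() for stats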


# Global instances
global_cache = AsyncCache(default_ttl=300)
global_connection_pool = ConnectionPool(max_connections=20)
global_performance_monitor = PerformanceMonitor()
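
# Illustrative wiring sketch using the shared instances above. Decorator order
# matters: async_cache is outermost, so cache hits skip the monitored body
# entirely. The fetcher and its payload are hypothetical.
@async_cache(ttl=300, cache_instance=global_cache)
@monitor_performance(monitor=global_performance_monitor)
async def _example_fetch_traffic(region: str) -> dict:
    async with global_connection_pool:
        return {"region": region, "vehicles": 1250}  # stand-in traffic payload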