# ================================================================
# services/data/app/core/performance.py
# ================================================================
"""
Performance optimization utilities for async operations
"""

import asyncio
import functools
from typing import Any, Callable, Dict, Optional, TypeVar
from datetime import datetime, timedelta, timezone
import hashlib
import json
import structlog

logger = structlog.get_logger()

T = TypeVar('T')


class AsyncCache:
    """Simple in-memory async cache with TTL"""

    def __init__(self, default_ttl: int = 300):
        self.cache: Dict[str, Dict[str, Any]] = {}
        self.default_ttl = default_ttl

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate cache key from arguments"""
        key_data = {
            'args': args,
            'kwargs': sorted(kwargs.items())
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _is_expired(self, entry: Dict[str, Any]) -> bool:
        """Check if cache entry is expired"""
        expires_at = entry.get('expires_at')
        if not expires_at:
            return True
        return datetime.now(timezone.utc) > expires_at

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache"""
        if key in self.cache:
            entry = self.cache[key]
            if not self._is_expired(entry):
                logger.debug("Cache hit", cache_key=key)
                return entry['value']
            else:
                # Clean up expired entry
                del self.cache[key]
                logger.debug("Cache expired", cache_key=key)

        logger.debug("Cache miss", cache_key=key)
        return None

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Set value in cache"""
        ttl = ttl or self.default_ttl
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl)

        self.cache[key] = {
            'value': value,
            'expires_at': expires_at,
            'created_at': datetime.now(timezone.utc)
        }

        logger.debug("Cache set", cache_key=key, ttl=ttl)

    async def clear(self) -> None:
        """Clear all cache entries"""
        self.cache.clear()
        logger.info("Cache cleared")

    async def cleanup_expired(self) -> int:
        """Clean up expired entries"""
        expired_keys = [
            key for key, entry in self.cache.items()
            if self._is_expired(entry)
        ]

        for key in expired_keys:
            del self.cache[key]

        if expired_keys:
            logger.info("Cleaned up expired cache entries", count=len(expired_keys))

        return len(expired_keys)


def async_cache(ttl: int = 300, cache_instance: Optional[AsyncCache] = None):
    """Decorator for caching async function results"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _cache = cache_instance or AsyncCache(ttl)

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key from the function name and call arguments
            cache_key = _cache._generate_key(func.__name__, *args, **kwargs)

            # Try to get from cache; note that a cached value of None is
            # indistinguishable from a miss, so functions returning None
            # are re-executed on every call
            cached_result = await _cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = await func(*args, **kwargs)
            await _cache.set(cache_key, result, ttl)

            return result

        # Add cache management methods
        wrapper.cache_clear = _cache.clear
        wrapper.cache_cleanup = _cache.cleanup_expired

        return wrapper

    return decorator


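# Illustrative usage of async_cache; the coroutine and client below are
# hypothetical, not part of this module:
#
#     @async_cache(ttl=60)
#     async def fetch_symbol_stats(symbol: str) -> dict:
#         return await some_client.get_stats(symbol)
#
#     # Repeated calls with the same arguments within 60 seconds return the
#     # cached value; fetch_symbol_stats.cache_clear() and .cache_cleanup()
#     # manage the underlying entries.

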
class ConnectionPool:
    """Simple connection pool for HTTP clients"""

    def __init__(self, max_connections: int = 10):
        self.max_connections = max_connections
        self.semaphore = asyncio.Semaphore(max_connections)
        self._active_connections = 0

    async def acquire(self):
        """Acquire a connection slot"""
        await self.semaphore.acquire()
        self._active_connections += 1
        logger.debug("Connection acquired", active=self._active_connections, max=self.max_connections)

    async def release(self):
        """Release a connection slot"""
        self.semaphore.release()
        self._active_connections = max(0, self._active_connections - 1)
        logger.debug("Connection released", active=self._active_connections, max=self.max_connections)

    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.release()


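# Illustrative usage of ConnectionPool as an async context manager; the
# http_client call inside the block is a hypothetical placeholder:
#
#     pool = ConnectionPool(max_connections=5)
#
#     async def fetch(url: str):
#         async with pool:
#             # at most 5 of these blocks run concurrently
#             return await http_client.get(url)

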
def rate_limit(calls: int, period: int):
    """Rate limiting decorator: at most `calls` invocations per rolling `period` seconds"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        call_times = []
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            async with lock:
                now = datetime.now(timezone.utc)

                # Remove call times that have fallen outside the current window
                cutoff = now - timedelta(seconds=period)
                call_times[:] = [t for t in call_times if t > cutoff]

                # Check rate limit; wait until the oldest call leaves the window
                if len(call_times) >= calls:
                    sleep_time = (call_times[0] + timedelta(seconds=period) - now).total_seconds()
                    if sleep_time > 0:
                        logger.warning("Rate limit reached, sleeping", sleep_time=sleep_time)
                        await asyncio.sleep(sleep_time)
                        now = datetime.now(timezone.utc)

                # Record this call
                call_times.append(now)

            # Run the wrapped coroutine outside the lock so calls are not serialized
            return await func(*args, **kwargs)

        return wrapper

    return decorator


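# Illustrative usage of rate_limit; the endpoint and http_client below are
# hypothetical:
#
#     @rate_limit(calls=10, period=60)
#     async def call_external_api(payload: dict) -> dict:
#         return await http_client.post("/quota-limited-endpoint", json=payload)
#
#     # At most 10 calls start per rolling 60-second window; extra callers sleep.

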
async def batch_process(
    items: list,
    process_func: Callable,
    batch_size: int = 10,
    max_concurrency: int = 5
) -> list:
    """Process items in batches with controlled concurrency.

    `process_func` is awaited once per batch and receives the whole batch
    (a slice of `items`), not individual items.
    """

    results = []
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_batch(batch):
        async with semaphore:
            return await process_func(batch)

    # Create batches
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    logger.info("Processing items in batches",
                total_items=len(items),
                batches=len(batches),
                batch_size=batch_size,
                max_concurrency=max_concurrency)

    # Process batches concurrently
    batch_results = await asyncio.gather(
        *[process_batch(batch) for batch in batches],
        return_exceptions=True
    )

    # Flatten results, skipping batches that raised
    for batch_result in batch_results:
        if isinstance(batch_result, Exception):
            logger.error("Batch processing error", error=str(batch_result))
            continue

        if isinstance(batch_result, list):
            results.extend(batch_result)
        else:
            results.append(batch_result)

    logger.info("Batch processing completed",
                processed_items=len(results),
                total_batches=len(batches))

    return results


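# Illustrative usage of batch_process; both `enrich_records` and `enrich`
# are hypothetical helpers:
#
#     async def enrich_records(batch: list) -> list:
#         return [await enrich(record) for record in batch]
#
#     enriched = await batch_process(records, enrich_records,
#                                    batch_size=25, max_concurrency=4)

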
class PerformanceMonitor:
    """Simple performance monitoring for async functions"""

    def __init__(self):
        self.metrics = {}

    def record_execution(self, func_name: str, duration: float, success: bool = True):
        """Record function execution metrics"""
        if func_name not in self.metrics:
            self.metrics[func_name] = {
                'call_count': 0,
                'success_count': 0,
                'error_count': 0,
                'total_duration': 0.0,
                'min_duration': float('inf'),
                'max_duration': 0.0
            }

        metric = self.metrics[func_name]
        metric['call_count'] += 1
        metric['total_duration'] += duration
        metric['min_duration'] = min(metric['min_duration'], duration)
        metric['max_duration'] = max(metric['max_duration'], duration)

        if success:
            metric['success_count'] += 1
        else:
            metric['error_count'] += 1

    def get_metrics(self, func_name: Optional[str] = None) -> dict:
        """Get performance metrics"""
        if func_name:
            metric = self.metrics.get(func_name, {})
            if metric and metric['call_count'] > 0:
                metric['avg_duration'] = metric['total_duration'] / metric['call_count']
                metric['success_rate'] = metric['success_count'] / metric['call_count']
            return metric

        return self.metrics


def monitor_performance(monitor: Optional[PerformanceMonitor] = None):
    """Decorator to monitor function performance"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _monitor = monitor or PerformanceMonitor()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start_time = datetime.now(timezone.utc)
            success = True

            try:
                result = await func(*args, **kwargs)
                return result
            except Exception:
                success = False
                raise
            finally:
                end_time = datetime.now(timezone.utc)
                duration = (end_time - start_time).total_seconds()
                _monitor.record_execution(func.__name__, duration, success)

                logger.debug("Function performance",
                             function=func.__name__,
                             duration=duration,
                             success=success)

        # Add metrics access
        wrapper.get_metrics = lambda: _monitor.get_metrics(func.__name__)

        return wrapper

    return decorator


# Global instances
global_cache = AsyncCache(default_ttl=300)
global_connection_pool = ConnectionPool(max_connections=20)
global_performance_monitor = PerformanceMonitor()
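

# Illustrative usage of monitor_performance together with the shared instances
# above; the wrapped coroutine `load_reference_data` is hypothetical:
#
#     @monitor_performance(global_performance_monitor)
#     @async_cache(ttl=120, cache_instance=global_cache)
#     async def load_reference_data(name: str) -> dict:
#         ...
#
#     # load_reference_data.get_metrics() then reports call count, durations,
#     # and success rate for the wrapped function.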