Files
bakery-ia/services/data/app/core/performance.py
2025-08-10 17:31:38 +02:00

312 lines
10 KiB
Python

# ================================================================
# services/data/app/core/performance.py
# ================================================================
"""
Performance optimization utilities for async operations
"""
import asyncio
import functools
import hashlib
import json
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, Optional, TypeVar

import structlog
# Module-level structlog logger shared by the utilities in this file.
logger = structlog.get_logger()
# Type variable used in the ``Callable[..., T]`` annotations of the
# decorator factories in this module.
T = TypeVar('T')
class AsyncCache:
    """Simple in-memory async cache with TTL.

    Entries are kept in a plain dict keyed by string.  Each entry stores the
    cached value together with its creation and expiry timestamps (both
    timezone-aware UTC).  Expired entries are removed lazily when they are
    looked up via ``get`` or in bulk via ``cleanup_expired``.

    Note: ``get`` returns ``None`` on a miss, so a stored ``None`` value
    cannot be told apart from a missing entry.
    """

    def __init__(self, default_ttl: int = 300):
        """Create an empty cache.

        Args:
            default_ttl: Lifetime in seconds applied when ``set`` is called
                without an explicit ``ttl``.
        """
        self.cache: Dict[str, Dict[str, Any]] = {}
        self.default_ttl = default_ttl

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate a cache key from arbitrary arguments.

        The arguments are serialised to JSON (keyword arguments sorted by
        name, non-JSON values converted with ``str``) and hashed with MD5.
        The hash is used only as a compact identifier, not for security.

        Returns:
            Hex digest identifying this combination of arguments.
        """
        key_data = {
            'args': args,
            'kwargs': sorted(kwargs.items())
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _resolve_ttl(self, ttl: Optional[int]) -> int:
        """Return ``ttl`` when given, otherwise the cache's default TTL.

        Only ``None`` selects the default; an explicit ``0`` is honoured so
        that callers can request an entry that expires immediately.
        """
        return self.default_ttl if ttl is None else ttl

    def _is_expired(self, entry: Dict[str, Any]) -> bool:
        """Check if a cache entry is expired.

        An entry without an ``expires_at`` timestamp is treated as expired.
        """
        expires_at = entry.get('expires_at')
        if not expires_at:
            return True
        return datetime.now(timezone.utc) > expires_at

    async def get(self, key: str) -> Optional[Any]:
        """Get a value from the cache.

        Args:
            key: Cache key to look up.

        Returns:
            The cached value, or ``None`` when the key is absent or its
            entry has expired (expired entries are deleted on the way).
        """
        entry = self.cache.get(key)
        if entry is not None:
            if not self._is_expired(entry):
                logger.debug("Cache hit", cache_key=key)
                return entry['value']
            # Clean up the expired entry before reporting the miss
            del self.cache[key]
            logger.debug("Cache expired", cache_key=key)
        logger.debug("Cache miss", cache_key=key)
        return None

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Store a value in the cache.

        Args:
            key: Cache key to store under; an existing entry is replaced.
            value: Value to cache.
            ttl: Lifetime in seconds.  ``None`` uses ``default_ttl``; ``0``
                creates an entry that is already due to expire.
        """
        ttl = self._resolve_ttl(ttl)
        created_at = datetime.now(timezone.utc)
        self.cache[key] = {
            'value': value,
            'expires_at': created_at + timedelta(seconds=ttl),
            'created_at': created_at
        }
        logger.debug("Cache set", cache_key=key, ttl=ttl)

    async def clear(self) -> None:
        """Clear all cache entries."""
        self.cache.clear()
        logger.info("Cache cleared")

    async def cleanup_expired(self) -> int:
        """Remove every expired entry.

        Returns:
            Number of entries that were removed.
        """
        # Collect keys first: the dict must not change size while iterating.
        expired_keys = [
            key for key, entry in self.cache.items()
            if self._is_expired(entry)
        ]
        for key in expired_keys:
            del self.cache[key]
        if expired_keys:
            logger.info("Cleaned up expired cache entries", count=len(expired_keys))
        return len(expired_keys)
def async_cache(ttl: int = 300, cache_instance: Optional[AsyncCache] = None):
    """Decorator for caching async function results.

    Args:
        ttl: Lifetime in seconds for each cached result.
        cache_instance: Cache to store results in.  When omitted, every
            decorated function gets its own private ``AsyncCache``.

    Returns:
        A decorator.  The wrapped coroutine function additionally exposes
        ``cache_clear`` and ``cache_cleanup`` (bound to the underlying
        cache's ``clear`` and ``cleanup_expired``).

    Note:
        A result of ``None`` is treated as a cache miss, so functions that
        return ``None`` are re-executed on every call.
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _cache = cache_instance if cache_instance is not None else AsyncCache(ttl)
        # Namespace keys by module and qualified name, not just the bare
        # function name: with a shared cache instance, two functions that
        # happen to share a name must not read each other's entries.
        key_namespace = f"{func.__module__}.{func.__qualname__}"

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key
            cache_key = _cache._generate_key(key_namespace, *args, **kwargs)

            # Try to get from cache
            cached_result = await _cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = await func(*args, **kwargs)
            await _cache.set(cache_key, result, ttl)
            return result

        # Add cache management methods
        wrapper.cache_clear = _cache.clear
        wrapper.cache_cleanup = _cache.cleanup_expired
        return wrapper
    return decorator
class ConnectionPool:
    """Simple connection pool for HTTP clients.

    The pool hands out "slots" rather than connections: a semaphore caps how
    many holders may be active at once, and a counter tracks the current
    number for logging.  Instances can be used as an async context manager
    that acquires a slot on entry and releases it on exit.
    """

    def __init__(self, max_connections: int = 10):
        """Create a pool with ``max_connections`` available slots."""
        self._active_connections = 0
        self.max_connections = max_connections
        self.semaphore = asyncio.Semaphore(max_connections)

    async def acquire(self):
        """Wait for a free slot and take it."""
        await self.semaphore.acquire()
        self._active_connections += 1
        logger.debug(
            "Connection acquired",
            active=self._active_connections,
            max=self.max_connections,
        )

    async def release(self):
        """Give a slot back to the pool."""
        self.semaphore.release()
        # The counter is clamped so it never drops below zero.
        remaining = self._active_connections - 1
        self._active_connections = remaining if remaining > 0 else 0
        logger.debug(
            "Connection released",
            active=self._active_connections,
            max=self.max_connections,
        )

    async def __aenter__(self):
        """Acquire a slot and return the pool itself."""
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Release the slot taken on entry; exceptions are not suppressed."""
        await self.release()
def rate_limit(calls: int, period: int):
    """Rate limiting decorator for async functions.

    Allows at most ``calls`` invocations of the decorated coroutine function
    to start within any sliding window of ``period`` seconds.  Callers that
    would exceed the limit are delayed (not rejected) until a slot frees up.

    Args:
        calls: Maximum number of calls per window; must be at least 1.
        period: Window length in seconds.

    Raises:
        ValueError: If ``calls`` is less than 1.
    """
    if calls < 1:
        # With no allowed calls the limiter could never admit anything.
        raise ValueError(f"calls must be at least 1, got {calls}")

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Monotonic start times of the calls inside the current window,
        # oldest first.
        call_times = []
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            async with lock:
                while True:
                    # A monotonic clock is immune to wall-clock adjustments.
                    now = time.monotonic()

                    # Remove old call times
                    cutoff = now - period
                    call_times[:] = [t for t in call_times if t > cutoff]

                    # Check rate limit
                    if len(call_times) < calls:
                        break

                    # Sleep until the oldest call leaves the window, then
                    # re-read the clock and re-check so the call is recorded
                    # with its real (post-sleep) start time.
                    sleep_time = call_times[0] + period - now
                    logger.warning("Rate limit reached, sleeping", sleep_time=sleep_time)
                    await asyncio.sleep(sleep_time)

                # Record this call
                call_times.append(now)

            # The lock only guards the bookkeeping; the wrapped call itself
            # runs outside it so admitted calls may overlap.
            return await func(*args, **kwargs)
        return wrapper
    return decorator
async def batch_process(
    items: list,
    process_func: Callable,
    batch_size: int = 10,
    max_concurrency: int = 5
) -> list:
    """Process items in batches with controlled concurrency.

    ``items`` is split into consecutive slices of at most ``batch_size``
    elements; ``process_func`` is awaited once per slice, with no more than
    ``max_concurrency`` slices in flight at a time.

    Args:
        items: Items to process.
        process_func: Async callable that receives one batch (a list slice).
        batch_size: Maximum number of items per batch; must be at least 1.
        max_concurrency: Maximum number of batches processed concurrently;
            must be at least 1.

    Returns:
        Flattened results in batch order.  A batch result that is a list is
        extended into the output; any other result is appended as a single
        element.  Batches that raised are logged and omitted.

    Raises:
        ValueError: If ``batch_size`` or ``max_concurrency`` is less than 1.
    """
    # Validate up front: a negative batch_size would silently process
    # nothing, and a zero max_concurrency would block forever.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")
    if max_concurrency < 1:
        raise ValueError(f"max_concurrency must be at least 1, got {max_concurrency}")

    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_batch(batch):
        async with semaphore:
            return await process_func(batch)

    # Create batches
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    logger.info("Processing items in batches",
                total_items=len(items),
                batches=len(batches),
                batch_size=batch_size,
                max_concurrency=max_concurrency)

    # Process batches concurrently
    batch_results = await asyncio.gather(
        *(process_batch(batch) for batch in batches),
        return_exceptions=True
    )

    # Flatten results
    results = []
    for batch_result in batch_results:
        # BaseException rather than Exception: gather() can also hand back
        # asyncio.CancelledError, which must not leak into the results.
        if isinstance(batch_result, BaseException):
            logger.error("Batch processing error", error=str(batch_result))
        elif isinstance(batch_result, list):
            results.extend(batch_result)
        else:
            results.append(batch_result)

    logger.info("Batch processing completed",
                processed_items=len(results),
                total_batches=len(batches))
    return results
class PerformanceMonitor:
    """Simple performance monitoring for async functions.

    Raw counters are accumulated per function name in ``self.metrics``;
    derived statistics (average duration, success rate) are computed on
    demand by ``get_metrics`` and never stored.
    """

    def __init__(self):
        """Create a monitor with no recorded metrics."""
        # Maps function name -> dict of raw counters (see record_execution).
        self.metrics: Dict[str, Dict[str, Any]] = {}

    def record_execution(self, func_name: str, duration: float, success: bool = True):
        """Record one execution of ``func_name``.

        Args:
            func_name: Name under which the metrics are aggregated.
            duration: Execution time of this call, in seconds.
            success: Whether the call completed without raising.
        """
        if func_name not in self.metrics:
            self.metrics[func_name] = {
                'call_count': 0,
                'success_count': 0,
                'error_count': 0,
                'total_duration': 0.0,
                'min_duration': float('inf'),
                'max_duration': 0.0
            }
        metric = self.metrics[func_name]
        metric['call_count'] += 1
        metric['total_duration'] += duration
        metric['min_duration'] = min(metric['min_duration'], duration)
        metric['max_duration'] = max(metric['max_duration'], duration)
        if success:
            metric['success_count'] += 1
        else:
            metric['error_count'] += 1

    @staticmethod
    def _with_derived(metric: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``metric`` with avg_duration/success_rate added."""
        snapshot = dict(metric)
        call_count = snapshot['call_count']
        if call_count > 0:
            snapshot['avg_duration'] = snapshot['total_duration'] / call_count
            snapshot['success_rate'] = snapshot['success_count'] / call_count
        return snapshot

    def get_metrics(self, func_name: Optional[str] = None) -> dict:
        """Get performance metrics.

        Args:
            func_name: Name of a single function to report on.  When omitted,
                metrics for every recorded function are returned.

        Returns:
            For a single function: its counters plus ``avg_duration`` and
            ``success_rate``, or an empty dict if nothing was recorded for
            it.  Otherwise: a dict mapping each function name to such a
            dict.  The returned dicts are snapshots; the stored counters are
            not modified, so derived values can never go stale.
        """
        if func_name:
            metric = self.metrics.get(func_name)
            return self._with_derived(metric) if metric else {}
        return {
            name: self._with_derived(metric)
            for name, metric in self.metrics.items()
        }
def monitor_performance(monitor: Optional[PerformanceMonitor] = None):
    """Decorator to monitor the performance of an async function.

    Every call is timed and reported to a ``PerformanceMonitor`` under the
    function's ``__name__``, together with whether it completed normally.

    Args:
        monitor: Monitor that receives the measurements.  When omitted,
            each decorated function gets its own private monitor.

    Returns:
        A decorator.  The wrapped coroutine function additionally exposes
        ``get_metrics()``, returning the metrics recorded for it.
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _monitor = monitor if monitor is not None else PerformanceMonitor()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # perf_counter is monotonic, so a wall-clock adjustment cannot
            # produce a negative or inflated duration.
            start_time = time.perf_counter()
            # Only flipped on a normal return, so anything that propagates
            # out (including task cancellation) is counted as a failure.
            success = False
            try:
                result = await func(*args, **kwargs)
                success = True
                return result
            finally:
                duration = time.perf_counter() - start_time
                _monitor.record_execution(func.__name__, duration, success)
                logger.debug("Function performance",
                             function=func.__name__,
                             duration=duration,
                             success=success)

        # Add metrics access
        wrapper.get_metrics = lambda: _monitor.get_metrics(func.__name__)
        return wrapper
    return decorator
# Global instances
# Shared module-level objects, created once when this module is imported:
# - global_cache: AsyncCache whose entries default to a 300-second TTL
# - global_connection_pool: ConnectionPool limited to 20 concurrent slots
# - global_performance_monitor: PerformanceMonitor starting with no metrics
global_cache = AsyncCache(default_ttl=300)
global_connection_pool = ConnectionPool(max_connections=20)
global_performance_monitor = PerformanceMonitor()