# ================================================================
# services/data/app/core/performance.py
# ================================================================
"""
Performance optimization utilities for async operations
"""

import asyncio
import functools
import hashlib
import json
import time
from datetime import datetime, timedelta, timezone
from typing import Any, Awaitable, Callable, Dict, Optional, TypeVar

import structlog

logger = structlog.get_logger()

T = TypeVar('T')


class AsyncCache:
    """Simple in-memory async cache with TTL.

    Entries are kept in the plain dict ``self.cache``; each entry is a dict
    holding ``value``, ``expires_at`` and ``created_at`` (timezone-aware UTC
    datetimes). Expired entries are removed lazily by ``get`` or in bulk by
    ``cleanup_expired``. The cache has no size limit.
    """

    def __init__(self, default_ttl: int = 300):
        """Create an empty cache.

        Args:
            default_ttl: Lifetime in seconds applied when ``set`` is called
                without an explicit ``ttl``.
        """
        self.cache: Dict[str, Dict[str, Any]] = {}
        self.default_ttl = default_ttl

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate cache key from arguments.

        The arguments are serialised to JSON and hashed with MD5 (used here
        only as a compact fingerprint, not for security).

        NOTE: values that are not JSON serialisable are converted with
        ``str()``. For objects without a custom ``__str__`` that text
        typically embeds the object's memory address, so such arguments yield
        per-instance keys.
        """
        key_data = {
            'args': args,
            'kwargs': sorted(kwargs.items()),
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _is_expired(self, entry: Dict[str, Any]) -> bool:
        """Check if cache entry is expired.

        An entry without an ``expires_at`` value is treated as expired.
        """
        expires_at = entry.get('expires_at')
        if not expires_at:
            return True
        return datetime.now(timezone.utc) > expires_at

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache.

        Returns:
            The cached value, or ``None`` when the key is absent or its entry
            has expired (an expired entry is deleted as a side effect).
        """
        entry = self.cache.get(key)
        if entry is not None:
            if not self._is_expired(entry):
                logger.debug("Cache hit", cache_key=key)
                return entry['value']
            # Clean up expired entry
            del self.cache[key]
            logger.debug("Cache expired", cache_key=key)

        logger.debug("Cache miss", cache_key=key)
        return None

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Set value in cache.

        Args:
            key: Cache key.
            value: Value to store.
            ttl: Lifetime in seconds. A falsy value (``None`` or ``0``) falls
                back to ``default_ttl``.
        """
        ttl = ttl or self.default_ttl
        now = datetime.now(timezone.utc)

        self.cache[key] = {
            'value': value,
            'expires_at': now + timedelta(seconds=ttl),
            'created_at': now,
        }
        logger.debug("Cache set", cache_key=key, ttl=ttl)

    async def clear(self) -> None:
        """Clear all cache entries."""
        self.cache.clear()
        logger.info("Cache cleared")

    async def cleanup_expired(self) -> int:
        """Clean up expired entries.

        Returns:
            The number of entries that were removed.
        """
        # Collect the keys first: the dict must not change size while it is
        # being iterated.
        expired_keys = [
            key for key, entry in self.cache.items()
            if self._is_expired(entry)
        ]

        for key in expired_keys:
            del self.cache[key]

        if expired_keys:
            logger.info("Cleaned up expired cache entries", count=len(expired_keys))

        return len(expired_keys)


def async_cache(ttl: int = 300, cache_instance: Optional[AsyncCache] = None):
    """Decorator for caching async function results.

    Args:
        ttl: Lifetime in seconds of each cached result.
        cache_instance: Cache to store results in. When omitted, every
            decorated function gets its own private ``AsyncCache``.

    The wrapper exposes ``cache_clear`` and ``cache_cleanup`` (the bound
    ``clear`` / ``cleanup_expired`` coroutines of the underlying cache).

    A result of ``None`` is indistinguishable from a cache miss, so it is
    neither stored nor served from the cache: a function that returns
    ``None`` is executed again on the next call.
    """
    def decorator(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        _cache = cache_instance if cache_instance is not None else AsyncCache(ttl)

        # Identify the function by module and qualified name rather than the
        # bare __name__, so that same-named functions or methods sharing one
        # cache instance cannot read each other's results.
        func_name = getattr(func, '__qualname__', None) or getattr(func, '__name__', repr(func))
        func_id = f"{getattr(func, '__module__', '')}.{func_name}"

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key
            cache_key = _cache._generate_key(func_id, *args, **kwargs)

            # Try to get from cache
            cached_result = await _cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = await func(*args, **kwargs)
            if result is not None:
                await _cache.set(cache_key, result, ttl)
            return result

        # Add cache management methods
        wrapper.cache_clear = _cache.clear
        wrapper.cache_cleanup = _cache.cleanup_expired

        return wrapper
    return decorator


class ConnectionPool:
    """Simple connection pool for HTTP clients.

    The pool does not create or hold connections itself; it limits how many
    holders may be inside ``acquire``/``release`` at once via a semaphore and
    keeps a counter of active holders for logging.
    """

    def __init__(self, max_connections: int = 10):
        """Create a pool allowing at most ``max_connections`` concurrent holders."""
        self.max_connections = max_connections
        self.semaphore = asyncio.Semaphore(max_connections)
        self._active_connections = 0

    async def acquire(self):
        """Acquire a connection slot, waiting until one is free."""
        await self.semaphore.acquire()
        self._active_connections += 1
        logger.debug("Connection acquired",
                     active=self._active_connections,
                     max=self.max_connections)

    async def release(self):
        """Release a connection slot."""
        self.semaphore.release()
        # Clamp at zero so an unmatched release cannot make the counter negative.
        self._active_connections = max(0, self._active_connections - 1)
        logger.debug("Connection released",
                     active=self._active_connections,
                     max=self.max_connections)

    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.release()


def rate_limit(calls: int, period: int):
    """Rate limiting decorator.

    Allows at most ``calls`` invocations of the decorated coroutine function
    to start within any sliding window of ``period`` seconds; further callers
    sleep until a slot frees up.

    Args:
        calls: Maximum number of calls per window (must be at least 1).
        period: Window length in seconds.

    Raises:
        ValueError: If ``calls`` is smaller than 1.
    """
    if calls < 1:
        raise ValueError("calls must be at least 1")

    def decorator(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        # Start times (time.monotonic()) of the calls inside the current window.
        call_times: list = []
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # The lock is held while sleeping, so waiting callers queue up
            # behind each other instead of all waking at the same moment.
            async with lock:
                while True:
                    # A monotonic clock keeps the window arithmetic immune to
                    # wall-clock adjustments.
                    now = time.monotonic()

                    # Remove old call times
                    cutoff = now - period
                    call_times[:] = [t for t in call_times if t > cutoff]

                    # Check rate limit
                    if len(call_times) < calls:
                        break

                    sleep_time = call_times[0] + period - now
                    logger.warning("Rate limit reached, sleeping", sleep_time=sleep_time)
                    await asyncio.sleep(sleep_time)
                    # Loop again: the clock has moved, so `now` must be
                    # refreshed and the window re-pruned before admitting.

                # Record this call with the time it was actually admitted
                call_times.append(now)

            return await func(*args, **kwargs)

        return wrapper
    return decorator


async def batch_process(
    items: list,
    process_func: Callable,
    batch_size: int = 10,
    max_concurrency: int = 5
) -> list:
    """Process items in batches with controlled concurrency.

    Args:
        items: Items to process.
        process_func: Async callable that receives one batch (a list slice of
            ``items``). A list result is flattened into the output; any other
            result is appended as a single element.
        batch_size: Maximum number of items per batch (must be at least 1).
        max_concurrency: Maximum number of batches processed at the same time
            (must be at least 1).

    Returns:
        The combined results of all batches that succeeded, in batch order.
        Batches that raised are logged and omitted.

    Raises:
        ValueError: If ``batch_size`` or ``max_concurrency`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if max_concurrency < 1:
        # A semaphore with zero permits would block every batch forever.
        raise ValueError("max_concurrency must be at least 1")

    results = []
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_batch(batch):
        async with semaphore:
            return await process_func(batch)

    # Create batches
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    logger.info("Processing items in batches",
                total_items=len(items),
                batches=len(batches),
                batch_size=batch_size,
                max_concurrency=max_concurrency)

    # Process batches concurrently
    batch_results = await asyncio.gather(
        *[process_batch(batch) for batch in batches],
        return_exceptions=True
    )

    # Flatten results
    for batch_result in batch_results:
        # BaseException rather than Exception: asyncio.CancelledError is not
        # an Exception subclass on Python 3.8+ and must not be mistaken for
        # a regular result.
        if isinstance(batch_result, BaseException):
            logger.error("Batch processing error", error=str(batch_result))
            continue

        if isinstance(batch_result, list):
            results.extend(batch_result)
        else:
            results.append(batch_result)

    logger.info("Batch processing completed",
                processed_items=len(results),
                total_batches=len(batches))

    return results


class PerformanceMonitor:
    """Simple performance monitoring for async functions.

    Raw counters are accumulated per function name in ``self.metrics``;
    derived values (average duration, success rate) are computed on demand
    by ``get_metrics``.
    """

    def __init__(self):
        self.metrics: Dict[str, Dict[str, Any]] = {}

    def record_execution(self, func_name: str, duration: float, success: bool = True):
        """Record function execution metrics.

        Args:
            func_name: Name under which the execution is accounted.
            duration: Execution time in seconds.
            success: Whether the execution completed without raising.
        """
        if func_name not in self.metrics:
            self.metrics[func_name] = {
                'call_count': 0,
                'success_count': 0,
                'error_count': 0,
                'total_duration': 0.0,
                'min_duration': float('inf'),
                'max_duration': 0.0,
            }

        metric = self.metrics[func_name]
        metric['call_count'] += 1
        metric['total_duration'] += duration
        metric['min_duration'] = min(metric['min_duration'], duration)
        metric['max_duration'] = max(metric['max_duration'], duration)

        if success:
            metric['success_count'] += 1
        else:
            metric['error_count'] += 1

    @staticmethod
    def _with_derived(metric: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``metric`` extended with avg_duration and success_rate."""
        snapshot = dict(metric)
        call_count = snapshot.get('call_count', 0)
        if call_count > 0:
            snapshot['avg_duration'] = snapshot['total_duration'] / call_count
            snapshot['success_rate'] = snapshot['success_count'] / call_count
        return snapshot

    def get_metrics(self, func_name: Optional[str] = None) -> dict:
        """Get performance metrics.

        Args:
            func_name: Name of a single function. When omitted (or empty),
                metrics of all recorded functions are returned.

        Returns:
            For a single function: its metrics dict (empty if nothing was
            recorded for it). Otherwise a dict mapping each function name to
            its metrics dict. The returned dicts are snapshots; derived
            values are computed fresh on every call and never written back
            to the stored counters, so they cannot go stale.
        """
        if func_name:
            metric = self.metrics.get(func_name)
            return self._with_derived(metric) if metric else {}

        return {
            name: self._with_derived(metric)
            for name, metric in self.metrics.items()
        }


def monitor_performance(monitor: Optional[PerformanceMonitor] = None):
    """Decorator to monitor function performance.

    Args:
        monitor: Monitor that receives the measurements. When omitted, every
            decorated function gets its own private ``PerformanceMonitor``.

    Each call is recorded under ``func.__name__``. The wrapper exposes
    ``get_metrics()`` returning the metrics of the decorated function.
    """
    def decorator(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
        _monitor = monitor if monitor is not None else PerformanceMonitor()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # perf_counter is monotonic, so the measured duration cannot be
            # skewed (or go negative) when the wall clock is adjusted.
            start_time = time.perf_counter()
            # Only flipped after the awaited call returns, so every way of
            # leaving early - including asyncio.CancelledError, which is not
            # an Exception subclass on Python 3.8+ - counts as a failure.
            success = False

            try:
                result = await func(*args, **kwargs)
                success = True
                return result
            finally:
                duration = time.perf_counter() - start_time

                _monitor.record_execution(func.__name__, duration, success)

                logger.debug("Function performance",
                             function=func.__name__,
                             duration=duration,
                             success=success)

        # Add metrics access
        wrapper.get_metrics = lambda: _monitor.get_metrics(func.__name__)

        return wrapper
    return decorator


# Global instances
global_cache = AsyncCache(default_ttl=300)
global_connection_pool = ConnectionPool(max_connections=20)
global_performance_monitor = PerformanceMonitor()