# bakery-ia/services/data/app/core/performance.py

# ================================================================
# services/data/app/core/performance.py
# ================================================================
"""
Performance optimization utilities for async operations
"""

import asyncio
import functools
from typing import Any, Callable, Dict, Optional, TypeVar
from datetime import datetime, timedelta, timezone
import hashlib
import json
import structlog

# Module-level structlog logger used by the helpers in this module.
logger = structlog.get_logger()

# Generic type variable used in the decorator signatures in this module.
T = TypeVar('T')


class AsyncCache:
    """Simple in-memory async cache with TTL.

    Entries are kept in ``self.cache``, a dict mapping a string key to a
    record with ``value``, ``expires_at`` and ``created_at`` fields. Expired
    records are removed lazily by ``get`` and in bulk by ``cleanup_expired``.
    The cache has no size limit.
    """

    def __init__(self, default_ttl: int = 300):
        """Create an empty cache.

        Args:
            default_ttl: Lifetime in seconds used by ``set`` when the caller
                does not pass an explicit ``ttl``.
        """
        self.cache: Dict[str, Dict[str, Any]] = {}
        self.default_ttl = default_ttl

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate cache key from arguments.

        Positional and keyword arguments are serialised to JSON (keyword
        order does not matter) and hashed; values JSON cannot encode are
        converted with ``str``.

        Returns:
            A 32-character hexadecimal MD5 digest. The digest is only a
            compact lookup key, not a security measure.
        """
        key_data = {
            'args': args,
            'kwargs': sorted(kwargs.items())
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _is_expired(self, entry: Dict[str, Any]) -> bool:
        """Check if cache entry is expired.

        An entry without an ``expires_at`` value is treated as expired.
        """
        expires_at = entry.get('expires_at')
        if not expires_at:
            return True
        return datetime.now(timezone.utc) > expires_at

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache.

        Returns:
            The stored value, or ``None`` when the key is absent or its
            entry has expired (an expired entry is deleted on the way).
        """
        if key in self.cache:
            entry = self.cache[key]
            if not self._is_expired(entry):
                logger.debug("Cache hit", cache_key=key)
                return entry['value']

            # Clean up expired entry
            del self.cache[key]
            logger.debug("Cache expired", cache_key=key)

        logger.debug("Cache miss", cache_key=key)
        return None

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Set value in cache.

        Args:
            key: Cache key.
            value: Value to store.
            ttl: Lifetime in seconds. ``None`` selects ``default_ttl``; an
                explicit ``0`` is honoured and yields an entry that expires
                essentially immediately.
        """
        # Compare against None rather than using ``ttl or default`` so that an
        # explicit ttl of 0 is not silently replaced by the default.
        effective_ttl = self.default_ttl if ttl is None else ttl

        # Read the clock once so created_at and expires_at are consistent.
        now = datetime.now(timezone.utc)

        self.cache[key] = {
            'value': value,
            'expires_at': now + timedelta(seconds=effective_ttl),
            'created_at': now
        }

        logger.debug("Cache set", cache_key=key, ttl=effective_ttl)

    async def clear(self) -> None:
        """Clear all cache entries."""
        self.cache.clear()
        logger.info("Cache cleared")

    async def cleanup_expired(self) -> int:
        """Clean up expired entries.

        Returns:
            The number of entries that were removed.
        """
        # Collect keys first: the dict must not change size while iterating.
        expired_keys = [
            key for key, entry in self.cache.items()
            if self._is_expired(entry)
        ]

        for key in expired_keys:
            del self.cache[key]

        if expired_keys:
            logger.info("Cleaned up expired cache entries", count=len(expired_keys))

        return len(expired_keys)


def async_cache(ttl: int = 300, cache_instance: Optional[AsyncCache] = None):
    """Decorator for caching async function results.

    Args:
        ttl: Lifetime in seconds of each cached result.
        cache_instance: Cache to store results in. When omitted, every
            decorated function gets its own private ``AsyncCache``.

    Returns:
        A decorator for coroutine functions. The wrapped function gains
        ``cache_clear`` and ``cache_cleanup`` attributes bound to the
        underlying cache's ``clear`` and ``cleanup_expired`` methods; with a
        shared ``cache_instance`` these act on the whole shared cache.

    Note:
        A result of ``None`` is indistinguishable from a cache miss, so
        functions returning ``None`` are executed again on every call.
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _cache = cache_instance if cache_instance is not None else AsyncCache(ttl)

        # Namespace keys by module and qualified name: the bare __name__ is
        # not unique, so two same-named functions sharing one cache instance
        # could otherwise be served each other's results.
        key_prefix = "{}.{}".format(
            getattr(func, '__module__', ''),
            getattr(func, '__qualname__', func.__name__),
        )

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key
            cache_key = _cache._generate_key(key_prefix, *args, **kwargs)

            # Try to get from cache
            cached_result = await _cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = await func(*args, **kwargs)
            await _cache.set(cache_key, result, ttl)

            return result

        # Add cache management methods
        wrapper.cache_clear = _cache.clear
        wrapper.cache_cleanup = _cache.cleanup_expired

        return wrapper

    return decorator


class ConnectionPool:
    """Slot limiter for HTTP clients.

    The pool does not hold connection objects itself; it gates callers with
    an ``asyncio.Semaphore`` of ``max_connections`` slots and keeps a counter
    of the slots currently held. It can be used as an async context manager,
    acquiring a slot on entry and releasing it on exit.
    """

    def __init__(self, max_connections: int = 10):
        """Create a pool with ``max_connections`` available slots."""
        self._active_connections = 0
        self.max_connections = max_connections
        self.semaphore = asyncio.Semaphore(max_connections)

    async def acquire(self):
        """Wait for a free slot, then count it as in use."""
        await self.semaphore.acquire()
        self._active_connections = self._active_connections + 1
        logger.debug(
            "Connection acquired",
            active=self._active_connections,
            max=self.max_connections,
        )

    async def release(self):
        """Give a slot back and lower the in-use counter (never below zero)."""
        self.semaphore.release()
        remaining = self._active_connections - 1
        self._active_connections = remaining if remaining > 0 else 0
        logger.debug(
            "Connection released",
            active=self._active_connections,
            max=self.max_connections,
        )

    async def __aenter__(self):
        """Acquire a slot and return the pool itself."""
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Release the slot; any exception from the body propagates."""
        await self.release()


def rate_limit(calls: int, period: int):
    """Rate limiting decorator.

    Admits at most ``calls`` invocations of the wrapped coroutine function
    per sliding window of ``period`` seconds. Callers over the limit are not
    rejected; they wait until the oldest recorded call leaves the window.

    Args:
        calls: Maximum number of invocations per window (expected to be >= 1).
        period: Window length in seconds.

    Returns:
        A decorator for coroutine functions. Each decorated function has its
        own independent call history.
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        call_times = []  # admission times of recent calls, in admission order
        lock = asyncio.Lock()
        window = timedelta(seconds=period)

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # The lock stays held while waiting, so queued callers are
            # admitted one at a time.
            async with lock:
                while True:
                    now = datetime.now(timezone.utc)

                    # Remove old call times
                    cutoff = now - window
                    call_times[:] = [t for t in call_times if t > cutoff]

                    # Check rate limit
                    if len(call_times) < calls:
                        break

                    # Every remaining entry is newer than the cutoff, so this
                    # wait is always positive.
                    sleep_time = (call_times[0] + window - now).total_seconds()
                    logger.warning("Rate limit reached, sleeping", sleep_time=sleep_time)
                    await asyncio.sleep(sleep_time)
                    # Loop again: re-read the clock and re-check, so the time
                    # recorded below is the real admission time rather than
                    # the stale pre-sleep one.

                # Record this call
                call_times.append(now)

            return await func(*args, **kwargs)

        return wrapper

    return decorator


async def batch_process(
    items: list,
    process_func: Callable,
    batch_size: int = 10,
    max_concurrency: int = 5
) -> list:
    """Process items in batches with controlled concurrency.

    ``items`` is cut into consecutive slices of at most ``batch_size``
    elements; ``process_func`` is awaited once per slice, with no more than
    ``max_concurrency`` calls in flight at a time.

    Args:
        items: Items to process.
        process_func: Async callable that receives one batch (a list slice).
        batch_size: Maximum number of items per batch; must be >= 1.
        max_concurrency: Maximum number of batches processed at once;
            must be >= 1.

    Returns:
        The batch results in batch order, flattened: a list result
        contributes its elements, any other result is appended as one
        element. Batches that failed or were cancelled are logged and
        contribute nothing.

    Raises:
        ValueError: If ``batch_size`` or ``max_concurrency`` is below 1.
    """
    # A negative batch_size would produce no batches and silently drop every
    # item; a max_concurrency of 0 would block forever on the semaphore.
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if max_concurrency < 1:
        raise ValueError("max_concurrency must be at least 1")

    results = []
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_batch(batch):
        async with semaphore:
            return await process_func(batch)

    # Create batches
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    logger.info("Processing items in batches",
               total_items=len(items),
               batches=len(batches),
               batch_size=batch_size,
               max_concurrency=max_concurrency)

    # Process batches concurrently
    batch_results = await asyncio.gather(
        *[process_batch(batch) for batch in batches],
        return_exceptions=True
    )

    # Flatten results
    for batch_result in batch_results:
        # Check BaseException, not Exception: a cancelled batch is returned
        # as asyncio.CancelledError, which is not an Exception subclass on
        # Python 3.8+ and would otherwise end up among the results.
        if isinstance(batch_result, BaseException):
            logger.error("Batch processing error",
                         error=str(batch_result),
                         error_type=type(batch_result).__name__)
            continue

        if isinstance(batch_result, list):
            results.extend(batch_result)
        else:
            results.append(batch_result)

    logger.info("Batch processing completed",
               processed_items=len(results),
               total_batches=len(batches))

    return results


class PerformanceMonitor:
    """Simple performance monitoring for async functions.

    Raw counters are accumulated per function name in ``self.metrics``.
    ``get_metrics`` returns snapshots that add the derived ``avg_duration``
    and ``success_rate`` values, computed at the time of the call.
    """

    def __init__(self):
        """Create a monitor with no recorded executions."""
        self.metrics: Dict[str, Dict[str, Any]] = {}

    def record_execution(self, func_name: str, duration: float, success: bool = True):
        """Record function execution metrics.

        Args:
            func_name: Name under which the execution is accumulated.
            duration: Execution time in seconds.
            success: Whether the execution counts as a success or an error.
        """
        if func_name not in self.metrics:
            self.metrics[func_name] = {
                'call_count': 0,
                'success_count': 0,
                'error_count': 0,
                'total_duration': 0.0,
                'min_duration': float('inf'),
                'max_duration': 0.0
            }

        metric = self.metrics[func_name]
        metric['call_count'] += 1
        metric['total_duration'] += duration
        metric['min_duration'] = min(metric['min_duration'], duration)
        metric['max_duration'] = max(metric['max_duration'], duration)

        if success:
            metric['success_count'] += 1
        else:
            metric['error_count'] += 1

    @staticmethod
    def _with_derived_stats(metric: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``metric`` extended with average and success rate."""
        snapshot = dict(metric)
        call_count = snapshot.get('call_count', 0)
        if call_count > 0:
            snapshot['avg_duration'] = snapshot['total_duration'] / call_count
            snapshot['success_rate'] = snapshot['success_count'] / call_count
        return snapshot

    def get_metrics(self, func_name: Optional[str] = None) -> dict:
        """Get performance metrics.

        Args:
            func_name: Name of a single function, or ``None``/empty for all.

        Returns:
            For a single function, a snapshot of its counters plus
            ``avg_duration`` and ``success_rate`` (``{}`` if nothing was
            recorded under that name). Otherwise a dict mapping every
            recorded name to such a snapshot.
        """
        # Derived values are computed on copies: writing them into the stored
        # records would leave stale averages behind once more executions are
        # recorded.
        if func_name:
            metric = self.metrics.get(func_name)
            return self._with_derived_stats(metric) if metric else {}

        return {
            name: self._with_derived_stats(metric)
            for name, metric in self.metrics.items()
        }


def monitor_performance(monitor: Optional[PerformanceMonitor] = None):
    """Decorator to monitor function performance.

    Every call of the wrapped coroutine function is timed and reported to a
    ``PerformanceMonitor`` under the function's ``__name__``, whether it
    returns or raises.

    Args:
        monitor: Monitor that receives the measurements. When omitted, each
            decorated function gets its own private ``PerformanceMonitor``.

    Returns:
        A decorator for coroutine functions. The wrapped function gains a
        ``get_metrics()`` attribute returning the metrics recorded for it.
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _monitor = monitor if monitor is not None else PerformanceMonitor()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start_time = datetime.now(timezone.utc)
            # Start pessimistic and flip only after a normal return. This way
            # a cancelled call (asyncio.CancelledError is a BaseException, not
            # an Exception) is not reported as a success.
            success = False

            try:
                result = await func(*args, **kwargs)
                success = True
                return result
            finally:
                end_time = datetime.now(timezone.utc)
                duration = (end_time - start_time).total_seconds()
                _monitor.record_execution(func.__name__, duration, success)

                logger.debug("Function performance",
                           function=func.__name__,
                           duration=duration,
                           success=success)

        # Add metrics access
        wrapper.get_metrics = lambda: _monitor.get_metrics(func.__name__)

        return wrapper

    return decorator


# Global instances
# Created once at import time; every importer of this module sees the same objects.
# NOTE(review): ConnectionPool builds an asyncio.Semaphore here, outside any
# running event loop — on Python < 3.10 asyncio primitives bind to the loop
# current at construction; confirm the runtime version.
global_cache = AsyncCache(default_ttl=300)  # entries default to a 300-second TTL
global_connection_pool = ConnectionPool(max_connections=20)  # at most 20 slots held at once
global_performance_monitor = PerformanceMonitor()