# ================================================================
# services/data/app/core/performance.py
# ================================================================
"""
Performance optimization utilities for async operations
"""

import asyncio
import functools
from typing import Any, Callable, Dict, Optional, TypeVar
from datetime import datetime, timedelta, timezone
import hashlib
import json
import structlog

logger = structlog.get_logger()

T = TypeVar('T')


class AsyncCache:
    """Simple in-memory async cache with TTL"""

    def __init__(self, default_ttl: int = 300):
        self.cache: Dict[str, Dict[str, Any]] = {}
        self.default_ttl = default_ttl

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate cache key from arguments"""
        key_data = {
            'args': args,
            'kwargs': sorted(kwargs.items())
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _is_expired(self, entry: Dict[str, Any]) -> bool:
        """Check if cache entry is expired"""
        expires_at = entry.get('expires_at')
        if not expires_at:
            return True
        return datetime.now(timezone.utc) > expires_at

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache"""
        if key in self.cache:
            entry = self.cache[key]
            if not self._is_expired(entry):
                logger.debug("Cache hit", cache_key=key)
                return entry['value']
            else:
                # Clean up expired entry
                del self.cache[key]
                logger.debug("Cache expired", cache_key=key)

        logger.debug("Cache miss", cache_key=key)
        return None

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Set value in cache"""
        ttl = ttl or self.default_ttl
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl)

        self.cache[key] = {
            'value': value,
            'expires_at': expires_at,
            'created_at': datetime.now(timezone.utc)
        }

        logger.debug("Cache set", cache_key=key, ttl=ttl)

    async def clear(self) -> None:
        """Clear all cache entries"""
        self.cache.clear()
        logger.info("Cache cleared")

    async def cleanup_expired(self) -> int:
        """Clean up expired entries"""
        expired_keys = [
            key for key, entry in self.cache.items()
            if self._is_expired(entry)
        ]

        for key in expired_keys:
            del self.cache[key]

        if expired_keys:
            logger.info("Cleaned up expired cache entries", count=len(expired_keys))

        return len(expired_keys)


def async_cache(ttl: int = 300, cache_instance: Optional[AsyncCache] = None):
    """Decorator for caching async function results"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _cache = cache_instance or AsyncCache(ttl)

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key from the function name and call arguments
            cache_key = _cache._generate_key(func.__name__, *args, **kwargs)

            # Try to get from cache; note that a cached value of None is
            # indistinguishable from a miss, so functions returning None
            # are re-executed on every call
            cached_result = await _cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = await func(*args, **kwargs)
            await _cache.set(cache_key, result, ttl)

            return result

        # Add cache management methods
        wrapper.cache_clear = _cache.clear
        wrapper.cache_cleanup = _cache.cleanup_expired

        return wrapper

    return decorator


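# Illustrative usage of async_cache; the coroutine and client below are
# hypothetical, not part of this module:
#
#     @async_cache(ttl=60)
#     async def fetch_symbol_stats(symbol: str) -> dict:
#         return await some_client.get_stats(symbol)
#
#     # Repeated calls with the same arguments within 60 seconds return the
#     # cached value; fetch_symbol_stats.cache_clear() and .cache_cleanup()
#     # manage the underlying entries.

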
class ConnectionPool:
    """Simple connection pool for HTTP clients"""

    def __init__(self, max_connections: int = 10):
        self.max_connections = max_connections
        self.semaphore = asyncio.Semaphore(max_connections)
        self._active_connections = 0

    async def acquire(self):
        """Acquire a connection slot"""
        await self.semaphore.acquire()
        self._active_connections += 1
        logger.debug("Connection acquired", active=self._active_connections, max=self.max_connections)

    async def release(self):
        """Release a connection slot"""
        self.semaphore.release()
        self._active_connections = max(0, self._active_connections - 1)
        logger.debug("Connection released", active=self._active_connections, max=self.max_connections)

    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.release()


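# Illustrative usage of ConnectionPool as an async context manager; the
# http_client call inside the block is a hypothetical placeholder:
#
#     pool = ConnectionPool(max_connections=5)
#
#     async def fetch(url: str):
#         async with pool:
#             # at most 5 of these blocks run concurrently
#             return await http_client.get(url)

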
def rate_limit(calls: int, period: int):
    """Rate limiting decorator: at most `calls` invocations per rolling `period` seconds"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        call_times = []
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            async with lock:
                now = datetime.now(timezone.utc)

                # Remove call times that have fallen outside the current window
                cutoff = now - timedelta(seconds=period)
                call_times[:] = [t for t in call_times if t > cutoff]

                # Check rate limit; wait until the oldest call leaves the window
                if len(call_times) >= calls:
                    sleep_time = (call_times[0] + timedelta(seconds=period) - now).total_seconds()
                    if sleep_time > 0:
                        logger.warning("Rate limit reached, sleeping", sleep_time=sleep_time)
                        await asyncio.sleep(sleep_time)
                        now = datetime.now(timezone.utc)

                # Record this call
                call_times.append(now)

            # Run the wrapped coroutine outside the lock so calls are not serialized
            return await func(*args, **kwargs)

        return wrapper

    return decorator


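# Illustrative usage of rate_limit; the endpoint and http_client below are
# hypothetical:
#
#     @rate_limit(calls=10, period=60)
#     async def call_external_api(payload: dict) -> dict:
#         return await http_client.post("/quota-limited-endpoint", json=payload)
#
#     # At most 10 calls start per rolling 60-second window; extra callers sleep.

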
async def batch_process(
    items: list,
    process_func: Callable,
    batch_size: int = 10,
    max_concurrency: int = 5
) -> list:
    """Process items in batches with controlled concurrency.

    `process_func` is awaited once per batch and receives the whole batch
    (a slice of `items`), not individual items.
    """

    results = []
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_batch(batch):
        async with semaphore:
            return await process_func(batch)

    # Create batches
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    logger.info("Processing items in batches",
                total_items=len(items),
                batches=len(batches),
                batch_size=batch_size,
                max_concurrency=max_concurrency)

    # Process batches concurrently
    batch_results = await asyncio.gather(
        *[process_batch(batch) for batch in batches],
        return_exceptions=True
    )

    # Flatten results, skipping batches that raised
    for batch_result in batch_results:
        if isinstance(batch_result, Exception):
            logger.error("Batch processing error", error=str(batch_result))
            continue

        if isinstance(batch_result, list):
            results.extend(batch_result)
        else:
            results.append(batch_result)

    logger.info("Batch processing completed",
                processed_items=len(results),
                total_batches=len(batches))

    return results


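# Illustrative usage of batch_process; both `enrich_records` and `enrich`
# are hypothetical helpers:
#
#     async def enrich_records(batch: list) -> list:
#         return [await enrich(record) for record in batch]
#
#     enriched = await batch_process(records, enrich_records,
#                                    batch_size=25, max_concurrency=4)

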
class PerformanceMonitor:
    """Simple performance monitoring for async functions"""

    def __init__(self):
        self.metrics = {}

    def record_execution(self, func_name: str, duration: float, success: bool = True):
        """Record function execution metrics"""
        if func_name not in self.metrics:
            self.metrics[func_name] = {
                'call_count': 0,
                'success_count': 0,
                'error_count': 0,
                'total_duration': 0.0,
                'min_duration': float('inf'),
                'max_duration': 0.0
            }

        metric = self.metrics[func_name]
        metric['call_count'] += 1
        metric['total_duration'] += duration
        metric['min_duration'] = min(metric['min_duration'], duration)
        metric['max_duration'] = max(metric['max_duration'], duration)

        if success:
            metric['success_count'] += 1
        else:
            metric['error_count'] += 1

    def get_metrics(self, func_name: Optional[str] = None) -> dict:
        """Get performance metrics"""
        if func_name:
            metric = self.metrics.get(func_name, {})
            if metric and metric['call_count'] > 0:
                metric['avg_duration'] = metric['total_duration'] / metric['call_count']
                metric['success_rate'] = metric['success_count'] / metric['call_count']
            return metric

        return self.metrics


def monitor_performance(monitor: Optional[PerformanceMonitor] = None):
    """Decorator to monitor function performance"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _monitor = monitor or PerformanceMonitor()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start_time = datetime.now(timezone.utc)
            success = True

            try:
                result = await func(*args, **kwargs)
                return result
            except Exception:
                success = False
                raise
            finally:
                end_time = datetime.now(timezone.utc)
                duration = (end_time - start_time).total_seconds()
                _monitor.record_execution(func.__name__, duration, success)

                logger.debug("Function performance",
                             function=func.__name__,
                             duration=duration,
                             success=success)

        # Add metrics access
        wrapper.get_metrics = lambda: _monitor.get_metrics(func.__name__)

        return wrapper

    return decorator


# Global instances
global_cache = AsyncCache(default_ttl=300)
global_connection_pool = ConnectionPool(max_connections=20)
global_performance_monitor = PerformanceMonitor()
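

# Illustrative usage of monitor_performance together with the shared instances
# above; the wrapped coroutine `load_reference_data` is hypothetical:
#
#     @monitor_performance(global_performance_monitor)
#     @async_cache(ttl=120, cache_instance=global_cache)
#     async def load_reference_data(name: str) -> dict:
#         ...
#
#     # load_reference_data.get_metrics() then reports call count, durations,
#     # and success rate for the wrapped function.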