Improve the traffic fetching system
services/data/app/core/performance.py (new file, +312 lines)
@@ -0,0 +1,312 @@
# ================================================================
# services/data/app/core/performance.py
# ================================================================
"""
Performance optimization utilities for async operations
"""

import asyncio
import functools
from typing import Any, Callable, Dict, Optional, TypeVar
from datetime import datetime, timedelta, timezone
import hashlib
import json
import structlog

logger = structlog.get_logger()

T = TypeVar('T')


class AsyncCache:
    """Simple in-memory async cache with TTL"""

    def __init__(self, default_ttl: int = 300):
        self.cache: Dict[str, Dict[str, Any]] = {}
        self.default_ttl = default_ttl

    def _generate_key(self, *args, **kwargs) -> str:
        """Generate cache key from arguments"""
        key_data = {
            'args': args,
            'kwargs': sorted(kwargs.items())
        }
        key_string = json.dumps(key_data, sort_keys=True, default=str)
        return hashlib.md5(key_string.encode()).hexdigest()

    def _is_expired(self, entry: Dict[str, Any]) -> bool:
        """Check if cache entry is expired"""
        expires_at = entry.get('expires_at')
        if not expires_at:
            return True
        return datetime.now(timezone.utc) > expires_at

    async def get(self, key: str) -> Optional[Any]:
        """Get value from cache"""
        if key in self.cache:
            entry = self.cache[key]
            if not self._is_expired(entry):
                logger.debug("Cache hit", cache_key=key)
                return entry['value']
            else:
                # Clean up expired entry
                del self.cache[key]
                logger.debug("Cache expired", cache_key=key)

        logger.debug("Cache miss", cache_key=key)
        return None

    async def set(self, key: str, value: Any, ttl: Optional[int] = None) -> None:
        """Set value in cache"""
        ttl = ttl or self.default_ttl
        expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl)

        self.cache[key] = {
            'value': value,
            'expires_at': expires_at,
            'created_at': datetime.now(timezone.utc)
        }

        logger.debug("Cache set", cache_key=key, ttl=ttl)

    async def clear(self) -> None:
        """Clear all cache entries"""
        self.cache.clear()
        logger.info("Cache cleared")

    async def cleanup_expired(self) -> int:
        """Clean up expired entries"""
        expired_keys = [
            key for key, entry in self.cache.items()
            if self._is_expired(entry)
        ]

        for key in expired_keys:
            del self.cache[key]

        if expired_keys:
            logger.info("Cleaned up expired cache entries", count=len(expired_keys))

        return len(expired_keys)
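
# Illustrative usage sketch (hypothetical names and values): caching a traffic
# snapshot by hand via the module-private key helper. Note that a cached None
# cannot be distinguished from a miss.
async def _example_async_cache_usage() -> dict:
    cache = AsyncCache(default_ttl=60)
    key = cache._generate_key("traffic", region="eu-west")
    snapshot = await cache.get(key)
    if snapshot is None:
        snapshot = {"region": "eu-west", "vehicles": 1250}  # stand-in fetch result
        await cache.set(key, snapshot, ttl=60)
    return snapshot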


def async_cache(ttl: int = 300, cache_instance: Optional[AsyncCache] = None):
    """Decorator for caching async function results"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _cache = cache_instance or AsyncCache(ttl)

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # Generate cache key
            cache_key = _cache._generate_key(func.__name__, *args, **kwargs)

            # Try to get from cache; None results look like misses,
            # so functions returning None are recomputed on every call
            cached_result = await _cache.get(cache_key)
            if cached_result is not None:
                return cached_result

            # Execute function and cache result
            result = await func(*args, **kwargs)
            await _cache.set(cache_key, result, ttl)

            return result

        # Add cache management methods
        wrapper.cache_clear = _cache.clear
        wrapper.cache_cleanup = _cache.cleanup_expired

        return wrapper

    return decorator
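
# Illustrative usage sketch: repeated calls with the same arguments within the
# TTL return the cached payload. The fetch function and payload are hypothetical.
@async_cache(ttl=120)
async def _example_fetch_congestion(region: str) -> dict:
    return {"region": region, "congestion": 0.42}  # stand-in for a real fetch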


class ConnectionPool:
    """Simple connection pool for HTTP clients"""

    def __init__(self, max_connections: int = 10):
        self.max_connections = max_connections
        self.semaphore = asyncio.Semaphore(max_connections)
        self._active_connections = 0

    async def acquire(self):
        """Acquire a connection slot"""
        await self.semaphore.acquire()
        self._active_connections += 1
        logger.debug("Connection acquired", active=self._active_connections, max=self.max_connections)

    async def release(self):
        """Release a connection slot"""
        self.semaphore.release()
        self._active_connections = max(0, self._active_connections - 1)
        logger.debug("Connection released", active=self._active_connections, max=self.max_connections)

    async def __aenter__(self):
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.release()
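
# Illustrative usage sketch: bounding concurrent upstream requests with the
# pool's async-context-manager protocol. The sleep stands in for real I/O.
async def _example_pooled_fetch(pool: ConnectionPool) -> None:
    async with pool:
        await asyncio.sleep(0.01)  # hypothetical HTTP request happens here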


def rate_limit(calls: int, period: int):
    """Rate limiting decorator"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        call_times = []
        lock = asyncio.Lock()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            # The lock covers the window check, including any sleep, so
            # concurrent callers queue up in arrival order; the wrapped
            # call itself runs outside the lock
            async with lock:
                now = datetime.now(timezone.utc)

                # Remove old call times
                cutoff = now - timedelta(seconds=period)
                call_times[:] = [t for t in call_times if t > cutoff]

                # Check rate limit
                if len(call_times) >= calls:
                    sleep_time = (call_times[0] + timedelta(seconds=period) - now).total_seconds()
                    if sleep_time > 0:
                        logger.warning("Rate limit reached, sleeping", sleep_time=sleep_time)
                        await asyncio.sleep(sleep_time)

                # Record this call
                call_times.append(now)

            return await func(*args, **kwargs)

        return wrapper

    return decorator
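
# Illustrative usage sketch: at most 10 calls per rolling 60-second window;
# excess callers sleep until a slot frees up. The endpoint is hypothetical.
@rate_limit(calls=10, period=60)
async def _example_poll_upstream() -> dict:
    return {"status": "ok"}  # stand-in for an upstream traffic API response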


async def batch_process(
    items: list,
    process_func: Callable,
    batch_size: int = 10,
    max_concurrency: int = 5
) -> list:
    """Process items in batches with controlled concurrency"""

    results = []
    semaphore = asyncio.Semaphore(max_concurrency)

    async def process_batch(batch):
        async with semaphore:
            return await process_func(batch)

    # Create batches
    batches = [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

    logger.info("Processing items in batches",
                total_items=len(items),
                batches=len(batches),
                batch_size=batch_size,
                max_concurrency=max_concurrency)

    # Process batches concurrently
    batch_results = await asyncio.gather(
        *[process_batch(batch) for batch in batches],
        return_exceptions=True
    )

    # Flatten results
    for batch_result in batch_results:
        if isinstance(batch_result, Exception):
            logger.error("Batch processing error", error=str(batch_result))
            continue

        if isinstance(batch_result, list):
            results.extend(batch_result)
        else:
            results.append(batch_result)

    logger.info("Batch processing completed",
                processed_items=len(results),
                total_batches=len(batches))

    return results
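
# Illustrative usage sketch: the handler receives a whole batch and returns a
# list, so results flatten back into one list. Inputs are made-up integers.
async def _example_batched_doubling() -> list:
    async def double_batch(batch: list) -> list:
        return [item * 2 for item in batch]

    return await batch_process(list(range(100)), double_batch,
                               batch_size=10, max_concurrency=5)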


class PerformanceMonitor:
    """Simple performance monitoring for async functions"""

    def __init__(self):
        self.metrics = {}

    def record_execution(self, func_name: str, duration: float, success: bool = True):
        """Record function execution metrics"""
        if func_name not in self.metrics:
            self.metrics[func_name] = {
                'call_count': 0,
                'success_count': 0,
                'error_count': 0,
                'total_duration': 0.0,
                'min_duration': float('inf'),
                'max_duration': 0.0
            }

        metric = self.metrics[func_name]
        metric['call_count'] += 1
        metric['total_duration'] += duration
        metric['min_duration'] = min(metric['min_duration'], duration)
        metric['max_duration'] = max(metric['max_duration'], duration)

        if success:
            metric['success_count'] += 1
        else:
            metric['error_count'] += 1

    def get_metrics(self, func_name: Optional[str] = None) -> dict:
        """Get performance metrics"""
        if func_name:
            metric = self.metrics.get(func_name, {})
            if metric and metric['call_count'] > 0:
                metric['avg_duration'] = metric['total_duration'] / metric['call_count']
                metric['success_rate'] = metric['success_count'] / metric['call_count']
            return metric

        return self.metrics
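
# Illustrative usage sketch: recording two hand-made executions and reading
# back the derived avg_duration and success_rate. Durations are invented.
def _example_metrics() -> dict:
    monitor = PerformanceMonitor()
    monitor.record_execution("fetch_traffic", duration=0.12, success=True)
    monitor.record_execution("fetch_traffic", duration=0.30, success=False)
    return monitor.get_metrics("fetch_traffic")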


def monitor_performance(monitor: Optional[PerformanceMonitor] = None):
    """Decorator to monitor function performance"""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        _monitor = monitor or PerformanceMonitor()

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            start_time = datetime.now(timezone.utc)
            success = True

            try:
                result = await func(*args, **kwargs)
                return result
            except Exception:
                success = False
                raise
            finally:
                end_time = datetime.now(timezone.utc)
                duration = (end_time - start_time).total_seconds()
                _monitor.record_execution(func.__name__, duration, success)

                logger.debug("Function performance",
                             function=func.__name__,
                             duration=duration,
                             success=success)

        # Add metrics access
        wrapper.get_metrics = lambda: _monitor.get_metrics(func.__name__)

        return wrapper

    return decorator
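
# Illustrative usage sketch: the decorator times each call and exposes the
# per-function stats via the attached get_metrics(). The body is hypothetical.
@monitor_performance()
async def _example_monitored_fetch() -> dict:
    await asyncio.sleep(0.01)  # stand-in for real work
    return {"rows": 42}  # call _example_monitored_fetch.get_metrics() for stats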


# Global instances
global_cache = AsyncCache(default_ttl=300)
global_connection_pool = ConnectionPool(max_connections=20)
global_performance_monitor = PerformanceMonitor()
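
# Illustrative wiring sketch using the shared instances above. Decorator order
# matters: async_cache is outermost, so cache hits skip the monitored body
# entirely. The fetcher and its payload are hypothetical.
@async_cache(ttl=300, cache_instance=global_cache)
@monitor_performance(monitor=global_performance_monitor)
async def _example_fetch_traffic(region: str) -> dict:
    async with global_connection_pool:
        return {"region": region, "vehicles": 1250}  # stand-in traffic payload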