Start generating pytest tests for the training service
services/training/tests/test_performance.py (new file, 630 lines)
@@ -0,0 +1,630 @@
# ================================================================
# services/training/tests/test_performance.py
# ================================================================
"""
Performance and Load Testing for Training Service
Tests training performance with real-world data volumes
"""

import pytest
import asyncio
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
import psutil
import gc
from typing import List, Dict, Any
import logging
from unittest.mock import patch  # used by test_training_service_throughput

from app.ml.trainer import BakeryMLTrainer
from app.ml.data_processor import BakeryDataProcessor
from app.services.training_service import TrainingService


class TestTrainingPerformance:
    """Performance tests for training service components"""

    @pytest.fixture
    def large_sales_dataset(self):
        """Generate large dataset for performance testing (2 years of data)"""
        start_date = datetime(2022, 1, 1)
        end_date = datetime(2024, 1, 1)

        date_range = pd.date_range(start=start_date, end=end_date, freq='D')
        products = [
            "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
            "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras",
            "Donuts", "Berlinas", "Napolitanas", "Ensaimadas"
        ]

        data = []
        for date in date_range:
            for product in products:
                # Realistic sales simulation
                base_quantity = np.random.randint(5, 150)

                # Seasonal patterns
                if date.month in [12, 1]:  # Winter/Holiday season
                    base_quantity *= 1.4
                elif date.month in [6, 7, 8]:  # Summer
                    base_quantity *= 0.8

                # Weekly patterns
                if date.weekday() >= 5:  # Weekends
                    base_quantity *= 1.2
                elif date.weekday() == 0:  # Monday
                    base_quantity *= 0.7

                # Add noise
                quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1)))

                data.append({
                    "date": date.strftime("%Y-%m-%d"),
                    "product": product,
                    "quantity": quantity,
                    "revenue": round(quantity * np.random.uniform(1.5, 8.0), 2),
                    "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1),
                    "precipitation": max(0, np.random.exponential(0.8)),
                    "is_weekend": date.weekday() >= 5,
                    "is_holiday": self._is_spanish_holiday(date)
                })

        return pd.DataFrame(data)

    def _is_spanish_holiday(self, date: datetime) -> bool:
        """Check if date is a Spanish holiday"""
        holidays = [
            (1, 1),    # New Year
            (1, 6),    # Epiphany
            (5, 1),    # Labor Day
            (8, 15),   # Assumption
            (10, 12),  # National Day
            (11, 1),   # All Saints
            (12, 6),   # Constitution Day
            (12, 8),   # Immaculate Conception
            (12, 25),  # Christmas
        ]
        return (date.month, date.day) in holidays

    @pytest.mark.asyncio
    async def test_single_product_training_performance(self, large_sales_dataset):
        """Test performance of single product training with large dataset"""

        trainer = BakeryMLTrainer()
        product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy()

        # Measure memory before training
        process = psutil.Process()
        memory_before = process.memory_info().rss / 1024 / 1024  # MB

        start_time = time.time()

        result = await trainer.train_single_product(
            tenant_id="perf_test_tenant",
            product_name="Pan Integral",
            sales_data=product_data,
            config={
                "include_weather": True,
                "include_traffic": False,  # Skip traffic for performance
                "seasonality_mode": "additive"
            }
        )

        end_time = time.time()
        training_duration = end_time - start_time

        # Measure memory after training
        memory_after = process.memory_info().rss / 1024 / 1024  # MB
        memory_used = memory_after - memory_before

        # Performance assertions
        assert training_duration < 120, f"Training took too long: {training_duration:.2f}s"
        assert memory_used < 500, f"Memory usage too high: {memory_used:.2f}MB"
        assert result['status'] == 'completed'

        # Quality assertions
        metrics = result['metrics']
        assert metrics['mape'] < 50, f"MAPE too high: {metrics['mape']:.2f}%"

        print("Performance Results:")
        print(f"  Training Duration: {training_duration:.2f}s")
        print(f"  Memory Used: {memory_used:.2f}MB")
        print(f"  Data Points: {len(product_data)}")
        print(f"  MAPE: {metrics['mape']:.2f}%")
        print(f"  RMSE: {metrics['rmse']:.2f}")

    @pytest.mark.asyncio
    async def test_concurrent_training_performance(self, large_sales_dataset):
        """Test performance of concurrent training jobs"""

        trainer = BakeryMLTrainer()
        products = ["Pan Integral", "Croissant", "Magdalenas"]

        async def train_product(product_name: str):
            """Train a single product"""
            product_data = large_sales_dataset[large_sales_dataset['product'] == product_name].copy()

            start_time = time.time()
            result = await trainer.train_single_product(
                tenant_id=f"concurrent_test_{product_name.replace(' ', '_').lower()}",
                product_name=product_name,
                sales_data=product_data,
                config={"include_weather": True, "include_traffic": False}
            )
            end_time = time.time()

            return {
                'product': product_name,
                'duration': end_time - start_time,
                'status': result['status'],
                'metrics': result.get('metrics', {})
            }

        # Run concurrent training
        start_time = time.time()
        tasks = [train_product(product) for product in products]
        results = await asyncio.gather(*tasks)
        total_time = time.time() - start_time

        # Verify all trainings completed
        for result in results:
            assert result['status'] == 'completed'
            assert result['duration'] < 120  # Individual training time

        # Concurrent execution should be faster than sequential
        sequential_time_estimate = sum(r['duration'] for r in results)
        efficiency = sequential_time_estimate / total_time

        assert efficiency > 1.5, f"Concurrency efficiency too low: {efficiency:.2f}x"

        print("Concurrent Training Results:")
        print(f"  Total Time: {total_time:.2f}s")
        print(f"  Sequential Estimate: {sequential_time_estimate:.2f}s")
        print(f"  Efficiency: {efficiency:.2f}x")

        for result in results:
            # Format MAPE only when it is numeric; avoid applying :.2f to the 'N/A' fallback
            mape = result['metrics'].get('mape')
            mape_display = f"{mape:.2f}%" if isinstance(mape, (int, float)) else "N/A"
            print(f"  {result['product']}: {result['duration']:.2f}s, MAPE: {mape_display}")

    @pytest.mark.asyncio
    async def test_data_processing_scalability(self, large_sales_dataset):
        """Test data processing performance with increasing data sizes"""

        data_processor = BakeryDataProcessor()

        # Test with different data sizes
        data_sizes = [1000, 5000, 10000, 20000, len(large_sales_dataset)]
        performance_results = []

        for size in data_sizes:
            # Take a sample of the specified size
            sample_data = large_sales_dataset.head(size).copy()

            start_time = time.time()

            # Process the data
            processed_data = await data_processor.prepare_training_data(
                sales_data=sample_data,
                include_weather=True,
                include_traffic=True,
                tenant_id="scalability_test",
                product_name="Pan Integral"
            )

            processing_time = time.time() - start_time

            performance_results.append({
                'data_size': size,
                'processing_time': processing_time,
                'processed_rows': len(processed_data),
                'throughput': size / processing_time if processing_time > 0 else 0
            })

        # Verify linear or sub-linear scaling
        for i in range(1, len(performance_results)):
            prev_result = performance_results[i - 1]
            curr_result = performance_results[i]

            size_ratio = curr_result['data_size'] / prev_result['data_size']
            time_ratio = curr_result['processing_time'] / prev_result['processing_time']

            # Processing time should scale better than linearly
            assert time_ratio < size_ratio * 1.5, f"Poor scaling at size {curr_result['data_size']}"

        print("Data Processing Scalability Results:")
        for result in performance_results:
            print(f"  Size: {result['data_size']:,} rows, Time: {result['processing_time']:.2f}s, "
                  f"Throughput: {result['throughput']:.0f} rows/s")

    @pytest.mark.asyncio
    async def test_memory_usage_optimization(self, large_sales_dataset):
        """Test memory usage optimization during training"""

        trainer = BakeryMLTrainer()
        process = psutil.Process()

        # Baseline memory
        gc.collect()  # Force garbage collection
        baseline_memory = process.memory_info().rss / 1024 / 1024  # MB

        memory_snapshots = [{'stage': 'baseline', 'memory_mb': baseline_memory}]

        # Load data
        product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy()
        current_memory = process.memory_info().rss / 1024 / 1024
        memory_snapshots.append({'stage': 'data_loaded', 'memory_mb': current_memory})

        # Train model
        result = await trainer.train_single_product(
            tenant_id="memory_test_tenant",
            product_name="Pan Integral",
            sales_data=product_data,
            config={"include_weather": True, "include_traffic": True}
        )

        current_memory = process.memory_info().rss / 1024 / 1024
        memory_snapshots.append({'stage': 'model_trained', 'memory_mb': current_memory})

        # Cleanup
        del product_data
        del result
        gc.collect()

        final_memory = process.memory_info().rss / 1024 / 1024
        memory_snapshots.append({'stage': 'cleanup', 'memory_mb': final_memory})

        # Memory assertions
        peak_memory = max(snapshot['memory_mb'] for snapshot in memory_snapshots)
        memory_increase = peak_memory - baseline_memory
        memory_after_cleanup = final_memory - baseline_memory

        assert memory_increase < 800, f"Peak memory increase too high: {memory_increase:.2f}MB"
        assert memory_after_cleanup < 100, f"Memory not properly cleaned up: {memory_after_cleanup:.2f}MB"

        print("Memory Usage Analysis:")
        for snapshot in memory_snapshots:
            print(f"  {snapshot['stage']}: {snapshot['memory_mb']:.2f}MB")
        print(f"  Peak increase: {memory_increase:.2f}MB")
        print(f"  After cleanup: {memory_after_cleanup:.2f}MB")

    @pytest.mark.asyncio
    async def test_training_service_throughput(self, large_sales_dataset):
        """Test training service throughput with multiple requests"""

        training_service = TrainingService()

        # Simulate multiple training requests
        num_requests = 5
        products = ["Pan Integral", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate"]

        async def execute_training_request(request_id: int, product: str):
            """Execute a single training request"""
            product_data = large_sales_dataset[large_sales_dataset['product'] == product].copy()

            with patch.object(training_service, '_fetch_sales_data', return_value=product_data):
                start_time = time.time()

                result = await training_service.execute_training_job(
                    db=None,  # Mock DB session
                    tenant_id=f"throughput_test_tenant_{request_id}",
                    job_id=f"job_{request_id}_{product.replace(' ', '_').lower()}",
                    request={
                        'products': [product],
                        'include_weather': True,
                        'include_traffic': False,
                        'config': {'seasonality_mode': 'additive'}
                    }
                )

                duration = time.time() - start_time
                return {
                    'request_id': request_id,
                    'product': product,
                    'duration': duration,
                    'status': result.get('status', 'unknown'),
                    'models_trained': len(result.get('models_trained', []))
                }

        # Execute requests concurrently
        start_time = time.time()
        tasks = [
            execute_training_request(i, products[i % len(products)])
            for i in range(num_requests)
        ]
        results = await asyncio.gather(*tasks)
        total_time = time.time() - start_time

        # Calculate throughput metrics
        successful_requests = sum(1 for r in results if r['status'] == 'completed')
        throughput = successful_requests / total_time  # requests per second

        # Performance assertions
        assert successful_requests >= num_requests * 0.8, "Too many failed requests"
        assert throughput >= 0.1, f"Throughput too low: {throughput:.3f} req/s"
        assert total_time < 300, f"Total time too long: {total_time:.2f}s"

        print("Training Service Throughput Results:")
        print(f"  Total Requests: {num_requests}")
        print(f"  Successful: {successful_requests}")
        print(f"  Total Time: {total_time:.2f}s")
        print(f"  Throughput: {throughput:.3f} req/s")
        print(f"  Average Request Time: {total_time / num_requests:.2f}s")

    @pytest.mark.asyncio
    async def test_large_dataset_edge_cases(self, large_sales_dataset):
        """Test handling of edge cases with large datasets"""

        data_processor = BakeryDataProcessor()

        # Test 1: Dataset with many missing values
        corrupted_data = large_sales_dataset.copy()
        # Introduce 30% missing values randomly
        mask = np.random.random(len(corrupted_data)) < 0.3
        corrupted_data.loc[mask, 'quantity'] = np.nan

        start_time = time.time()
        result = await data_processor.validate_data_quality(corrupted_data)
        validation_time = time.time() - start_time

        assert validation_time < 10, f"Validation too slow: {validation_time:.2f}s"
        assert result['is_valid'] is False
        assert 'high_missing_data' in result['issues']

        # Test 2: Dataset with extreme outliers
        outlier_data = large_sales_dataset.copy()
        # Add extreme outliers (100x normal values)
        outlier_indices = np.random.choice(len(outlier_data), size=int(len(outlier_data) * 0.01), replace=False)
        outlier_data.loc[outlier_indices, 'quantity'] *= 100

        start_time = time.time()
        cleaned_data = await data_processor.clean_outliers(outlier_data)
        cleaning_time = time.time() - start_time

        assert cleaning_time < 15, f"Outlier cleaning too slow: {cleaning_time:.2f}s"
        assert len(cleaned_data) > len(outlier_data) * 0.95  # Should retain most data

        # Test 3: Very sparse data (many products with few sales)
        sparse_data = large_sales_dataset.copy()
        # Keep only 10% of data for each product randomly
        sparse_data = sparse_data.groupby('product').apply(
            lambda x: x.sample(n=max(1, int(len(x) * 0.1)))
        ).reset_index(drop=True)

        start_time = time.time()
        validation_result = await data_processor.validate_data_quality(sparse_data)
        sparse_validation_time = time.time() - start_time

        assert sparse_validation_time < 5, f"Sparse data validation too slow: {sparse_validation_time:.2f}s"

        print("Edge Case Performance Results:")
        print(f"  Corrupted data validation: {validation_time:.2f}s")
        print(f"  Outlier cleaning: {cleaning_time:.2f}s")
        print(f"  Sparse data validation: {sparse_validation_time:.2f}s")


class TestTrainingServiceLoad:
    """Load testing for training service under stress"""

    @pytest.mark.asyncio
    async def test_sustained_load_training(self, large_sales_dataset):
        """Test training service under sustained load"""

        trainer = BakeryMLTrainer()

        # Define load test parameters
        duration_minutes = 2  # Run for 2 minutes
        requests_per_minute = 3

        products = ["Pan Integral", "Croissant", "Magdalenas"]

        async def sustained_training_worker(worker_id: int, duration: float):
            """Worker that continuously submits training requests"""
            start_time = time.time()
            completed_requests = 0
            failed_requests = 0

            while time.time() - start_time < duration:
                try:
                    product = products[completed_requests % len(products)]
                    product_data = large_sales_dataset[
                        large_sales_dataset['product'] == product
                    ].copy()

                    result = await trainer.train_single_product(
                        tenant_id=f"load_test_worker_{worker_id}",
                        product_name=product,
                        sales_data=product_data,
                        config={"include_weather": False, "include_traffic": False}  # Minimal config for speed
                    )

                    if result['status'] == 'completed':
                        completed_requests += 1
                    else:
                        failed_requests += 1

                except Exception as e:
                    failed_requests += 1
                    logging.error(f"Training request failed: {e}")

                # Wait before next request
                await asyncio.sleep(60 / requests_per_minute)

            return {
                'worker_id': worker_id,
                'completed': completed_requests,
                'failed': failed_requests,
                'duration': time.time() - start_time
            }

        # Start multiple workers
        num_workers = 2
        duration_seconds = duration_minutes * 60

        start_time = time.time()
        tasks = [
            sustained_training_worker(i, duration_seconds)
            for i in range(num_workers)
        ]
        results = await asyncio.gather(*tasks)
        total_time = time.time() - start_time

        # Analyze results
        total_completed = sum(r['completed'] for r in results)
        total_failed = sum(r['failed'] for r in results)
        success_rate = total_completed / (total_completed + total_failed) if (total_completed + total_failed) > 0 else 0

        # Performance assertions
        assert success_rate >= 0.8, f"Success rate too low: {success_rate:.2%}"
        assert total_completed >= duration_minutes * requests_per_minute * num_workers * 0.7, "Throughput too low"

        print("Sustained Load Test Results:")
        print(f"  Duration: {total_time:.2f}s")
        print(f"  Workers: {num_workers}")
        print(f"  Completed Requests: {total_completed}")
        print(f"  Failed Requests: {total_failed}")
        print(f"  Success Rate: {success_rate:.2%}")
        print(f"  Average Throughput: {total_completed / total_time:.2f} req/s")

    @pytest.mark.asyncio
    async def test_resource_exhaustion_recovery(self, large_sales_dataset):
        """Test service recovery from resource exhaustion"""

        trainer = BakeryMLTrainer()

        # Simulate resource exhaustion by running many concurrent requests
        num_concurrent = 10  # High concurrency to stress the system

        async def resource_intensive_task(task_id: int):
            """Task designed to consume resources"""
            try:
                # Use all products to increase memory usage
                all_products_data = large_sales_dataset.copy()

                result = await trainer.train_tenant_models(
                    tenant_id=f"resource_test_{task_id}",
                    sales_data=all_products_data,
                    config={
                        "train_all_products": True,
                        "include_weather": True,
                        "include_traffic": True
                    }
                )

                return {'task_id': task_id, 'status': 'completed', 'error': None}

            except Exception as e:
                return {'task_id': task_id, 'status': 'failed', 'error': str(e)}

        # Launch all tasks simultaneously
        start_time = time.time()
        tasks = [resource_intensive_task(i) for i in range(num_concurrent)]
        results = await asyncio.gather(*tasks, return_exceptions=True)
        duration = time.time() - start_time

        # Analyze results
        completed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'completed')
        failed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'failed')
        exceptions = sum(1 for r in results if isinstance(r, Exception))

        # The system should handle some failures gracefully
        # but should complete at least some requests
        total_processed = completed + failed + exceptions
        processing_rate = total_processed / num_concurrent

        assert processing_rate >= 0.5, f"Too many requests not processed: {processing_rate:.2%}"
        assert duration < 600, f"Recovery took too long: {duration:.2f}s"  # 10 minutes max

        print("Resource Exhaustion Test Results:")
        print(f"  Concurrent Requests: {num_concurrent}")
        print(f"  Completed: {completed}")
        print(f"  Failed: {failed}")
        print(f"  Exceptions: {exceptions}")
        print(f"  Duration: {duration:.2f}s")
        print(f"  Processing Rate: {processing_rate:.2%}")


# ================================================================
# BENCHMARK UTILITIES
# ================================================================

class PerformanceBenchmark:
    """Utility class for performance benchmarking"""

    @staticmethod
    def measure_execution_time(func):
        """Decorator to measure execution time"""
        async def wrapper(*args, **kwargs):
            start_time = time.time()
            result = await func(*args, **kwargs)
            execution_time = time.time() - start_time

            if hasattr(result, 'update') and isinstance(result, dict):
                result['execution_time'] = execution_time

            return result
        return wrapper

    @staticmethod
    def memory_profiler(func):
        """Decorator to profile memory usage"""
        async def wrapper(*args, **kwargs):
            process = psutil.Process()

            # Memory before
            gc.collect()
            memory_before = process.memory_info().rss / 1024 / 1024

            result = await func(*args, **kwargs)

            # Memory after
            memory_after = process.memory_info().rss / 1024 / 1024
            memory_used = memory_after - memory_before

            if hasattr(result, 'update') and isinstance(result, dict):
                result['memory_used_mb'] = memory_used

            return result
        return wrapper
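

# Example (illustrative sketch, not exercised by the tests above): how the
# benchmark decorators could wrap an async training call. It assumes, as the
# tests do, that BakeryMLTrainer.train_single_product returns a dict, so the
# decorators can attach 'execution_time' and 'memory_used_mb' to the result.
@PerformanceBenchmark.measure_execution_time
@PerformanceBenchmark.memory_profiler
async def _example_benchmarked_training(trainer: BakeryMLTrainer, sales_data: pd.DataFrame) -> Dict[str, Any]:
    """Illustrative helper showing the decorators in use; not collected by pytest."""
    return await trainer.train_single_product(
        tenant_id="benchmark_example_tenant",
        product_name="Pan Integral",
        sales_data=sales_data,
        config={"include_weather": False, "include_traffic": False}
    )
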
# ================================================================
# STANDALONE EXECUTION
# ================================================================

if __name__ == "__main__":
    """
    Run performance tests as standalone script
    Usage: python test_performance.py
    """
    import sys
    import os

    # Add the training service root to Python path
    training_service_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.insert(0, training_service_root)

    print("=" * 60)
    print("TRAINING SERVICE PERFORMANCE TEST SUITE")
    print("=" * 60)

    # Setup logging
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )

    # Run performance tests
    pytest.main([
        __file__,
        "-v",
        "--tb=short",
        "-s",  # Don't capture output
        "--durations=10",  # Show 10 slowest tests
        "-m", "not slow",  # Skip slow tests unless specifically requested
    ])

    print("\n" + "=" * 60)
    print("PERFORMANCE TESTING COMPLETE")
    print("=" * 60)
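
# Note (assumption, for illustration): the -m "not slow" filter above only
# deselects tests if a "slow" marker is actually applied; none of the tests in
# this file carry it yet. A minimal sketch of how it could be wired up:
#
#   # pytest.ini
#   # [pytest]
#   # markers =
#   #     slow: long-running performance tests
#
#   # on a test method:
#   # @pytest.mark.slow
#   # @pytest.mark.asyncio
#   # async def test_sustained_load_training(self, large_sales_dataset): ...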