# ================================================================ # services/training/tests/test_performance.py # ================================================================ """ Performance and Load Testing for Training Service Tests training performance with real-world data volumes """ import pytest import asyncio import pandas as pd import numpy as np import time from datetime import datetime, timedelta from concurrent.futures import ThreadPoolExecutor import psutil import gc from typing import List, Dict, Any import logging from app.ml.trainer import BakeryMLTrainer from app.ml.data_processor import BakeryDataProcessor from app.services.training_service import TrainingService class TestTrainingPerformance: """Performance tests for training service components""" @pytest.fixture def large_sales_dataset(self): """Generate large dataset for performance testing (2 years of data)""" start_date = datetime(2022, 1, 1) end_date = datetime(2024, 1, 1) date_range = pd.date_range(start=start_date, end=end_date, freq='D') products = [ "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras", "Donuts", "Berlinas", "Napolitanas", "Ensaimadas" ] data = [] for date in date_range: for product in products: # Realistic sales simulation base_quantity = np.random.randint(5, 150) # Seasonal patterns if date.month in [12, 1]: # Winter/Holiday season base_quantity *= 1.4 elif date.month in [6, 7, 8]: # Summer base_quantity *= 0.8 # Weekly patterns if date.weekday() >= 5: # Weekends base_quantity *= 1.2 elif date.weekday() == 0: # Monday base_quantity *= 0.7 # Add noise quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1))) data.append({ "date": date.strftime("%Y-%m-%d"), "product": product, "quantity": quantity, "revenue": round(quantity * np.random.uniform(1.5, 8.0), 2), "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1), "precipitation": max(0, np.random.exponential(0.8)), "is_weekend": date.weekday() >= 5, "is_holiday": self._is_spanish_holiday(date) }) return pd.DataFrame(data) def _is_spanish_holiday(self, date: datetime) -> bool: """Check if date is a Spanish holiday""" holidays = [ (1, 1), # New Year (1, 6), # Epiphany (5, 1), # Labor Day (8, 15), # Assumption (10, 12), # National Day (11, 1), # All Saints (12, 6), # Constitution Day (12, 8), # Immaculate Conception (12, 25), # Christmas ] return (date.month, date.day) in holidays @pytest.mark.asyncio async def test_single_product_training_performance(self, large_sales_dataset): """Test performance of single product training with large dataset""" trainer = BakeryMLTrainer() product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy() # Measure memory before training process = psutil.Process() memory_before = process.memory_info().rss / 1024 / 1024 # MB start_time = time.time() result = await trainer.train_single_product( tenant_id="perf_test_tenant", product_name="Pan Integral", sales_data=product_data, config={ "include_weather": True, "include_traffic": False, # Skip traffic for performance "seasonality_mode": "additive" } ) end_time = time.time() training_duration = end_time - start_time # Measure memory after training memory_after = process.memory_info().rss / 1024 / 1024 # MB memory_used = memory_after - memory_before # Performance assertions assert training_duration < 120, f"Training took too long: {training_duration:.2f}s" assert memory_used < 500, f"Memory usage too high: {memory_used:.2f}MB" assert result['status'] == 'completed' # Quality assertions metrics = result['metrics'] assert metrics['mape'] < 50, f"MAPE too high: {metrics['mape']:.2f}%" print(f"Performance Results:") print(f" Training Duration: {training_duration:.2f}s") print(f" Memory Used: {memory_used:.2f}MB") print(f" Data Points: {len(product_data)}") print(f" MAPE: {metrics['mape']:.2f}%") print(f" RMSE: {metrics['rmse']:.2f}") @pytest.mark.asyncio async def test_concurrent_training_performance(self, large_sales_dataset): """Test performance of concurrent training jobs""" trainer = BakeryMLTrainer() products = ["Pan Integral", "Croissant", "Magdalenas"] async def train_product(product_name: str): """Train a single product""" product_data = large_sales_dataset[large_sales_dataset['product'] == product_name].copy() start_time = time.time() result = await trainer.train_single_product( tenant_id=f"concurrent_test_{product_name.replace(' ', '_').lower()}", product_name=product_name, sales_data=product_data, config={"include_weather": True, "include_traffic": False} ) end_time = time.time() return { 'product': product_name, 'duration': end_time - start_time, 'status': result['status'], 'metrics': result.get('metrics', {}) } # Run concurrent training start_time = time.time() tasks = [train_product(product) for product in products] results = await asyncio.gather(*tasks) total_time = time.time() - start_time # Verify all trainings completed for result in results: assert result['status'] == 'completed' assert result['duration'] < 120 # Individual training time # Concurrent execution should be faster than sequential sequential_time_estimate = sum(r['duration'] for r in results) efficiency = sequential_time_estimate / total_time assert efficiency > 1.5, f"Concurrency efficiency too low: {efficiency:.2f}x" print(f"Concurrent Training Results:") print(f" Total Time: {total_time:.2f}s") print(f" Sequential Estimate: {sequential_time_estimate:.2f}s") print(f" Efficiency: {efficiency:.2f}x") for result in results: print(f" {result['product']}: {result['duration']:.2f}s, MAPE: {result['metrics'].get('mape', 'N/A'):.2f}%") @pytest.mark.asyncio async def test_data_processing_scalability(self, large_sales_dataset): """Test data processing performance with increasing data sizes""" data_processor = BakeryDataProcessor() # Test with different data sizes data_sizes = [1000, 5000, 10000, 20000, len(large_sales_dataset)] performance_results = [] for size in data_sizes: # Take a sample of the specified size sample_data = large_sales_dataset.head(size).copy() start_time = time.time() # Process the data processed_data = await data_processor.prepare_training_data( sales_data=sample_data, include_weather=True, include_traffic=True, tenant_id="scalability_test", product_name="Pan Integral" ) processing_time = time.time() - start_time performance_results.append({ 'data_size': size, 'processing_time': processing_time, 'processed_rows': len(processed_data), 'throughput': size / processing_time if processing_time > 0 else 0 }) # Verify linear or sub-linear scaling for i in range(1, len(performance_results)): prev_result = performance_results[i-1] curr_result = performance_results[i] size_ratio = curr_result['data_size'] / prev_result['data_size'] time_ratio = curr_result['processing_time'] / prev_result['processing_time'] # Processing time should scale better than linearly assert time_ratio < size_ratio * 1.5, f"Poor scaling at size {curr_result['data_size']}" print("Data Processing Scalability Results:") for result in performance_results: print(f" Size: {result['data_size']:,} rows, Time: {result['processing_time']:.2f}s, " f"Throughput: {result['throughput']:.0f} rows/s") @pytest.mark.asyncio async def test_memory_usage_optimization(self, large_sales_dataset): """Test memory usage optimization during training""" trainer = BakeryMLTrainer() process = psutil.Process() # Baseline memory gc.collect() # Force garbage collection baseline_memory = process.memory_info().rss / 1024 / 1024 # MB memory_snapshots = [{'stage': 'baseline', 'memory_mb': baseline_memory}] # Load data product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy() current_memory = process.memory_info().rss / 1024 / 1024 memory_snapshots.append({'stage': 'data_loaded', 'memory_mb': current_memory}) # Train model result = await trainer.train_single_product( tenant_id="memory_test_tenant", product_name="Pan Integral", sales_data=product_data, config={"include_weather": True, "include_traffic": True} ) current_memory = process.memory_info().rss / 1024 / 1024 memory_snapshots.append({'stage': 'model_trained', 'memory_mb': current_memory}) # Cleanup del product_data del result gc.collect() final_memory = process.memory_info().rss / 1024 / 1024 memory_snapshots.append({'stage': 'cleanup', 'memory_mb': final_memory}) # Memory assertions peak_memory = max(snapshot['memory_mb'] for snapshot in memory_snapshots) memory_increase = peak_memory - baseline_memory memory_after_cleanup = final_memory - baseline_memory assert memory_increase < 800, f"Peak memory increase too high: {memory_increase:.2f}MB" assert memory_after_cleanup < 100, f"Memory not properly cleaned up: {memory_after_cleanup:.2f}MB" print("Memory Usage Analysis:") for snapshot in memory_snapshots: print(f" {snapshot['stage']}: {snapshot['memory_mb']:.2f}MB") print(f" Peak increase: {memory_increase:.2f}MB") print(f" After cleanup: {memory_after_cleanup:.2f}MB") @pytest.mark.asyncio async def test_training_service_throughput(self, large_sales_dataset): """Test training service throughput with multiple requests""" training_service = TrainingService() # Simulate multiple training requests num_requests = 5 products = ["Pan Integral", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate"] async def execute_training_request(request_id: int, product: str): """Execute a single training request""" product_data = large_sales_dataset[large_sales_dataset['product'] == product].copy() with patch.object(training_service, '_fetch_sales_data', return_value=product_data): start_time = time.time() result = await training_service.execute_training_job( db=None, # Mock DB session tenant_id=f"throughput_test_tenant_{request_id}", job_id=f"job_{request_id}_{product.replace(' ', '_').lower()}", request={ 'products': [product], 'include_weather': True, 'include_traffic': False, 'config': {'seasonality_mode': 'additive'} } ) duration = time.time() - start_time return { 'request_id': request_id, 'product': product, 'duration': duration, 'status': result.get('status', 'unknown'), 'models_trained': len(result.get('models_trained', [])) } # Execute requests concurrently start_time = time.time() tasks = [ execute_training_request(i, products[i % len(products)]) for i in range(num_requests) ] results = await asyncio.gather(*tasks) total_time = time.time() - start_time # Calculate throughput metrics successful_requests = sum(1 for r in results if r['status'] == 'completed') throughput = successful_requests / total_time # requests per second # Performance assertions assert successful_requests >= num_requests * 0.8, "Too many failed requests" assert throughput >= 0.1, f"Throughput too low: {throughput:.3f} req/s" assert total_time < 300, f"Total time too long: {total_time:.2f}s" print(f"Training Service Throughput Results:") print(f" Total Requests: {num_requests}") print(f" Successful: {successful_requests}") print(f" Total Time: {total_time:.2f}s") print(f" Throughput: {throughput:.3f} req/s") print(f" Average Request Time: {total_time/num_requests:.2f}s") @pytest.mark.asyncio async def test_large_dataset_edge_cases(self, large_sales_dataset): """Test handling of edge cases with large datasets""" data_processor = BakeryDataProcessor() # Test 1: Dataset with many missing values corrupted_data = large_sales_dataset.copy() # Introduce 30% missing values randomly mask = np.random.random(len(corrupted_data)) < 0.3 corrupted_data.loc[mask, 'quantity'] = np.nan start_time = time.time() result = await data_processor.validate_data_quality(corrupted_data) validation_time = time.time() - start_time assert validation_time < 10, f"Validation too slow: {validation_time:.2f}s" assert result['is_valid'] is False assert 'high_missing_data' in result['issues'] # Test 2: Dataset with extreme outliers outlier_data = large_sales_dataset.copy() # Add extreme outliers (100x normal values) outlier_indices = np.random.choice(len(outlier_data), size=int(len(outlier_data) * 0.01), replace=False) outlier_data.loc[outlier_indices, 'quantity'] *= 100 start_time = time.time() cleaned_data = await data_processor.clean_outliers(outlier_data) cleaning_time = time.time() - start_time assert cleaning_time < 15, f"Outlier cleaning too slow: {cleaning_time:.2f}s" assert len(cleaned_data) > len(outlier_data) * 0.95 # Should retain most data # Test 3: Very sparse data (many products with few sales) sparse_data = large_sales_dataset.copy() # Keep only 10% of data for each product randomly sparse_data = sparse_data.groupby('product').apply( lambda x: x.sample(n=max(1, int(len(x) * 0.1))) ).reset_index(drop=True) start_time = time.time() validation_result = await data_processor.validate_data_quality(sparse_data) sparse_validation_time = time.time() - start_time assert sparse_validation_time < 5, f"Sparse data validation too slow: {sparse_validation_time:.2f}s" print("Edge Case Performance Results:") print(f" Corrupted data validation: {validation_time:.2f}s") print(f" Outlier cleaning: {cleaning_time:.2f}s") print(f" Sparse data validation: {sparse_validation_time:.2f}s") class TestTrainingServiceLoad: """Load testing for training service under stress""" @pytest.mark.asyncio async def test_sustained_load_training(self, large_sales_dataset): """Test training service under sustained load""" trainer = BakeryMLTrainer() # Define load test parameters duration_minutes = 2 # Run for 2 minutes requests_per_minute = 3 products = ["Pan Integral", "Croissant", "Magdalenas"] async def sustained_training_worker(worker_id: int, duration: float): """Worker that continuously submits training requests""" start_time = time.time() completed_requests = 0 failed_requests = 0 while time.time() - start_time < duration: try: product = products[completed_requests % len(products)] product_data = large_sales_dataset[ large_sales_dataset['product'] == product ].copy() result = await trainer.train_single_product( tenant_id=f"load_test_worker_{worker_id}", product_name=product, sales_data=product_data, config={"include_weather": False, "include_traffic": False} # Minimal config for speed ) if result['status'] == 'completed': completed_requests += 1 else: failed_requests += 1 except Exception as e: failed_requests += 1 logging.error(f"Training request failed: {e}") # Wait before next request await asyncio.sleep(60 / requests_per_minute) return { 'worker_id': worker_id, 'completed': completed_requests, 'failed': failed_requests, 'duration': time.time() - start_time } # Start multiple workers num_workers = 2 duration_seconds = duration_minutes * 60 start_time = time.time() tasks = [ sustained_training_worker(i, duration_seconds) for i in range(num_workers) ] results = await asyncio.gather(*tasks) total_time = time.time() - start_time # Analyze results total_completed = sum(r['completed'] for r in results) total_failed = sum(r['failed'] for r in results) success_rate = total_completed / (total_completed + total_failed) if (total_completed + total_failed) > 0 else 0 # Performance assertions assert success_rate >= 0.8, f"Success rate too low: {success_rate:.2%}" assert total_completed >= duration_minutes * requests_per_minute * num_workers * 0.7, "Throughput too low" print(f"Sustained Load Test Results:") print(f" Duration: {total_time:.2f}s") print(f" Workers: {num_workers}") print(f" Completed Requests: {total_completed}") print(f" Failed Requests: {total_failed}") print(f" Success Rate: {success_rate:.2%}") print(f" Average Throughput: {total_completed/total_time:.2f} req/s") @pytest.mark.asyncio async def test_resource_exhaustion_recovery(self, large_sales_dataset): """Test service recovery from resource exhaustion""" trainer = BakeryMLTrainer() # Simulate resource exhaustion by running many concurrent requests num_concurrent = 10 # High concurrency to stress the system async def resource_intensive_task(task_id: int): """Task designed to consume resources""" try: # Use all products to increase memory usage all_products_data = large_sales_dataset.copy() result = await trainer.train_tenant_models( tenant_id=f"resource_test_{task_id}", sales_data=all_products_data, config={ "train_all_products": True, "include_weather": True, "include_traffic": True } ) return {'task_id': task_id, 'status': 'completed', 'error': None} except Exception as e: return {'task_id': task_id, 'status': 'failed', 'error': str(e)} # Launch all tasks simultaneously start_time = time.time() tasks = [resource_intensive_task(i) for i in range(num_concurrent)] results = await asyncio.gather(*tasks, return_exceptions=True) duration = time.time() - start_time # Analyze results completed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'completed') failed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'failed') exceptions = sum(1 for r in results if isinstance(r, Exception)) # The system should handle some failures gracefully # but should complete at least some requests total_processed = completed + failed + exceptions processing_rate = total_processed / num_concurrent assert processing_rate >= 0.5, f"Too many requests not processed: {processing_rate:.2%}" assert duration < 600, f"Recovery took too long: {duration:.2f}s" # 10 minutes max print(f"Resource Exhaustion Test Results:") print(f" Concurrent Requests: {num_concurrent}") print(f" Completed: {completed}") print(f" Failed: {failed}") print(f" Exceptions: {exceptions}") print(f" Duration: {duration:.2f}s") print(f" Processing Rate: {processing_rate:.2%}") # ================================================================ # BENCHMARK UTILITIES # ================================================================ class PerformanceBenchmark: """Utility class for performance benchmarking""" @staticmethod def measure_execution_time(func): """Decorator to measure execution time""" async def wrapper(*args, **kwargs): start_time = time.time() result = await func(*args, **kwargs) execution_time = time.time() - start_time if hasattr(result, 'update') and isinstance(result, dict): result['execution_time'] = execution_time return result return wrapper @staticmethod def memory_profiler(func): """Decorator to profile memory usage""" async def wrapper(*args, **kwargs): process = psutil.Process() # Memory before gc.collect() memory_before = process.memory_info().rss / 1024 / 1024 result = await func(*args, **kwargs) # Memory after memory_after = process.memory_info().rss / 1024 / 1024 memory_used = memory_after - memory_before if hasattr(result, 'update') and isinstance(result, dict): result['memory_used_mb'] = memory_used return result return wrapper # ================================================================ # STANDALONE EXECUTION # ================================================================ if __name__ == "__main__": """ Run performance tests as standalone script Usage: python test_performance.py """ import sys import os from unittest.mock import patch # Add the training service root to Python path training_service_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, training_service_root) print("=" * 60) print("TRAINING SERVICE PERFORMANCE TEST SUITE") print("=" * 60) # Setup logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) # Run performance tests pytest.main([ __file__, "-v", "--tb=short", "-s", # Don't capture output "--durations=10", # Show 10 slowest tests "-m", "not slow", # Skip slow tests unless specifically requested ]) print("\n" + "=" * 60) print("PERFORMANCE TESTING COMPLETE") print("=" * 60)