# ================================================================ # services/training/tests/conftest.py # ================================================================ """ Test configuration and fixtures for Training Service Provides shared fixtures, mock data, and test utilities """ import pytest import asyncio import pandas as pd import numpy as np import tempfile import os import json from datetime import datetime, timedelta from unittest.mock import Mock, AsyncMock, patch from typing import Dict, List, Any, Generator from pathlib import Path import logging from app.models.training import ModelTrainingLog, TrainedModel # Configure pytest-asyncio pytestmark = pytest.mark.asyncio # Suppress Prophet logging during tests logging.getLogger('prophet').setLevel(logging.WARNING) logging.getLogger('cmdstanpy').setLevel(logging.WARNING) # ================================================================ # PYTEST CONFIGURATION # ================================================================ @pytest.fixture def large_dataset_for_performance(): """Generate large dataset for performance testing""" # Generate 2 years of data with 15 products start_date = datetime(2022, 1, 1) end_date = datetime(2024, 1, 1) date_range = pd.date_range(start=start_date, end=end_date, freq='D') products = [ "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras", "Donuts", "Berlinas", "Napolitanas", "Ensaimadas", "Baguette", "Pan de Molde", "Bizcocho" ] data = [] for date in date_range: for product in products: # Realistic sales with patterns base_quantity = np.random.randint(5, 150) # Seasonal patterns if date.month in [12, 1]: # Winter/Holiday season base_quantity *= 1.4 elif date.month in [6, 7, 8]: # Summer base_quantity *= 0.8 # Weekly patterns if date.weekday() >= 5: # Weekends base_quantity *= 1.2 elif date.weekday() == 0: # Monday base_quantity *= 0.7 # Add noise quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1))) data.append({ "date": date.strftime("%Y-%m-%d"), "product": product, "quantity": quantity, "revenue": round(quantity * np.random.uniform(1.5, 8.0), 2), "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1), "precipitation": max(0, np.random.exponential(0.8)), "is_weekend": date.weekday() >= 5, "is_holiday": _is_spanish_holiday(date) }) return pd.DataFrame(data) @pytest.fixture def memory_monitor(): """Memory monitoring utility for performance tests""" import psutil import gc class MemoryMonitor: def __init__(self): self.process = psutil.Process() self.snapshots = [] def snapshot(self, label: str): gc.collect() # Force garbage collection memory_mb = self.process.memory_info().rss / 1024 / 1024 self.snapshots.append({ 'label': label, 'memory_mb': memory_mb, 'timestamp': datetime.now() }) return memory_mb def get_peak_usage(self): if not self.snapshots: return 0 return max(s['memory_mb'] for s in self.snapshots) def get_usage_increase(self): if len(self.snapshots) < 2: return 0 return self.snapshots[-1]['memory_mb'] - self.snapshots[0]['memory_mb'] def report(self): print("\n=== Memory Usage Report ===") for snapshot in self.snapshots: print(f"{snapshot['label']}: {snapshot['memory_mb']:.2f} MB") print(f"Peak Usage: {self.get_peak_usage():.2f} MB") print(f"Total Increase: {self.get_usage_increase():.2f} MB") return MemoryMonitor() @pytest.fixture def timing_monitor(): """Timing monitoring utility for performance tests""" import time class TimingMonitor: def __init__(self): self.timings = [] self.start_time = None def start(self, label: str): self.start_time = time.time() self.current_label = label def stop(self): if self.start_time is None: return 0 duration = time.time() - self.start_time self.timings.append({ 'label': self.current_label, 'duration': duration }) self.start_time = None return duration def get_total_time(self): return sum(t['duration'] for t in self.timings) def report(self): print("\n=== Timing Report ===") for timing in self.timings: print(f"{timing['label']}: {timing['duration']:.2f}s") print(f"Total Time: {self.get_total_time():.2f}s") return TimingMonitor() # ================================================================ # INTEGRATION TEST FIXTURES # ================================================================ @pytest.fixture async def integration_test_setup( mock_external_services, sample_bakery_sales_data, temp_model_storage ): """Complete setup for integration tests""" # Patch model storage path with patch('app.core.config.settings.MODEL_STORAGE_PATH', str(temp_model_storage)): # Patch data fetching to use sample data with patch('app.services.training_service.TrainingService._fetch_sales_data') as mock_fetch: mock_fetch.return_value = sample_bakery_sales_data yield { 'external_services': mock_external_services, 'sales_data': sample_bakery_sales_data, 'model_storage': temp_model_storage, 'mock_fetch': mock_fetch } @pytest.fixture def mock_messaging(): """Mock messaging system for testing""" with patch('app.services.messaging.publish_job_started') as mock_started, \ patch('app.services.messaging.publish_job_completed') as mock_completed, \ patch('app.services.messaging.publish_job_failed') as mock_failed, \ patch('app.services.messaging.publish_model_trained') as mock_model: yield { 'publish_job_started': mock_started, 'publish_job_completed': mock_completed, 'publish_job_failed': mock_failed, 'publish_model_trained': mock_model } # ================================================================ # API TEST FIXTURES # ================================================================ @pytest.fixture async def test_app(): """Test FastAPI application instance""" from app.main import app return app @pytest.fixture def test_client(test_app): """Create test client for API testing - SYNC VERSION""" from httpx import Client with Client(app=test_app, base_url="http://test") as client: yield client @pytest.fixture def auth_headers(): """Mock authentication headers""" return { "Authorization": "Bearer test_token_123", "X-Tenant-ID": "test_tenant_123" } # ================================================================ # ERROR SIMULATION FIXTURES # ================================================================ @pytest.fixture def failing_external_services(): """Mock external services that fail for error testing""" with patch('app.external.aemet.AEMETClient') as mock_aemet, \ patch('app.external.madrid_opendata.MadridOpenDataClient') as mock_madrid: # Configure to raise exceptions mock_aemet_instance = AsyncMock() mock_aemet.return_value = mock_aemet_instance mock_aemet_instance.get_historical_weather.side_effect = Exception("AEMET API Error") mock_madrid_instance = AsyncMock() mock_madrid.return_value = mock_madrid_instance mock_madrid_instance.get_historical_traffic.side_effect = Exception("Madrid API Error") yield { 'aemet': mock_aemet_instance, 'madrid': mock_madrid_instance } @pytest.fixture def corrupted_sales_data(sample_bakery_sales_data): """Sales data with various quality issues for testing""" corrupted_data = sample_bakery_sales_data.copy() # Introduce missing values (20% of quantity data) missing_mask = np.random.random(len(corrupted_data)) < 0.2 corrupted_data.loc[missing_mask, 'quantity'] = np.nan # Introduce extreme outliers (1% of data) outlier_mask = np.random.random(len(corrupted_data)) < 0.01 corrupted_data.loc[outlier_mask, 'quantity'] *= 100 # Introduce inconsistent dates (0.5% of data) future_mask = np.random.random(len(corrupted_data)) < 0.005 corrupted_data.loc[future_mask, 'date'] = "2025-12-31" # Introduce negative values (0.2% of data) negative_mask = np.random.random(len(corrupted_data)) < 0.002 corrupted_data.loc[negative_mask, 'quantity'] = -10 return corrupted_data # ================================================================ # VALIDATION TEST FIXTURES # ================================================================ @pytest.fixture def insufficient_sales_data(): """Sales data with insufficient volume for training""" # Only 10 days of data start_date = datetime(2023, 1, 1) dates = [start_date + timedelta(days=i) for i in range(10)] data = [] for date in dates: data.append({ "date": date.strftime("%Y-%m-%d"), "product": "Pan Integral", "quantity": np.random.randint(10, 50), "revenue": round(np.random.uniform(20, 100), 2), "temperature": round(np.random.uniform(10, 25), 1), "precipitation": 0.0, "is_weekend": date.weekday() >= 5, "is_holiday": False }) return pd.DataFrame(data) @pytest.fixture def seasonal_product_data(): """Data for seasonal product (Roscon Reyes) testing""" start_date = datetime(2023, 1, 1) dates = [start_date + timedelta(days=i) for i in range(365)] data = [] for date in dates: # Roscon Reyes has strong seasonal pattern (Christmas specialty) base_qty = 5 # Very low base if date.month == 12: # December - high sales base_qty = 20 + (date.day - 1) * 2 # Increasing through December elif date.month == 1 and date.day <= 6: # Until Epiphany base_qty = 50 # Add some noise quantity = max(1, int(base_qty + np.random.normal(0, base_qty * 0.2))) data.append({ "date": date.strftime("%Y-%m-%d"), "product": "Roscon Reyes", "quantity": quantity, "revenue": round(quantity * 25.0, 2), # Expensive specialty item "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi), 1), "precipitation": max(0, np.random.exponential(0.5)), "is_weekend": date.weekday() >= 5, "is_holiday": _is_spanish_holiday(date) }) return pd.DataFrame(data) # ================================================================ # CLEANUP FIXTURES # ================================================================ @pytest.fixture(autouse=True) def cleanup_after_test(): """Automatic cleanup after each test""" yield # Clean up any test files import tempfile import shutil # Clear any temporary model files temp_dirs = [d for d in os.listdir(tempfile.gettempdir()) if d.startswith('test_models_')] for temp_dir in temp_dirs: try: shutil.rmtree(os.path.join(tempfile.gettempdir(), temp_dir)) except: pass # ================================================================ # TEST DATA VALIDATION UTILITIES # ================================================================ class TestDataValidator: """Utility class for validating test data quality""" @staticmethod def validate_sales_data(df: pd.DataFrame) -> Dict[str, Any]: """Validate sales data structure and quality""" required_columns = ['date', 'product', 'quantity', 'revenue'] missing_columns = [col for col in required_columns if col not in df.columns] if missing_columns: return {'valid': False, 'error': f'Missing columns: {missing_columns}'} # Check data types try: pd.to_datetime(df['date']) except: return {'valid': False, 'error': 'Invalid date format'} if not pd.api.types.is_numeric_dtype(df['quantity']): return {'valid': False, 'error': 'Quantity must be numeric'} if not pd.api.types.is_numeric_dtype(df['revenue']): return {'valid': False, 'error': 'Revenue must be numeric'} # Check for negative values if (df['quantity'] < 0).any(): return {'valid': False, 'error': 'Negative quantities found'} if (df['revenue'] < 0).any(): return {'valid': False, 'error': 'Negative revenue found'} return {'valid': True, 'rows': len(df), 'products': df['product'].nunique()} @pytest.fixture def test_data_validator(): """Test data validator utility""" return TestDataValidator() # ================================================================ # LOGGING CONFIGURATION FOR TESTS # ================================================================ @pytest.fixture(autouse=True) def configure_test_logging(): """Configure logging for tests""" import logging # Reduce log level for external libraries during tests logging.getLogger('prophet').setLevel(logging.WARNING) logging.getLogger('cmdstanpy').setLevel(logging.ERROR) logging.getLogger('matplotlib').setLevel(logging.WARNING) logging.getLogger('urllib3').setLevel(logging.WARNING) # Configure our app logging for tests logger = logging.getLogger('app') logger.setLevel(logging.INFO) yield # Reset logging after tests logging.getLogger().handlers.clear() # ================================================================ # ENVIRONMENT SETUP # ================================================================ @pytest.fixture(scope="session", autouse=True) def setup_test_environment(): """Setup test environment variables""" os.environ.update({ 'ENVIRONMENT': 'test', 'LOG_LEVEL': 'INFO', 'MODEL_STORAGE_PATH': '/tmp/test_models', 'MAX_TRAINING_TIME_MINUTES': '5', 'MIN_TRAINING_DATA_DAYS': '7', 'PROPHET_SEASONALITY_MODE': 'additive', 'ENABLE_SYNTHETIC_DATA': 'true', 'SKIP_EXTERNAL_API_CALLS': 'true' }) yield # Cleanup environment - FIXED: removed (scope="session") test_vars = [ 'ENVIRONMENT', 'LOG_LEVEL', 'MODEL_STORAGE_PATH', 'MAX_TRAINING_TIME_MINUTES', 'MIN_TRAINING_DATA_DAYS', 'PROPHET_SEASONALITY_MODE', 'ENABLE_SYNTHETIC_DATA', 'SKIP_EXTERNAL_API_CALLS' ] for var in test_vars: os.environ.pop(var, None) # FIXED: removed the erroneous (scope="session") def event_loop(): """Create an instance of the default event loop for the test session.""" loop = asyncio.new_event_loop() yield loop loop.close() def pytest_configure(config): """Configure pytest with custom markers""" config.addinivalue_line( "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')" ) config.addinivalue_line( "markers", "integration: marks tests as integration tests" ) config.addinivalue_line( "markers", "unit: marks tests as unit tests" ) config.addinivalue_line( "markers", "performance: marks tests as performance tests" ) config.addinivalue_line( "markers", "external: marks tests that require external services" ) def pytest_collection_modifyitems(config, items): """Modify test collection to add markers automatically""" for item in items: # Mark performance tests if "performance" in item.nodeid: item.add_marker(pytest.mark.performance) item.add_marker(pytest.mark.slow) # Mark integration tests if "integration" in item.nodeid: item.add_marker(pytest.mark.integration) # Mark end-to-end tests if "end_to_end" in item.nodeid: item.add_marker(pytest.mark.integration) item.add_marker(pytest.mark.external) # Mark unit tests (default for others) if not any(marker.name in ["integration", "performance"] for marker in item.iter_markers()): item.add_marker(pytest.mark.unit) # ================================================================ # TEST DATABASE FIXTURES # ================================================================ @pytest_asyncio.fixture async def test_db_session(): """Create async test database session""" from app.core.database import database_manager async with database_manager.async_session_local() as session: yield session @pytest.fixture def training_job_in_db(test_db_session): """Create a training job in database for testing""" from app.models.training import ModelTrainingLog # Add this import from datetime import datetime job = ModelTrainingLog( job_id="test-job-123", tenant_id="test-tenant", status="running", progress=50, current_step="Training models", start_time=datetime.now(), # Use start_time, not started_at config={"include_weather": True}, created_at=datetime.now(), updated_at=datetime.now() ) test_db_session.add(job) test_db_session.commit() test_db_session.refresh(job) return job @pytest.fixture def trained_model_in_db(test_db_session): """Create a trained model in database for testing""" from app.models.training import TrainedModel # Add this import from datetime import datetime model = TrainedModel( model_id="test-model-123", tenant_id="test-tenant", product_name="Pan Integral", model_type="prophet", model_path="/tmp/test_model.pkl", version=1, training_samples=100, features=["temperature", "humidity"], hyperparameters={"seasonality_mode": "additive"}, training_metrics={"mae": 2.5, "mse": 8.3}, is_active=True, created_at=datetime.now() ) test_db_session.add(model) test_db_session.commit() test_db_session.refresh(model) return model # ================================================================ # SAMPLE DATA FIXTURES # ================================================================ @pytest.fixture def sample_bakery_sales_data(): """Generate comprehensive bakery sales data for testing""" # Generate 1 year of data start_date = datetime(2023, 1, 1) dates = [start_date + timedelta(days=i) for i in range(365)] # Spanish bakery products with realistic patterns products = [ "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras", "Donuts", "Berlinas", "Napolitanas", "Ensaimadas" ] # Product-specific configurations product_config = { "Pan Integral": {"base": 80, "price": 2.80, "weekend_boost": 1.1, "seasonal": False}, "Pan Blanco": {"base": 120, "price": 2.50, "weekend_boost": 1.2, "seasonal": False}, "Croissant": {"base": 45, "price": 1.50, "weekend_boost": 1.4, "seasonal": False}, "Magdalenas": {"base": 30, "price": 1.20, "weekend_boost": 1.1, "seasonal": False}, "Empanadas": {"base": 25, "price": 3.50, "weekend_boost": 0.9, "seasonal": False}, "Tarta Chocolate": {"base": 15, "price": 18.00, "weekend_boost": 1.6, "seasonal": False}, "Roscon Reyes": {"base": 8, "price": 25.00, "weekend_boost": 1.0, "seasonal": True}, "Palmeras": {"base": 12, "price": 1.80, "weekend_boost": 1.2, "seasonal": False}, "Donuts": {"base": 20, "price": 1.40, "weekend_boost": 1.3, "seasonal": False}, "Berlinas": {"base": 18, "price": 1.60, "weekend_boost": 1.2, "seasonal": False}, "Napolitanas": {"base": 22, "price": 1.70, "weekend_boost": 1.1, "seasonal": False}, "Ensaimadas": {"base": 15, "price": 2.20, "weekend_boost": 1.0, "seasonal": False} } data = [] for date in dates: # Calculate date-specific factors day_of_year = date.timetuple().tm_yday is_weekend = date.weekday() >= 5 is_holiday = _is_spanish_holiday(date) # Madrid weather simulation temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi) + np.random.normal(0, 3) precip = max(0, np.random.exponential(0.8)) for product in products: config = product_config[product] # Base quantity base_qty = config["base"] # Apply weekend boost if is_weekend: base_qty *= config["weekend_boost"] # Apply holiday boost if is_holiday: base_qty *= 1.3 # Seasonal products (like Roscon Reyes for Christmas) if config["seasonal"] and product == "Roscon Reyes": if date.month == 12: # Exponential increase through December base_qty *= (1 + (date.day - 1) / 5) elif date.month == 1 and date.day <= 6: # High demand until Epiphany (Jan 6) base_qty *= 3 else: # Very low demand rest of year base_qty *= 0.1 # Weather effects if temp > 30: # Very hot days if product in ["Pan Integral", "Pan Blanco"]: base_qty *= 0.7 # Less bread elif product in ["Donuts", "Berlinas"]: base_qty *= 0.8 # Less fried items elif temp < 5: # Cold days base_qty *= 1.15 # More baked goods # Add realistic noise and ensure minimum of 1 quantity = max(1, int(base_qty + np.random.normal(0, base_qty * 0.12))) revenue = round(quantity * config["price"], 2) data.append({ "date": date.strftime("%Y-%m-%d"), "product": product, "quantity": quantity, "revenue": revenue, "temperature": round(temp, 1), "precipitation": round(precip, 2), "is_weekend": is_weekend, "is_holiday": is_holiday }) return pd.DataFrame(data) @pytest.fixture def sample_weather_data(): """Generate realistic Madrid weather data""" start_date = datetime(2023, 1, 1) weather_data = [] for i in range(365): date = start_date + timedelta(days=i) day_of_year = date.timetuple().tm_yday # Madrid climate simulation base_temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi) # Seasonal humidity patterns base_humidity = 50 + 20 * np.sin((day_of_year / 365) * 2 * np.pi + np.pi) weather_data.append({ "date": date, "temperature": round(base_temp + np.random.normal(0, 4), 1), "precipitation": max(0, np.random.exponential(1.2)), "humidity": np.random.uniform(25, 75), "wind_speed": np.random.uniform(3, 20), "pressure": np.random.uniform(995, 1025), "description": np.random.choice([ "Soleado", "Parcialmente nublado", "Nublado", "Lluvia ligera", "Despejado", "Variable" ]), "source": "aemet_test" }) return weather_data @pytest.fixture def sample_traffic_data(): """Generate realistic Madrid traffic data""" start_date = datetime(2023, 1, 1) traffic_data = [] for i in range(365): date = start_date + timedelta(days=i) # Generate multiple measurements per day for hour in range(6, 22, 2): # Every 2 hours from 6 AM to 10 PM measurement_time = date.replace(hour=hour) # Madrid traffic patterns if hour in [7, 8, 9, 18, 19, 20]: # Rush hours volume = np.random.randint(1200, 2000) congestion = "high" speed = np.random.randint(10, 25) occupation = np.random.randint(60, 90) elif hour in [12, 13, 14]: # Lunch time volume = np.random.randint(800, 1200) congestion = "medium" speed = np.random.randint(20, 35) occupation = np.random.randint(40, 70) else: # Off-peak volume = np.random.randint(300, 800) congestion = "low" speed = np.random.randint(30, 50) occupation = np.random.randint(15, 50) # Weekend adjustment if date.weekday() >= 5: volume = int(volume * 0.8) # Less traffic on weekends speed = min(50, int(speed * 1.2)) # Faster speeds traffic_data.append({ "date": measurement_time, "traffic_volume": volume, "occupation_percentage": occupation, "load_percentage": min(95, occupation + np.random.randint(5, 15)), "average_speed": speed, "congestion_level": congestion, "pedestrian_count": np.random.randint(100, 800), "measurement_point_id": "MADRID_TEST_001", "measurement_point_name": "Plaza Mayor", "road_type": "URB", "source": "madrid_opendata_test" }) return traffic_data # ================================================================ # MOCK SERVICES FIXTURES # ================================================================ @pytest.fixture async def mock_aemet_client(sample_weather_data): """Mock AEMET weather API client""" with patch('app.external.aemet.AEMETClient') as mock_class: mock_instance = AsyncMock() mock_class.return_value = mock_instance # Configure mock responses mock_instance.get_historical_weather.return_value = sample_weather_data mock_instance.get_current_weather.return_value = sample_weather_data[-1] mock_instance.get_weather_forecast.return_value = sample_weather_data[-7:] yield mock_instance @pytest.fixture async def mock_madrid_client(sample_traffic_data): """Mock Madrid OpenData API client""" with patch('app.external.madrid_opendata.MadridOpenDataClient') as mock_class: mock_instance = AsyncMock() mock_class.return_value = mock_instance # Configure mock responses mock_instance.get_historical_traffic.return_value = sample_traffic_data mock_instance.get_current_traffic.return_value = sample_traffic_data[-1] yield mock_instance @pytest.fixture async def mock_external_services(mock_aemet_client, mock_madrid_client): """Combined mock for all external services""" return { 'aemet': mock_aemet_client, 'madrid': mock_madrid_client } # ================================================================ # ML COMPONENT FIXTURES # ================================================================ @pytest.fixture def mock_ml_trainer(): """Mock ML trainer for testing""" with patch('app.ml.trainer.BakeryMLTrainer') as mock_class: mock_instance = AsyncMock() mock_class.return_value = mock_instance # Configure successful training responses mock_instance.train_single_product.return_value = { "status": "completed", "model_id": "test_model_123", "metrics": { "mape": 25.5, "rmse": 12.3, "mae": 8.7, "r2_score": 0.85 }, "training_duration": 45.2, "data_points_used": 365 } mock_instance.train_tenant_models.return_value = [ { "product_name": "Pan Integral", "model_id": "model_pan_integral_123", "metrics": {"mape": 22.1, "rmse": 10.5, "mae": 7.8}, "training_completed": True }, { "product_name": "Croissant", "model_id": "model_croissant_456", "metrics": {"mape": 28.3, "rmse": 8.9, "mae": 6.2}, "training_completed": True } ] yield mock_instance @pytest.fixture def mock_data_processor(): """Mock data processor for testing""" with patch('app.ml.data_processor.BakeryDataProcessor') as mock_class: mock_instance = AsyncMock() mock_class.return_value = mock_instance # Configure mock responses mock_instance.validate_data_quality.return_value = { "is_valid": True, "data_points": 1000, "missing_percentage": 2.5, "issues": [] } mock_instance.prepare_training_data.return_value = pd.DataFrame({ "ds": pd.date_range("2023-01-01", periods=365), "y": np.random.randint(10, 100, 365), "temperature": np.random.uniform(0, 35, 365), "traffic_volume": np.random.randint(100, 2000, 365) }) yield mock_instance @pytest.fixture def mock_data_service(): """Mock data service for testing""" from unittest.mock import Mock, AsyncMock mock_service = Mock() mock_service.get_sales_data = AsyncMock(return_value=[ {"date": "2024-01-01", "product_name": "Pan Integral", "quantity": 45}, {"date": "2024-01-02", "product_name": "Pan Integral", "quantity": 38} ]) mock_service.get_weather_data = AsyncMock(return_value=[ {"date": "2024-01-01", "temperature": 20.5, "humidity": 65} ]) mock_service.get_traffic_data = AsyncMock(return_value=[ {"date": "2024-01-01", "traffic_index": 0.7} ]) return mock_service @pytest.fixture def mock_prophet_manager(): """Mock Prophet manager for testing""" with patch('app.ml.prophet_manager.BakeryProphetManager') as mock_class: mock_instance = AsyncMock() mock_class.return_value = mock_instance # Configure mock responses mock_instance.train_model.return_value = { "model": Mock(), # Mock Prophet model "metrics": { "mape": 23.7, "rmse": 11.2, "mae": 8.1 }, "cross_validation": { "cv_mape_mean": 25.1, "cv_mape_std": 3.2 } } mock_instance.generate_predictions.return_value = pd.DataFrame({ "ds": pd.date_range("2024-01-01", periods=30), "yhat": np.random.uniform(20, 80, 30), "yhat_lower": np.random.uniform(10, 60, 30), "yhat_upper": np.random.uniform(30, 100, 30) }) yield mock_instance # ================================================================ # UTILITY FIXTURES # ================================================================ @pytest.fixture def temp_model_storage(): """Temporary directory for model storage during tests""" with tempfile.TemporaryDirectory() as temp_dir: yield Path(temp_dir) @pytest.fixture def test_config(): """Test configuration settings""" return { "MODEL_STORAGE_PATH": "/tmp/test_models", "MAX_TRAINING_TIME_MINUTES": 5, "MIN_TRAINING_DATA_DAYS": 7, "PROPHET_SEASONALITY_MODE": "additive", "INCLUDE_SPANISH_HOLIDAYS": True, "ENABLE_SYNTHETIC_DATA": True } @pytest.fixture def sample_training_request(): """Sample training request for API tests""" return { "products": ["Pan Integral", "Croissant"], "include_weather": True, "include_traffic": True, "config": { "seasonality_mode": "additive", "changepoint_prior_scale": 0.05, "seasonality_prior_scale": 10.0, "validation_enabled": True } } @pytest.fixture def sample_single_product_request(): """Sample single product training request""" return { "product_name": "Pan Integral", "include_weather": True, "include_traffic": False, "config": { "seasonality_mode": "multiplicative", "include_holidays": True, "holiday_prior_scale": 15.0 } } # ================================================================ # HELPER FUNCTIONS # ================================================================ def _is_spanish_holiday(date: datetime) -> bool: """Check if date is a Spanish holiday""" spanish_holidays = [ (1, 1), # Año Nuevo (1, 6), # Reyes Magos (5, 1), # Día del Trabajo (8, 15), # Asunción de la Virgen (10, 12), # Fiesta Nacional de España (11, 1), # Todos los Santos (12, 6), # Día de la Constitución (12, 8), # Inmaculada Concepción (12, 25), # Navidad ] return (date.month, date.day) in spanish_holidays @pytest.fixture def spanish_holidays_2023(): """List of Spanish holidays for 2023""" holidays = [] for month, day in [ (1, 1), (1, 6), (5, 1), (8, 15), (10, 12), (11, 1), (12, 6), (12, 8), (12, 25) ]: holidays.append(datetime(2023, month, day)) return holidays # ================================================================ # PERFORMANCE TESTING FIXTURES # ================================================================ @pytest.fixture def large_dataset_for_performance(): """Generate large dataset for performance testing""" # Generate 2 years of data with 15 products start_date = datetime(2022, 1, 1) end_date = datetime(2024, 1, 1) date_range = pd.date_range(start=start_date, end=end_date, freq='D') products = [ "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras", "Donuts", "Berlinas", "Napolitanas", "Ensaimadas", "Baguette", "Pan de Molde", "Bizcocho" ] data = [] for date in date_range: for product in products: # Realistic sales with patterns base_quantity = np.random.randint(5, 150) # Seasonal patterns if date.month in [12, 1]: # Winter/Holiday season base_quantity *= 1.4 elif date.month in [6, 7, 8]: # Summer base_quantity *= 0.8 # Weekly patterns if date.weekday() >= 5: # Weekends base_quantity *= 1.2 elif date.weekday() == 0: # Monday base_quantity *= 0.7 # Add noise quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1))) data.append({ "date": date.strftime("%Y-%m-%d"), "product": product, "quantity": quantity, "revenue": round(quantity * np.random.uniform(1.5, 8.0), 2), "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1), "precipitation": max(0, np.random.exponential(0.8)), "is_weekend": date.weekday() >= 5, "is_holiday": _is_spanish_holiday(date) }) return pd.DataFrame(data) @pytest.fixture def memory_monitor(): """Memory monitoring utility for performance tests""" import psutil import gc class MemoryMonitor: def __init__(self): self.process = psutil.Process() self.snapshots = [] def snapshot(self, label: str): gc.collect() # Force garbage collection memory_mb = self.process.memory_info().rss / 1024 / 1024 self.snapshots.append({ 'label': label, 'memory_mb': memory_mb, 'timestamp': datetime.now() }) return memory_mb def get_peak_usage(self): if not self.snapshots: return 0 return max(s['memory_mb'] for s in self.snapshots) def get_usage_increase(self): if len(self.snapshots) < 2: return 0 return self.snapshots[-1]['memory_mb'] - self.snapshots[0]['memory_mb'] def report(self): print("\n=== Memory Usage Report ===") for snapshot in self.snapshots: print(f"{snapshot['label']}: {snapshot['memory_mb']:.2f} MB") print(f"Peak Usage: {self.get_peak_usage():.2f} MB") print(f"Total Increase: {self.get_usage_increase():.2f} MB") return MemoryMonitor() @pytest.fixture def timing_monitor(): """Timing monitoring utility for performance tests""" import time class TimingMonitor: def __init__(self): self.timings = [] self.start_time = None def start(self, label: str): self.start_time = time.time() self.current_label = label def stop(self): if self.start_time is None: return 0 duration = time.time() - self.start_time self.timings.append({ 'label': self.current_label, 'duration': duration }) self.start_time = None return duration def get_total_time(self): return sum(t['duration'] for t in self.timings) def report(self): print("\n=== Timing Report ===") for timing in self.timings: print(f"{timing['label']}: {timing['duration']:.2f}s") print(f"Total Time: {self.get_total_time():.2f}s") return TimingMonitor() # ================================================================ # ADDITIONAL FIXTURES FOR COMPREHENSIVE TESTING # ================================================================ @pytest.fixture def mock_job_scheduler(): """Mock job scheduler for testing""" with patch('app.services.job_scheduler.JobScheduler') as mock_scheduler: mock_instance = Mock() mock_scheduler.return_value = mock_instance mock_instance.schedule_job.return_value = "scheduled_job_123" mock_instance.cancel_job.return_value = True mock_instance.get_job_status.return_value = "running" yield mock_instance @pytest.fixture def sample_model_metadata(): """Sample model metadata for testing""" return { "model_id": "test_model_123", "tenant_id": "test_tenant", "product_name": "Pan Integral", "model_type": "prophet", "training_date": datetime.now().isoformat(), "data_points_used": 365, "features_used": ["temperature", "is_weekend", "is_holiday"], "metrics": { "mape": 23.5, "rmse": 12.3, "mae": 8.7, "r2_score": 0.85 }, "hyperparameters": { "seasonality_mode": "additive", "changepoint_prior_scale": 0.05, "seasonality_prior_scale": 10.0 }, "version": "1.0", "status": "active" } @pytest.fixture def training_progress_states(): """Different training progress states for testing""" return [ {"status": "pending", "progress": 0, "current_step": "Initializing training job"}, {"status": "running", "progress": 10, "current_step": "Fetching sales data"}, {"status": "running", "progress": 25, "current_step": "Processing weather data"}, {"status": "running", "progress": 40, "current_step": "Processing traffic data"}, {"status": "running", "progress": 55, "current_step": "Engineering features"}, {"status": "running", "progress": 70, "current_step": "Training Pan Integral model"}, {"status": "running", "progress": 85, "current_step": "Validating model performance"}, {"status": "running", "progress": 95, "current_step": "Saving model artifacts"}, {"status": "completed", "progress": 100, "current_step": "Training completed successfully"} ] @pytest.fixture def error_scenarios(): """Different error scenarios for testing""" return { "insufficient_data": { "error_type": "DataError", "error_message": "Insufficient training data: only 15 days available, minimum 30 required", "error_code": "INSUFFICIENT_DATA" }, "external_api_failure": { "error_type": "ExternalAPIError", "error_message": "Failed to fetch weather data from AEMET API", "error_code": "WEATHER_API_ERROR" }, "model_training_failure": { "error_type": "ModelTrainingError", "error_message": "Prophet model training failed: unable to fit data", "error_code": "MODEL_TRAINING_FAILED" }, "data_quality_error": { "error_type": "DataQualityError", "error_message": "Data quality issues detected: 45% missing values in quantity column", "error_code": "DATA_QUALITY_POOR" } } @pytest.fixture def performance_benchmarks(): """Performance benchmarks for testing""" return { "single_product_training": { "max_duration_seconds": 120, "max_memory_mb": 500, "min_accuracy_mape": 50 }, "multi_product_training": { "max_duration_seconds": 300, "max_memory_mb": 1000, "min_accuracy_mape": 55 }, "data_processing": { "max_throughput_rows_per_second": 1000, "max_memory_per_1k_rows_mb": 10 }, "concurrent_jobs": { "max_concurrent_jobs": 5, "max_queue_time_seconds": 30 } } @pytest.fixture def mock_model_storage(): """Mock model storage system for testing""" storage = {} class MockModelStorage: def save_model(self, model_id: str, model_data: Any, metadata: Dict[str, Any]): storage[model_id] = { "model_data": model_data, "metadata": metadata, "saved_at": datetime.now() } return f"/models/{model_id}.pkl" def load_model(self, model_id: str): if model_id in storage: return storage[model_id]["model_data"] raise FileNotFoundError(f"Model {model_id} not found") def get_metadata(self, model_id: str): if model_id in storage: return storage[model_id]["metadata"] raise FileNotFoundError(f"Model {model_id} not found") def delete_model(self, model_id: str): if model_id in storage: del storage[model_id] return True return False def list_models(self, tenant_id: str = None): models = [] for model_id, data in storage.items(): if tenant_id is None or data["metadata"].get("tenant_id") == tenant_id: models.append({ "model_id": model_id, "metadata": data["metadata"], "saved_at": data["saved_at"] }) return models return MockModelStorage() @pytest.fixture def real_world_scenarios(): """Real-world bakery scenarios for testing""" return { "holiday_rush": { "description": "Christmas season with high demand for seasonal products", "date_range": ("2023-12-15", "2023-12-31"), "expected_patterns": { "Roscon Reyes": {"multiplier": 5.0, "trend": "increasing"}, "Pan Integral": {"multiplier": 1.3, "trend": "stable"}, "Tarta Chocolate": {"multiplier": 2.0, "trend": "increasing"} } }, "summer_slowdown": { "description": "Summer period with generally lower sales", "date_range": ("2023-07-01", "2023-08-31"), "expected_patterns": { "Pan Integral": {"multiplier": 0.8, "trend": "decreasing"}, "Croissant": {"multiplier": 0.9, "trend": "stable"}, "Cold_drinks": {"multiplier": 1.5, "trend": "increasing"} } }, "weekend_patterns": { "description": "Weekend shopping patterns", "expected_patterns": { "weekend_boost": 1.2, "peak_hours": ["10:00", "11:00", "18:00", "19:00"], "popular_products": ["Croissant", "Palmeras", "Tarta Chocolate"] } }, "weather_impact": { "description": "Weather impact on sales", "scenarios": { "rainy_day": {"bread_sales": 1.1, "pastry_sales": 0.9}, "hot_day": {"bread_sales": 0.8, "cold_items": 1.3}, "cold_day": {"bread_sales": 1.2, "hot_items": 1.4} } } } @pytest.fixture def data_quality_test_cases(): """Various data quality test cases""" return { "missing_values": { "quantity_missing_5pct": 0.05, "quantity_missing_20pct": 0.20, "quantity_missing_50pct": 0.50, "revenue_missing_10pct": 0.10 }, "outliers": { "extreme_high": 100, # 100x normal values "extreme_low": 0.01, # Near-zero values "negative_values": -1, "outlier_percentage": 0.01 }, "inconsistencies": { "future_dates": ["2025-12-31", "2026-01-01"], "invalid_dates": ["2023-13-01", "2023-02-30"], "mismatched_revenue": True, # Revenue doesn't match quantity * price "duplicate_records": True }, "insufficient_data": { "too_few_days": 10, "too_few_products": 1, "sporadic_data": 0.3 # Only 30% of expected data points } } @pytest.fixture def api_test_scenarios(): """API testing scenarios""" return { "authentication": { "valid_token": "Bearer valid_test_token_123", "invalid_token": "Bearer invalid_token", "expired_token": "Bearer expired_token_456", "missing_token": None }, "request_validation": { "valid_request": { "products": ["Pan Integral"], "include_weather": True, "include_traffic": True, "config": {"seasonality_mode": "additive"} }, "invalid_products": { "products": [], # Empty products list "include_weather": True }, "invalid_config": { "products": ["Pan Integral"], "config": {"seasonality_mode": "invalid_mode"} }, "missing_required_fields": { "include_weather": True # Missing products } }, "rate_limiting": { "max_requests_per_minute": 60, "burst_requests": 100 } } @pytest.fixture def integration_test_dependencies(): """Dependencies for integration testing""" class IntegrationDependencies: def __init__(self): self.external_services = {} self.databases = {} self.message_queues = {} self.storage_systems = {} def register_external_service(self, name: str, mock_instance): self.external_services[name] = mock_instance def register_database(self, name: str, mock_session): self.databases[name] = mock_session def register_message_queue(self, name: str, mock_queue): self.message_queues[name] = mock_queue def register_storage(self, name: str, mock_storage): self.storage_systems[name] = mock_storage def get_service(self, name: str): return self.external_services.get(name) def get_database(self, name: str): return self.databases.get(name) def are_all_services_healthy(self): # Mock health check for all registered services return len(self.external_services) > 0 return IntegrationDependencies() @pytest.fixture def load_test_configuration(): """Configuration for load testing""" return { "concurrent_users": { "light_load": 5, "medium_load": 15, "heavy_load": 30, "stress_load": 50 }, "test_duration": { "quick_test": 60, # 1 minute "standard_test": 300, # 5 minutes "extended_test": 900 # 15 minutes }, "request_patterns": { "constant_rate": "steady", "ramp_up": "increasing", "spike": "burst", "random": "variable" }, "success_criteria": { "min_success_rate": 0.95, "max_response_time": 30.0, # seconds "max_error_rate": 0.05 } } @pytest.fixture def mock_notification_system(): """Mock notification system for testing""" notifications_sent = [] class MockNotificationSystem: def send_training_started(self, tenant_id: str, job_id: str, products: List[str]): notification = { "type": "training_started", "tenant_id": tenant_id, "job_id": job_id, "products": products, "timestamp": datetime.now() } notifications_sent.append(notification) return notification def send_training_completed(self, tenant_id: str, job_id: str, results: Dict[str, Any]): notification = { "type": "training_completed", "tenant_id": tenant_id, "job_id": job_id, "results": results, "timestamp": datetime.now() } notifications_sent.append(notification) return notification def send_training_failed(self, tenant_id: str, job_id: str, error: str): notification = { "type": "training_failed", "tenant_id": tenant_id, "job_id": job_id, "error": error, "timestamp": datetime.now() } notifications_sent.append(notification) return notification def get_notifications(self, tenant_id: str = None): if tenant_id: return [n for n in notifications_sent if n["tenant_id"] == tenant_id] return notifications_sent def clear_notifications(self): notifications_sent.clear() return MockNotificationSystem() @pytest.fixture def test_metrics_collector(): """Test metrics collector for monitoring test performance""" metrics = {} class TestMetricsCollector: def __init__(self): self.start_times = {} self.counters = {} self.gauges = {} self.histograms = {} def start_timer(self, metric_name: str): self.start_times[metric_name] = time.time() def end_timer(self, metric_name: str): if metric_name in self.start_times: duration = time.time() - self.start_times[metric_name] if metric_name not in self.histograms: self.histograms[metric_name] = [] self.histograms[metric_name].append(duration) del self.start_times[metric_name] return duration return 0 def increment_counter(self, counter_name: str, value: int = 1): self.counters[counter_name] = self.counters.get(counter_name, 0) + value def set_gauge(self, gauge_name: str, value: float): self.gauges[gauge_name] = value def get_counter(self, counter_name: str): return self.counters.get(counter_name, 0) def get_gauge(self, gauge_name: str): return self.gauges.get(gauge_name, 0) def get_histogram_stats(self, histogram_name: str): if histogram_name not in self.histograms: return {} values = self.histograms[histogram_name] return { "count": len(values), "min": min(values) if values else 0, "max": max(values) if values else 0, "avg": sum(values) / len(values) if values else 0, "p50": sorted(values)[len(values)//2] if values else 0, "p95": sorted(values)[int(len(values)*0.95)] if values else 0, "p99": sorted(values)[int(len(values)*0.99)] if values else 0 } def get_all_metrics(self): return { "counters": self.counters, "gauges": self.gauges, "histograms": {name: self.get_histogram_stats(name) for name in self.histograms} } def reset(self): self.start_times.clear() self.counters.clear() self.gauges.clear() self.histograms.clear() import time return TestMetricsCollector() # ================================================================ # PYTEST PLUGINS AND HOOKS # ================================================================ def pytest_runtest_setup(item): """Setup before each test""" # Add any pre-test setup logic here pass def pytest_runtest_teardown(item, nextitem): """Teardown after each test""" # Add any post-test cleanup logic here import gc gc.collect() # Force garbage collection after each test def pytest_sessionstart(session): """Called after the Session object has been created""" print("\n" + "="*80) print("TRAINING SERVICE TEST SESSION STARTING") print("="*80) def pytest_sessionfinish(session, exitstatus): """Called after whole test run finished""" print("\n" + "="*80) print("TRAINING SERVICE TEST SESSION FINISHED") print(f"Exit Status: {exitstatus}") print("="*80) # ================================================================ # FINAL CONFIGURATION # ================================================================ # Ensure numpy doesn't use too many threads during testing import numpy as np np.seterr(all='ignore') # Ignore numpy warnings during tests # Configure pandas for testing import pandas as pd pd.set_option('display.max_columns', None) pd.set_option('display.width', None) pd.set_option('display.max_colwidth', 50) # Set random seeds for reproducible tests np.random.seed(42) import random random.seed(42)