# ================================================================
# services/training/tests/conftest.py
# ================================================================
"""
Test configuration and fixtures for Training Service

Provides shared fixtures, mock data, and test utilities.
"""

import pytest
import pytest_asyncio
import asyncio
import pandas as pd
import numpy as np
import tempfile
import os
from datetime import datetime, timedelta
from unittest.mock import Mock, AsyncMock, patch
from typing import Dict, List, Any
from pathlib import Path
import logging

from app.models.training import ModelTrainingLog, TrainedModel

# Configure pytest-asyncio
pytestmark = pytest.mark.asyncio

# Suppress Prophet logging during tests
logging.getLogger('prophet').setLevel(logging.WARNING)
logging.getLogger('cmdstanpy').setLevel(logging.WARNING)
# ================================================================
# INTEGRATION TEST FIXTURES
# ================================================================

@pytest_asyncio.fixture
async def integration_test_setup(
    mock_external_services,
    sample_bakery_sales_data,
    temp_model_storage
):
    """Complete setup for integration tests"""

    # Patch model storage path
    with patch('app.core.config.settings.MODEL_STORAGE_PATH', str(temp_model_storage)):
        # Patch data fetching to use sample data
        with patch('app.services.training_service.TrainingService._fetch_sales_data') as mock_fetch:
            mock_fetch.return_value = sample_bakery_sales_data

            yield {
                'external_services': mock_external_services,
                'sales_data': sample_bakery_sales_data,
                'model_storage': temp_model_storage,
                'mock_fetch': mock_fetch
            }


@pytest.fixture
def mock_messaging():
    """Mock messaging system for testing"""
    with patch('app.services.messaging.publish_job_started') as mock_started, \
         patch('app.services.messaging.publish_job_completed') as mock_completed, \
         patch('app.services.messaging.publish_job_failed') as mock_failed, \
         patch('app.services.messaging.publish_model_trained') as mock_model:

        yield {
            'publish_job_started': mock_started,
            'publish_job_completed': mock_completed,
            'publish_job_failed': mock_failed,
            'publish_model_trained': mock_model
        }
# ================================================================
# API TEST FIXTURES
# ================================================================

@pytest.fixture
def test_app():
    """Test FastAPI application instance"""
    from app.main import app
    return app


@pytest.fixture
def test_client(test_app):
    """Create synchronous test client for API testing"""
    from fastapi.testclient import TestClient

    with TestClient(test_app, base_url="http://test") as client:
        yield client


@pytest.fixture
def auth_headers():
    """Mock authentication headers"""
    return {
        "Authorization": "Bearer test_token_123",
        "X-Tenant-ID": "test_tenant_123"
    }
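# Usage sketch (hypothetical test -- the "/health" route is an assumption,
# not confirmed by this service's API):
#
#   def test_health_endpoint(test_client, auth_headers):
#       response = test_client.get("/health", headers=auth_headers)
#       assert response.status_code == 200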
# ================================================================
# ERROR SIMULATION FIXTURES
# ================================================================

@pytest.fixture
def failing_external_services():
    """Mock external services that fail, for error-handling tests"""
    with patch('app.external.aemet.AEMETClient') as mock_aemet, \
         patch('app.external.madrid_opendata.MadridOpenDataClient') as mock_madrid:

        # Configure clients to raise exceptions
        mock_aemet_instance = AsyncMock()
        mock_aemet.return_value = mock_aemet_instance
        mock_aemet_instance.get_historical_weather.side_effect = Exception("AEMET API Error")

        mock_madrid_instance = AsyncMock()
        mock_madrid.return_value = mock_madrid_instance
        mock_madrid_instance.get_historical_traffic.side_effect = Exception("Madrid API Error")

        yield {
            'aemet': mock_aemet_instance,
            'madrid': mock_madrid_instance
        }


@pytest.fixture
def corrupted_sales_data(sample_bakery_sales_data):
    """Sales data with various quality issues for testing"""
    corrupted_data = sample_bakery_sales_data.copy()

    # Introduce missing values (20% of quantity data)
    missing_mask = np.random.random(len(corrupted_data)) < 0.2
    corrupted_data.loc[missing_mask, 'quantity'] = np.nan

    # Introduce extreme outliers (1% of data)
    outlier_mask = np.random.random(len(corrupted_data)) < 0.01
    corrupted_data.loc[outlier_mask, 'quantity'] *= 100

    # Introduce inconsistent future dates (0.5% of data)
    future_mask = np.random.random(len(corrupted_data)) < 0.005
    corrupted_data.loc[future_mask, 'date'] = "2025-12-31"

    # Introduce negative values (0.2% of data)
    negative_mask = np.random.random(len(corrupted_data)) < 0.002
    corrupted_data.loc[negative_mask, 'quantity'] = -10

    return corrupted_data
# ================================================================
# VALIDATION TEST FIXTURES
# ================================================================

@pytest.fixture
def insufficient_sales_data():
    """Sales data with insufficient volume for training"""
    # Only 10 days of data
    start_date = datetime(2023, 1, 1)
    dates = [start_date + timedelta(days=i) for i in range(10)]

    data = []
    for date in dates:
        data.append({
            "date": date.strftime("%Y-%m-%d"),
            "product": "Pan Integral",
            "quantity": np.random.randint(10, 50),
            "revenue": round(np.random.uniform(20, 100), 2),
            "temperature": round(np.random.uniform(10, 25), 1),
            "precipitation": 0.0,
            "is_weekend": date.weekday() >= 5,
            "is_holiday": False
        })

    return pd.DataFrame(data)


@pytest.fixture
def seasonal_product_data():
    """Data for seasonal product (Roscon Reyes) testing"""
    start_date = datetime(2023, 1, 1)
    dates = [start_date + timedelta(days=i) for i in range(365)]

    data = []
    for date in dates:
        # Roscon Reyes has a strong seasonal pattern (Christmas specialty)
        base_qty = 5  # Very low base

        if date.month == 12:  # December - high sales
            base_qty = 20 + (date.day - 1) * 2  # Increasing through December
        elif date.month == 1 and date.day <= 6:  # Until Epiphany
            base_qty = 50

        # Add some noise
        quantity = max(1, int(base_qty + np.random.normal(0, base_qty * 0.2)))

        data.append({
            "date": date.strftime("%Y-%m-%d"),
            "product": "Roscon Reyes",
            "quantity": quantity,
            "revenue": round(quantity * 25.0, 2),  # Expensive specialty item
            "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi), 1),
            "precipitation": max(0, np.random.exponential(0.5)),
            "is_weekend": date.weekday() >= 5,
            "is_holiday": _is_spanish_holiday(date)
        })

    return pd.DataFrame(data)
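# Usage sketch (illustrative only -- how the service rejects short history is
# an assumption, but the fixture's shape is deterministic):
#
#   def test_short_history_detected(insufficient_sales_data):
#       assert len(insufficient_sales_data) == 10  # 10 days, single product
#       assert insufficient_sales_data["product"].nunique() == 1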
# ================================================================
# CLEANUP FIXTURES
# ================================================================

@pytest.fixture(autouse=True)
def cleanup_after_test():
    """Automatic cleanup after each test"""
    yield

    # Clear any temporary model files left behind by tests
    import shutil

    temp_dirs = [d for d in os.listdir(tempfile.gettempdir()) if d.startswith('test_models_')]
    for temp_dir in temp_dirs:
        try:
            shutil.rmtree(os.path.join(tempfile.gettempdir(), temp_dir))
        except OSError:
            pass
# ================================================================
# TEST DATA VALIDATION UTILITIES
# ================================================================

class TestDataValidator:
    """Utility class for validating test data quality"""

    # Prevent pytest from collecting this helper as a test class
    __test__ = False

    @staticmethod
    def validate_sales_data(df: pd.DataFrame) -> Dict[str, Any]:
        """Validate sales data structure and quality"""
        required_columns = ['date', 'product', 'quantity', 'revenue']
        missing_columns = [col for col in required_columns if col not in df.columns]

        if missing_columns:
            return {'valid': False, 'error': f'Missing columns: {missing_columns}'}

        # Check data types
        try:
            pd.to_datetime(df['date'])
        except (ValueError, TypeError):
            return {'valid': False, 'error': 'Invalid date format'}

        if not pd.api.types.is_numeric_dtype(df['quantity']):
            return {'valid': False, 'error': 'Quantity must be numeric'}

        if not pd.api.types.is_numeric_dtype(df['revenue']):
            return {'valid': False, 'error': 'Revenue must be numeric'}

        # Check for negative values
        if (df['quantity'] < 0).any():
            return {'valid': False, 'error': 'Negative quantities found'}

        if (df['revenue'] < 0).any():
            return {'valid': False, 'error': 'Negative revenue found'}

        return {'valid': True, 'rows': len(df), 'products': df['product'].nunique()}


@pytest.fixture
def test_data_validator():
    """Test data validator utility"""
    return TestDataValidator()
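# Usage sketch (illustrative; relies only on fixtures defined in this file):
#
#   def test_sample_data_is_well_formed(test_data_validator, sample_bakery_sales_data):
#       result = test_data_validator.validate_sales_data(sample_bakery_sales_data)
#       assert result['valid'] is True
#       assert result['products'] == 12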
# ================================================================
# LOGGING CONFIGURATION FOR TESTS
# ================================================================

@pytest.fixture(autouse=True)
def configure_test_logging():
    """Configure logging for tests"""
    # Reduce log level for external libraries during tests
    logging.getLogger('prophet').setLevel(logging.WARNING)
    logging.getLogger('cmdstanpy').setLevel(logging.ERROR)
    logging.getLogger('matplotlib').setLevel(logging.WARNING)
    logging.getLogger('urllib3').setLevel(logging.WARNING)

    # Configure our app logging for tests
    logger = logging.getLogger('app')
    logger.setLevel(logging.INFO)

    yield

    # Reset root logger handlers after each test
    logging.getLogger().handlers.clear()
# ================================================================
# ENVIRONMENT SETUP
# ================================================================

@pytest.fixture(scope="session", autouse=True)
def setup_test_environment():
    """Setup test environment variables"""
    os.environ.update({
        'ENVIRONMENT': 'test',
        'LOG_LEVEL': 'INFO',
        'MODEL_STORAGE_PATH': '/tmp/test_models',
        'MAX_TRAINING_TIME_MINUTES': '5',
        'MIN_TRAINING_DATA_DAYS': '7',
        'PROPHET_SEASONALITY_MODE': 'additive',
        'ENABLE_SYNTHETIC_DATA': 'true',
        'SKIP_EXTERNAL_API_CALLS': 'true'
    })

    yield

    # Clean up test environment variables
    test_vars = [
        'ENVIRONMENT', 'LOG_LEVEL', 'MODEL_STORAGE_PATH',
        'MAX_TRAINING_TIME_MINUTES', 'MIN_TRAINING_DATA_DAYS',
        'PROPHET_SEASONALITY_MODE', 'ENABLE_SYNTHETIC_DATA',
        'SKIP_EXTERNAL_API_CALLS'
    ]
    for var in test_vars:
        os.environ.pop(var, None)
# ================================================================
# PYTEST CONFIGURATION
# ================================================================

@pytest.fixture(scope="session")
def event_loop():
    """Create an instance of the default event loop for the test session."""
    loop = asyncio.new_event_loop()
    yield loop
    loop.close()
def pytest_configure(config):
    """Configure pytest with custom markers"""
    config.addinivalue_line(
        "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')"
    )
    config.addinivalue_line(
        "markers", "integration: marks tests as integration tests"
    )
    config.addinivalue_line(
        "markers", "unit: marks tests as unit tests"
    )
    config.addinivalue_line(
        "markers", "performance: marks tests as performance tests"
    )
    config.addinivalue_line(
        "markers", "external: marks tests that require external services"
    )


def pytest_collection_modifyitems(config, items):
    """Modify test collection to add markers automatically"""
    for item in items:
        # Mark performance tests
        if "performance" in item.nodeid:
            item.add_marker(pytest.mark.performance)
            item.add_marker(pytest.mark.slow)

        # Mark integration tests
        if "integration" in item.nodeid:
            item.add_marker(pytest.mark.integration)

        # Mark end-to-end tests
        if "end_to_end" in item.nodeid:
            item.add_marker(pytest.mark.integration)
            item.add_marker(pytest.mark.external)

        # Mark unit tests (default for everything else)
        if not any(marker.name in ["integration", "performance"] for marker in item.iter_markers()):
            item.add_marker(pytest.mark.unit)
# ================================================================
# TEST DATABASE FIXTURES
# ================================================================

@pytest_asyncio.fixture
async def test_db_session():
    """Create async test database session"""
    from app.core.database import database_manager

    async with database_manager.async_session_local() as session:
        yield session
@pytest_asyncio.fixture
async def training_job_in_db(test_db_session):
    """Create a training job in database for testing"""
    job = ModelTrainingLog(
        job_id="test-job-123",
        tenant_id="test-tenant",
        status="running",
        progress=50,
        current_step="Training models",
        start_time=datetime.now(),
        config={"include_weather": True},
        created_at=datetime.now(),
        updated_at=datetime.now()
    )
    test_db_session.add(job)
    await test_db_session.commit()
    await test_db_session.refresh(job)
    return job


@pytest_asyncio.fixture
async def trained_model_in_db(test_db_session):
    """Create a trained model in database for testing"""
    model = TrainedModel(
        model_id="test-model-123",
        tenant_id="test-tenant",
        product_name="Pan Integral",
        model_type="prophet",
        model_path="/tmp/test_model.pkl",
        version=1,
        training_samples=100,
        features=["temperature", "humidity"],
        hyperparameters={"seasonality_mode": "additive"},
        training_metrics={"mae": 2.5, "mse": 8.3},
        is_active=True,
        created_at=datetime.now()
    )
    test_db_session.add(model)
    await test_db_session.commit()
    await test_db_session.refresh(model)
    return model
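# Usage sketch (illustrative -- assumes a SQLAlchemy AsyncSession and that
# ModelTrainingLog has an "id" primary key; adjust to the real schema):
#
#   async def test_job_round_trip(test_db_session, training_job_in_db):
#       fetched = await test_db_session.get(ModelTrainingLog, training_job_in_db.id)
#       assert fetched.status == "running"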
# ================================================================
# SAMPLE DATA FIXTURES
# ================================================================

@pytest.fixture
def sample_bakery_sales_data():
    """Generate comprehensive bakery sales data for testing"""
    # Generate 1 year of data
    start_date = datetime(2023, 1, 1)
    dates = [start_date + timedelta(days=i) for i in range(365)]

    # Spanish bakery products with realistic patterns
    products = [
        "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
        "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras",
        "Donuts", "Berlinas", "Napolitanas", "Ensaimadas"
    ]

    # Product-specific configurations
    product_config = {
        "Pan Integral": {"base": 80, "price": 2.80, "weekend_boost": 1.1, "seasonal": False},
        "Pan Blanco": {"base": 120, "price": 2.50, "weekend_boost": 1.2, "seasonal": False},
        "Croissant": {"base": 45, "price": 1.50, "weekend_boost": 1.4, "seasonal": False},
        "Magdalenas": {"base": 30, "price": 1.20, "weekend_boost": 1.1, "seasonal": False},
        "Empanadas": {"base": 25, "price": 3.50, "weekend_boost": 0.9, "seasonal": False},
        "Tarta Chocolate": {"base": 15, "price": 18.00, "weekend_boost": 1.6, "seasonal": False},
        "Roscon Reyes": {"base": 8, "price": 25.00, "weekend_boost": 1.0, "seasonal": True},
        "Palmeras": {"base": 12, "price": 1.80, "weekend_boost": 1.2, "seasonal": False},
        "Donuts": {"base": 20, "price": 1.40, "weekend_boost": 1.3, "seasonal": False},
        "Berlinas": {"base": 18, "price": 1.60, "weekend_boost": 1.2, "seasonal": False},
        "Napolitanas": {"base": 22, "price": 1.70, "weekend_boost": 1.1, "seasonal": False},
        "Ensaimadas": {"base": 15, "price": 2.20, "weekend_boost": 1.0, "seasonal": False}
    }

    data = []

    for date in dates:
        # Calculate date-specific factors
        day_of_year = date.timetuple().tm_yday
        is_weekend = date.weekday() >= 5
        is_holiday = _is_spanish_holiday(date)

        # Madrid weather simulation
        temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi) + np.random.normal(0, 3)
        precip = max(0, np.random.exponential(0.8))

        for product in products:
            config = product_config[product]

            # Base quantity
            base_qty = config["base"]

            # Apply weekend boost
            if is_weekend:
                base_qty *= config["weekend_boost"]

            # Apply holiday boost
            if is_holiday:
                base_qty *= 1.3

            # Seasonal products (like Roscon Reyes for Christmas)
            if config["seasonal"] and product == "Roscon Reyes":
                if date.month == 12:
                    # Linear ramp-up through December
                    base_qty *= (1 + (date.day - 1) / 5)
                elif date.month == 1 and date.day <= 6:
                    # High demand until Epiphany (Jan 6)
                    base_qty *= 3
                else:
                    # Very low demand rest of year
                    base_qty *= 0.1

            # Weather effects
            if temp > 30:  # Very hot days
                if product in ["Pan Integral", "Pan Blanco"]:
                    base_qty *= 0.7  # Less bread
                elif product in ["Donuts", "Berlinas"]:
                    base_qty *= 0.8  # Less fried items
            elif temp < 5:  # Cold days
                base_qty *= 1.15  # More baked goods

            # Add realistic noise and ensure minimum of 1
            quantity = max(1, int(base_qty + np.random.normal(0, base_qty * 0.12)))
            revenue = round(quantity * config["price"], 2)

            data.append({
                "date": date.strftime("%Y-%m-%d"),
                "product": product,
                "quantity": quantity,
                "revenue": revenue,
                "temperature": round(temp, 1),
                "precipitation": round(precip, 2),
                "is_weekend": is_weekend,
                "is_holiday": is_holiday
            })

    return pd.DataFrame(data)
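# Usage sketch (illustrative): the generator yields one row per product per
# day, so the shape is deterministic even though quantities are random:
#
#   def test_generated_data_shape(sample_bakery_sales_data):
#       assert len(sample_bakery_sales_data) == 365 * 12  # days x products
#       assert {"date", "product", "quantity", "revenue"} <= set(sample_bakery_sales_data.columns)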
@pytest.fixture
def sample_weather_data():
    """Generate realistic Madrid weather data"""
    start_date = datetime(2023, 1, 1)
    weather_data = []

    for i in range(365):
        date = start_date + timedelta(days=i)
        day_of_year = date.timetuple().tm_yday

        # Madrid climate simulation
        base_temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi)

        # Seasonal humidity patterns
        base_humidity = 50 + 20 * np.sin((day_of_year / 365) * 2 * np.pi + np.pi)

        weather_data.append({
            "date": date,
            "temperature": round(base_temp + np.random.normal(0, 4), 1),
            "precipitation": max(0, np.random.exponential(1.2)),
            "humidity": round(min(95, max(10, base_humidity + np.random.normal(0, 10))), 1),
            "wind_speed": np.random.uniform(3, 20),
            "pressure": np.random.uniform(995, 1025),
            "description": np.random.choice([
                "Soleado", "Parcialmente nublado", "Nublado",
                "Lluvia ligera", "Despejado", "Variable"
            ]),
            "source": "aemet_test"
        })

    return weather_data
@pytest.fixture
def sample_traffic_data():
    """Generate realistic Madrid traffic data"""
    start_date = datetime(2023, 1, 1)
    traffic_data = []

    for i in range(365):
        date = start_date + timedelta(days=i)

        # Generate multiple measurements per day (every 2 hours, 6 AM - 10 PM)
        for hour in range(6, 22, 2):
            measurement_time = date.replace(hour=hour)

            # Madrid traffic patterns
            if hour in [7, 8, 9, 18, 19, 20]:  # Rush hours
                volume = np.random.randint(1200, 2000)
                congestion = "high"
                speed = np.random.randint(10, 25)
                occupation = np.random.randint(60, 90)
            elif hour in [12, 13, 14]:  # Lunch time
                volume = np.random.randint(800, 1200)
                congestion = "medium"
                speed = np.random.randint(20, 35)
                occupation = np.random.randint(40, 70)
            else:  # Off-peak
                volume = np.random.randint(300, 800)
                congestion = "low"
                speed = np.random.randint(30, 50)
                occupation = np.random.randint(15, 50)

            # Weekend adjustment
            if date.weekday() >= 5:
                volume = int(volume * 0.8)  # Less traffic on weekends
                speed = min(50, int(speed * 1.2))  # Faster speeds

            traffic_data.append({
                "date": measurement_time,
                "traffic_volume": volume,
                "occupation_percentage": occupation,
                "load_percentage": min(95, occupation + np.random.randint(5, 15)),
                "average_speed": speed,
                "congestion_level": congestion,
                "pedestrian_count": np.random.randint(100, 800),
                "measurement_point_id": "MADRID_TEST_001",
                "measurement_point_name": "Plaza Mayor",
                "road_type": "URB",
                "source": "madrid_opendata_test"
            })

    return traffic_data
# ================================================================
# MOCK SERVICES FIXTURES
# ================================================================

@pytest.fixture
def mock_aemet_client(sample_weather_data):
    """Mock AEMET weather API client"""
    with patch('app.external.aemet.AEMETClient') as mock_class:
        mock_instance = AsyncMock()
        mock_class.return_value = mock_instance

        # Configure mock responses
        mock_instance.get_historical_weather.return_value = sample_weather_data
        mock_instance.get_current_weather.return_value = sample_weather_data[-1]
        mock_instance.get_weather_forecast.return_value = sample_weather_data[-7:]

        yield mock_instance


@pytest.fixture
def mock_madrid_client(sample_traffic_data):
    """Mock Madrid OpenData API client"""
    with patch('app.external.madrid_opendata.MadridOpenDataClient') as mock_class:
        mock_instance = AsyncMock()
        mock_class.return_value = mock_instance

        # Configure mock responses
        mock_instance.get_historical_traffic.return_value = sample_traffic_data
        mock_instance.get_current_traffic.return_value = sample_traffic_data[-1]

        yield mock_instance


@pytest.fixture
def mock_external_services(mock_aemet_client, mock_madrid_client):
    """Combined mock for all external services"""
    return {
        'aemet': mock_aemet_client,
        'madrid': mock_madrid_client
    }
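# Usage sketch (illustrative -- assumes the code under test instantiates the
# patched client classes and awaits their coroutine methods):
#
#   async def test_weather_fetch_is_mocked(mock_external_services):
#       data = await mock_external_services['aemet'].get_historical_weather()
#       assert len(data) == 365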
# ================================================================
# ML COMPONENT FIXTURES
# ================================================================

@pytest.fixture
def mock_ml_trainer():
    """Mock ML trainer for testing"""
    with patch('app.ml.trainer.BakeryMLTrainer') as mock_class:
        mock_instance = AsyncMock()
        mock_class.return_value = mock_instance

        # Configure successful training responses
        mock_instance.train_single_product.return_value = {
            "status": "completed",
            "model_id": "test_model_123",
            "metrics": {
                "mape": 25.5,
                "rmse": 12.3,
                "mae": 8.7,
                "r2_score": 0.85
            },
            "training_duration": 45.2,
            "data_points_used": 365
        }

        mock_instance.train_tenant_models.return_value = [
            {
                "product_name": "Pan Integral",
                "model_id": "model_pan_integral_123",
                "metrics": {"mape": 22.1, "rmse": 10.5, "mae": 7.8},
                "training_completed": True
            },
            {
                "product_name": "Croissant",
                "model_id": "model_croissant_456",
                "metrics": {"mape": 28.3, "rmse": 8.9, "mae": 6.2},
                "training_completed": True
            }
        ]

        yield mock_instance


@pytest.fixture
def mock_data_processor():
    """Mock data processor for testing"""
    with patch('app.ml.data_processor.BakeryDataProcessor') as mock_class:
        mock_instance = AsyncMock()
        mock_class.return_value = mock_instance

        # Configure mock responses
        mock_instance.validate_data_quality.return_value = {
            "is_valid": True,
            "data_points": 1000,
            "missing_percentage": 2.5,
            "issues": []
        }

        mock_instance.prepare_training_data.return_value = pd.DataFrame({
            "ds": pd.date_range("2023-01-01", periods=365),
            "y": np.random.randint(10, 100, 365),
            "temperature": np.random.uniform(0, 35, 365),
            "traffic_volume": np.random.randint(100, 2000, 365)
        })

        yield mock_instance
@pytest.fixture
def mock_data_service():
    """Mock data service for testing"""
    mock_service = Mock()
    mock_service.get_sales_data = AsyncMock(return_value=[
        {"date": "2024-01-01", "product_name": "Pan Integral", "quantity": 45},
        {"date": "2024-01-02", "product_name": "Pan Integral", "quantity": 38}
    ])
    mock_service.get_weather_data = AsyncMock(return_value=[
        {"date": "2024-01-01", "temperature": 20.5, "humidity": 65}
    ])
    mock_service.get_traffic_data = AsyncMock(return_value=[
        {"date": "2024-01-01", "traffic_index": 0.7}
    ])

    return mock_service
@pytest.fixture
def mock_prophet_manager():
    """Mock Prophet manager for testing"""
    with patch('app.ml.prophet_manager.BakeryProphetManager') as mock_class:
        mock_instance = AsyncMock()
        mock_class.return_value = mock_instance

        # Configure mock responses
        mock_instance.train_model.return_value = {
            "model": Mock(),  # Mock Prophet model
            "metrics": {
                "mape": 23.7,
                "rmse": 11.2,
                "mae": 8.1
            },
            "cross_validation": {
                "cv_mape_mean": 25.1,
                "cv_mape_std": 3.2
            }
        }

        mock_instance.generate_predictions.return_value = pd.DataFrame({
            "ds": pd.date_range("2024-01-01", periods=30),
            "yhat": np.random.uniform(20, 80, 30),
            "yhat_lower": np.random.uniform(10, 60, 30),
            "yhat_upper": np.random.uniform(30, 100, 30)
        })

        yield mock_instance
# ================================================================
# UTILITY FIXTURES
# ================================================================

@pytest.fixture
def temp_model_storage():
    """Temporary directory for model storage during tests"""
    with tempfile.TemporaryDirectory() as temp_dir:
        yield Path(temp_dir)


@pytest.fixture
def test_config():
    """Test configuration settings"""
    return {
        "MODEL_STORAGE_PATH": "/tmp/test_models",
        "MAX_TRAINING_TIME_MINUTES": 5,
        "MIN_TRAINING_DATA_DAYS": 7,
        "PROPHET_SEASONALITY_MODE": "additive",
        "INCLUDE_SPANISH_HOLIDAYS": True,
        "ENABLE_SYNTHETIC_DATA": True
    }


@pytest.fixture
def sample_training_request():
    """Sample training request for API tests"""
    return {
        "products": ["Pan Integral", "Croissant"],
        "include_weather": True,
        "include_traffic": True,
        "config": {
            "seasonality_mode": "additive",
            "changepoint_prior_scale": 0.05,
            "seasonality_prior_scale": 10.0,
            "validation_enabled": True
        }
    }


@pytest.fixture
def sample_single_product_request():
    """Sample single product training request"""
    return {
        "product_name": "Pan Integral",
        "include_weather": True,
        "include_traffic": False,
        "config": {
            "seasonality_mode": "multiplicative",
            "include_holidays": True,
            "holiday_prior_scale": 15.0
        }
    }
# ================================================================
# HELPER FUNCTIONS
# ================================================================

def _is_spanish_holiday(date: datetime) -> bool:
    """Check whether a date falls on a fixed-date Spanish national holiday"""
    spanish_holidays = [
        (1, 1),    # Año Nuevo
        (1, 6),    # Reyes Magos
        (5, 1),    # Día del Trabajo
        (8, 15),   # Asunción de la Virgen
        (10, 12),  # Fiesta Nacional de España
        (11, 1),   # Todos los Santos
        (12, 6),   # Día de la Constitución
        (12, 8),   # Inmaculada Concepción
        (12, 25),  # Navidad
    ]
    return (date.month, date.day) in spanish_holidays


@pytest.fixture
def spanish_holidays_2023():
    """List of Spanish holidays for 2023"""
    holidays = []
    for month, day in [
        (1, 1), (1, 6), (5, 1), (8, 15), (10, 12),
        (11, 1), (12, 6), (12, 8), (12, 25)
    ]:
        holidays.append(datetime(2023, month, day))
    return holidays
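# Behaviour sketch for _is_spanish_holiday (fixed-date national holidays only;
# movable feasts such as Easter are deliberately not covered):
#
#   _is_spanish_holiday(datetime(2023, 12, 25))  # True  (Navidad)
#   _is_spanish_holiday(datetime(2023, 3, 15))   # False (ordinary day)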
# ================================================================
# PERFORMANCE TESTING FIXTURES
# ================================================================

@pytest.fixture
def large_dataset_for_performance():
    """Generate large dataset for performance testing"""
    # Generate 2 years of data with 15 products
    start_date = datetime(2022, 1, 1)
    end_date = datetime(2024, 1, 1)
    date_range = pd.date_range(start=start_date, end=end_date, freq='D')

    products = [
        "Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
        "Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras",
        "Donuts", "Berlinas", "Napolitanas", "Ensaimadas",
        "Baguette", "Pan de Molde", "Bizcocho"
    ]

    data = []
    for date in date_range:
        for product in products:
            # Realistic sales with patterns
            base_quantity = np.random.randint(5, 150)

            # Seasonal patterns
            if date.month in [12, 1]:  # Winter/holiday season
                base_quantity *= 1.4
            elif date.month in [6, 7, 8]:  # Summer
                base_quantity *= 0.8

            # Weekly patterns
            if date.weekday() >= 5:  # Weekends
                base_quantity *= 1.2
            elif date.weekday() == 0:  # Monday
                base_quantity *= 0.7

            # Add noise
            quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1)))

            data.append({
                "date": date.strftime("%Y-%m-%d"),
                "product": product,
                "quantity": quantity,
                "revenue": round(quantity * np.random.uniform(1.5, 8.0), 2),
                "temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1),
                "precipitation": max(0, np.random.exponential(0.8)),
                "is_weekend": date.weekday() >= 5,
                "is_holiday": _is_spanish_holiday(date)
            })

    return pd.DataFrame(data)


@pytest.fixture
def memory_monitor():
    """Memory monitoring utility for performance tests"""
    import psutil
    import gc

    class MemoryMonitor:
        def __init__(self):
            self.process = psutil.Process()
            self.snapshots = []

        def snapshot(self, label: str):
            gc.collect()  # Force garbage collection before measuring
            memory_mb = self.process.memory_info().rss / 1024 / 1024
            self.snapshots.append({
                'label': label,
                'memory_mb': memory_mb,
                'timestamp': datetime.now()
            })
            return memory_mb

        def get_peak_usage(self):
            if not self.snapshots:
                return 0
            return max(s['memory_mb'] for s in self.snapshots)

        def get_usage_increase(self):
            if len(self.snapshots) < 2:
                return 0
            return self.snapshots[-1]['memory_mb'] - self.snapshots[0]['memory_mb']

        def report(self):
            print("\n=== Memory Usage Report ===")
            for snapshot in self.snapshots:
                print(f"{snapshot['label']}: {snapshot['memory_mb']:.2f} MB")
            print(f"Peak Usage: {self.get_peak_usage():.2f} MB")
            print(f"Total Increase: {self.get_usage_increase():.2f} MB")

    return MemoryMonitor()


@pytest.fixture
def timing_monitor():
    """Timing monitoring utility for performance tests"""
    import time

    class TimingMonitor:
        def __init__(self):
            self.timings = []
            self.start_time = None
            self.current_label = None

        def start(self, label: str):
            self.start_time = time.time()
            self.current_label = label

        def stop(self):
            if self.start_time is None:
                return 0

            duration = time.time() - self.start_time
            self.timings.append({
                'label': self.current_label,
                'duration': duration
            })
            self.start_time = None
            return duration

        def get_total_time(self):
            return sum(t['duration'] for t in self.timings)

        def report(self):
            print("\n=== Timing Report ===")
            for timing in self.timings:
                print(f"{timing['label']}: {timing['duration']:.2f}s")
            print(f"Total Time: {self.get_total_time():.2f}s")

    return TimingMonitor()
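# Usage sketch combining the three performance fixtures (illustrative):
#
#   @pytest.mark.performance
#   def test_bulk_aggregation(large_dataset_for_performance, memory_monitor, timing_monitor):
#       memory_monitor.snapshot("before")
#       timing_monitor.start("groupby")
#       totals = large_dataset_for_performance.groupby("product")["quantity"].sum()
#       timing_monitor.stop()
#       memory_monitor.snapshot("after")
#       assert len(totals) == 15  # one row per product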
# ================================================================
# ADDITIONAL FIXTURES FOR COMPREHENSIVE TESTING
# ================================================================

@pytest.fixture
def mock_job_scheduler():
    """Mock job scheduler for testing"""
    with patch('app.services.job_scheduler.JobScheduler') as mock_scheduler:
        mock_instance = Mock()
        mock_scheduler.return_value = mock_instance

        mock_instance.schedule_job.return_value = "scheduled_job_123"
        mock_instance.cancel_job.return_value = True
        mock_instance.get_job_status.return_value = "running"

        yield mock_instance


@pytest.fixture
def sample_model_metadata():
    """Sample model metadata for testing"""
    return {
        "model_id": "test_model_123",
        "tenant_id": "test_tenant",
        "product_name": "Pan Integral",
        "model_type": "prophet",
        "training_date": datetime.now().isoformat(),
        "data_points_used": 365,
        "features_used": ["temperature", "is_weekend", "is_holiday"],
        "metrics": {
            "mape": 23.5,
            "rmse": 12.3,
            "mae": 8.7,
            "r2_score": 0.85
        },
        "hyperparameters": {
            "seasonality_mode": "additive",
            "changepoint_prior_scale": 0.05,
            "seasonality_prior_scale": 10.0
        },
        "version": "1.0",
        "status": "active"
    }
@pytest.fixture
def training_progress_states():
    """Different training progress states for testing"""
    return [
        {"status": "pending", "progress": 0, "current_step": "Initializing training job"},
        {"status": "running", "progress": 10, "current_step": "Fetching sales data"},
        {"status": "running", "progress": 25, "current_step": "Processing weather data"},
        {"status": "running", "progress": 40, "current_step": "Processing traffic data"},
        {"status": "running", "progress": 55, "current_step": "Engineering features"},
        {"status": "running", "progress": 70, "current_step": "Training Pan Integral model"},
        {"status": "running", "progress": 85, "current_step": "Validating model performance"},
        {"status": "running", "progress": 95, "current_step": "Saving model artifacts"},
        {"status": "completed", "progress": 100, "current_step": "Training completed successfully"}
    ]


@pytest.fixture
def error_scenarios():
    """Different error scenarios for testing"""
    return {
        "insufficient_data": {
            "error_type": "DataError",
            "error_message": "Insufficient training data: only 15 days available, minimum 30 required",
            "error_code": "INSUFFICIENT_DATA"
        },
        "external_api_failure": {
            "error_type": "ExternalAPIError",
            "error_message": "Failed to fetch weather data from AEMET API",
            "error_code": "WEATHER_API_ERROR"
        },
        "model_training_failure": {
            "error_type": "ModelTrainingError",
            "error_message": "Prophet model training failed: unable to fit data",
            "error_code": "MODEL_TRAINING_FAILED"
        },
        "data_quality_error": {
            "error_type": "DataQualityError",
            "error_message": "Data quality issues detected: 45% missing values in quantity column",
            "error_code": "DATA_QUALITY_POOR"
        }
    }


@pytest.fixture
def performance_benchmarks():
    """Performance benchmarks for testing"""
    return {
        "single_product_training": {
            "max_duration_seconds": 120,
            "max_memory_mb": 500,
            "min_accuracy_mape": 50
        },
        "multi_product_training": {
            "max_duration_seconds": 300,
            "max_memory_mb": 1000,
            "min_accuracy_mape": 55
        },
        "data_processing": {
            "max_throughput_rows_per_second": 1000,
            "max_memory_per_1k_rows_mb": 10
        },
        "concurrent_jobs": {
            "max_concurrent_jobs": 5,
            "max_queue_time_seconds": 30
        }
    }
@pytest.fixture
def mock_model_storage():
    """Mock model storage system for testing"""
    storage = {}

    class MockModelStorage:
        def save_model(self, model_id: str, model_data: Any, metadata: Dict[str, Any]):
            storage[model_id] = {
                "model_data": model_data,
                "metadata": metadata,
                "saved_at": datetime.now()
            }
            return f"/models/{model_id}.pkl"

        def load_model(self, model_id: str):
            if model_id in storage:
                return storage[model_id]["model_data"]
            raise FileNotFoundError(f"Model {model_id} not found")

        def get_metadata(self, model_id: str):
            if model_id in storage:
                return storage[model_id]["metadata"]
            raise FileNotFoundError(f"Model {model_id} not found")

        def delete_model(self, model_id: str):
            if model_id in storage:
                del storage[model_id]
                return True
            return False

        def list_models(self, tenant_id: str = None):
            models = []
            for model_id, data in storage.items():
                if tenant_id is None or data["metadata"].get("tenant_id") == tenant_id:
                    models.append({
                        "model_id": model_id,
                        "metadata": data["metadata"],
                        "saved_at": data["saved_at"]
                    })
            return models

    return MockModelStorage()


@pytest.fixture
def real_world_scenarios():
    """Real-world bakery scenarios for testing"""
    return {
        "holiday_rush": {
            "description": "Christmas season with high demand for seasonal products",
            "date_range": ("2023-12-15", "2023-12-31"),
            "expected_patterns": {
                "Roscon Reyes": {"multiplier": 5.0, "trend": "increasing"},
                "Pan Integral": {"multiplier": 1.3, "trend": "stable"},
                "Tarta Chocolate": {"multiplier": 2.0, "trend": "increasing"}
            }
        },
        "summer_slowdown": {
            "description": "Summer period with generally lower sales",
            "date_range": ("2023-07-01", "2023-08-31"),
            "expected_patterns": {
                "Pan Integral": {"multiplier": 0.8, "trend": "decreasing"},
                "Croissant": {"multiplier": 0.9, "trend": "stable"},
                "Cold_drinks": {"multiplier": 1.5, "trend": "increasing"}
            }
        },
        "weekend_patterns": {
            "description": "Weekend shopping patterns",
            "expected_patterns": {
                "weekend_boost": 1.2,
                "peak_hours": ["10:00", "11:00", "18:00", "19:00"],
                "popular_products": ["Croissant", "Palmeras", "Tarta Chocolate"]
            }
        },
        "weather_impact": {
            "description": "Weather impact on sales",
            "scenarios": {
                "rainy_day": {"bread_sales": 1.1, "pastry_sales": 0.9},
                "hot_day": {"bread_sales": 0.8, "cold_items": 1.3},
                "cold_day": {"bread_sales": 1.2, "hot_items": 1.4}
            }
        }
    }
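# Usage sketch: the in-memory store supports a full save/load/delete round
# trip (illustrative):
#
#   def test_model_storage_round_trip(mock_model_storage):
#       path = mock_model_storage.save_model("m1", object(), {"tenant_id": "t1"})
#       assert path == "/models/m1.pkl"
#       assert mock_model_storage.get_metadata("m1")["tenant_id"] == "t1"
#       assert mock_model_storage.delete_model("m1") is True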
@pytest.fixture
def data_quality_test_cases():
    """Various data quality test cases"""
    return {
        "missing_values": {
            "quantity_missing_5pct": 0.05,
            "quantity_missing_20pct": 0.20,
            "quantity_missing_50pct": 0.50,
            "revenue_missing_10pct": 0.10
        },
        "outliers": {
            "extreme_high": 100,   # 100x normal values
            "extreme_low": 0.01,   # Near-zero values
            "negative_values": -1,
            "outlier_percentage": 0.01
        },
        "inconsistencies": {
            "future_dates": ["2025-12-31", "2026-01-01"],
            "invalid_dates": ["2023-13-01", "2023-02-30"],
            "mismatched_revenue": True,  # Revenue doesn't match quantity * price
            "duplicate_records": True
        },
        "insufficient_data": {
            "too_few_days": 10,
            "too_few_products": 1,
            "sporadic_data": 0.3  # Only 30% of expected data points
        }
    }


@pytest.fixture
def api_test_scenarios():
    """API testing scenarios"""
    return {
        "authentication": {
            "valid_token": "Bearer valid_test_token_123",
            "invalid_token": "Bearer invalid_token",
            "expired_token": "Bearer expired_token_456",
            "missing_token": None
        },
        "request_validation": {
            "valid_request": {
                "products": ["Pan Integral"],
                "include_weather": True,
                "include_traffic": True,
                "config": {"seasonality_mode": "additive"}
            },
            "invalid_products": {
                "products": [],  # Empty products list
                "include_weather": True
            },
            "invalid_config": {
                "products": ["Pan Integral"],
                "config": {"seasonality_mode": "invalid_mode"}
            },
            "missing_required_fields": {
                "include_weather": True  # Missing products
            }
        },
        "rate_limiting": {
            "max_requests_per_minute": 60,
            "burst_requests": 100
        }
    }
@pytest.fixture
def integration_test_dependencies():
    """Dependencies for integration testing"""

    class IntegrationDependencies:
        def __init__(self):
            self.external_services = {}
            self.databases = {}
            self.message_queues = {}
            self.storage_systems = {}

        def register_external_service(self, name: str, mock_instance):
            self.external_services[name] = mock_instance

        def register_database(self, name: str, mock_session):
            self.databases[name] = mock_session

        def register_message_queue(self, name: str, mock_queue):
            self.message_queues[name] = mock_queue

        def register_storage(self, name: str, mock_storage):
            self.storage_systems[name] = mock_storage

        def get_service(self, name: str):
            return self.external_services.get(name)

        def get_database(self, name: str):
            return self.databases.get(name)

        def are_all_services_healthy(self):
            # Mock health check for all registered services
            return len(self.external_services) > 0

    return IntegrationDependencies()


@pytest.fixture
def load_test_configuration():
    """Configuration for load testing"""
    return {
        "concurrent_users": {
            "light_load": 5,
            "medium_load": 15,
            "heavy_load": 30,
            "stress_load": 50
        },
        "test_duration": {
            "quick_test": 60,      # 1 minute
            "standard_test": 300,  # 5 minutes
            "extended_test": 900   # 15 minutes
        },
        "request_patterns": {
            "constant_rate": "steady",
            "ramp_up": "increasing",
            "spike": "burst",
            "random": "variable"
        },
        "success_criteria": {
            "min_success_rate": 0.95,
            "max_response_time": 30.0,  # seconds
            "max_error_rate": 0.05
        }
    }
@pytest.fixture
def mock_notification_system():
    """Mock notification system for testing"""
    notifications_sent = []

    class MockNotificationSystem:
        def send_training_started(self, tenant_id: str, job_id: str, products: List[str]):
            notification = {
                "type": "training_started",
                "tenant_id": tenant_id,
                "job_id": job_id,
                "products": products,
                "timestamp": datetime.now()
            }
            notifications_sent.append(notification)
            return notification

        def send_training_completed(self, tenant_id: str, job_id: str, results: Dict[str, Any]):
            notification = {
                "type": "training_completed",
                "tenant_id": tenant_id,
                "job_id": job_id,
                "results": results,
                "timestamp": datetime.now()
            }
            notifications_sent.append(notification)
            return notification

        def send_training_failed(self, tenant_id: str, job_id: str, error: str):
            notification = {
                "type": "training_failed",
                "tenant_id": tenant_id,
                "job_id": job_id,
                "error": error,
                "timestamp": datetime.now()
            }
            notifications_sent.append(notification)
            return notification

        def get_notifications(self, tenant_id: str = None):
            if tenant_id:
                return [n for n in notifications_sent if n["tenant_id"] == tenant_id]
            return notifications_sent

        def clear_notifications(self):
            notifications_sent.clear()

    return MockNotificationSystem()
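# Usage sketch (illustrative): notifications accumulate per fixture instance
# and can be filtered by tenant:
#
#   def test_notifications_are_recorded(mock_notification_system):
#       mock_notification_system.send_training_started("t1", "job-1", ["Pan Integral"])
#       assert len(mock_notification_system.get_notifications("t1")) == 1
#       assert mock_notification_system.get_notifications("other") == []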
@pytest.fixture
def test_metrics_collector():
    """Test metrics collector for monitoring test performance"""
    import time

    class TestMetricsCollector:
        def __init__(self):
            self.start_times = {}
            self.counters = {}
            self.gauges = {}
            self.histograms = {}

        def start_timer(self, metric_name: str):
            self.start_times[metric_name] = time.time()

        def end_timer(self, metric_name: str):
            if metric_name in self.start_times:
                duration = time.time() - self.start_times[metric_name]
                if metric_name not in self.histograms:
                    self.histograms[metric_name] = []
                self.histograms[metric_name].append(duration)
                del self.start_times[metric_name]
                return duration
            return 0

        def increment_counter(self, counter_name: str, value: int = 1):
            self.counters[counter_name] = self.counters.get(counter_name, 0) + value

        def set_gauge(self, gauge_name: str, value: float):
            self.gauges[gauge_name] = value

        def get_counter(self, counter_name: str):
            return self.counters.get(counter_name, 0)

        def get_gauge(self, gauge_name: str):
            return self.gauges.get(gauge_name, 0)

        def get_histogram_stats(self, histogram_name: str):
            if histogram_name not in self.histograms:
                return {}

            values = self.histograms[histogram_name]
            return {
                "count": len(values),
                "min": min(values) if values else 0,
                "max": max(values) if values else 0,
                "avg": sum(values) / len(values) if values else 0,
                "p50": sorted(values)[len(values) // 2] if values else 0,
                "p95": sorted(values)[int(len(values) * 0.95)] if values else 0,
                "p99": sorted(values)[int(len(values) * 0.99)] if values else 0
            }

        def get_all_metrics(self):
            return {
                "counters": self.counters,
                "gauges": self.gauges,
                "histograms": {name: self.get_histogram_stats(name) for name in self.histograms}
            }

        def reset(self):
            self.start_times.clear()
            self.counters.clear()
            self.gauges.clear()
            self.histograms.clear()

    return TestMetricsCollector()
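# Usage sketch (illustrative): timers feed the histogram, counters are plain
# monotonic tallies:
#
#   def test_timing_metrics(test_metrics_collector):
#       test_metrics_collector.start_timer("step")
#       test_metrics_collector.end_timer("step")
#       assert test_metrics_collector.get_histogram_stats("step")["count"] == 1
#       test_metrics_collector.increment_counter("runs")
#       assert test_metrics_collector.get_counter("runs") == 1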
# ================================================================
# PYTEST PLUGINS AND HOOKS
# ================================================================

def pytest_runtest_setup(item):
    """Setup before each test"""
    # Add any pre-test setup logic here
    pass


def pytest_runtest_teardown(item, nextitem):
    """Teardown after each test"""
    import gc
    gc.collect()  # Force garbage collection after each test


def pytest_sessionstart(session):
    """Called after the Session object has been created"""
    print("\n" + "=" * 80)
    print("TRAINING SERVICE TEST SESSION STARTING")
    print("=" * 80)


def pytest_sessionfinish(session, exitstatus):
    """Called after the whole test run has finished"""
    print("\n" + "=" * 80)
    print("TRAINING SERVICE TEST SESSION FINISHED")
    print(f"Exit Status: {exitstatus}")
    print("=" * 80)
# ================================================================
# FINAL CONFIGURATION
# ================================================================

# Silence numpy floating-point warnings during tests
np.seterr(all='ignore')

# Configure pandas display options for readable test output
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50)

# Set random seeds for reproducible tests
import random
np.random.seed(42)
random.seed(42)