# ================================================================
# services/training/tests/conftest.py
# ================================================================
"""
Test configuration and fixtures for Training Service
Provides shared fixtures, mock data, and test utilities
"""
import pytest
import asyncio
import pandas as pd
import numpy as np
import tempfile
import os
import json
from datetime import datetime, timedelta
from unittest.mock import Mock, AsyncMock, patch
from typing import Dict, List, Any, Generator
from pathlib import Path
import logging
# Mark this module as asyncio; note that pytestmark in a conftest.py does not
# propagate to other test modules (set asyncio_mode in pytest.ini for that)
pytestmark = pytest.mark.asyncio
# Suppress Prophet logging during tests
logging.getLogger('prophet').setLevel(logging.WARNING)
logging.getLogger('cmdstanpy').setLevel(logging.WARNING)
# ================================================================
# INTEGRATION TEST FIXTURES
# ================================================================
@pytest.fixture
async def integration_test_setup(
mock_external_services,
sample_bakery_sales_data,
temp_model_storage
):
"""Complete setup for integration tests"""
# Patch model storage path
with patch('app.core.config.settings.MODEL_STORAGE_PATH', str(temp_model_storage)):
# Patch data fetching to use sample data
with patch('app.services.training_service.TrainingService._fetch_sales_data') as mock_fetch:
mock_fetch.return_value = sample_bakery_sales_data
yield {
'external_services': mock_external_services,
'sales_data': sample_bakery_sales_data,
'model_storage': temp_model_storage,
'mock_fetch': mock_fetch
}
@pytest.fixture
def mock_messaging():
"""Mock messaging system for testing"""
with patch('app.services.messaging.publish_job_started') as mock_started, \
patch('app.services.messaging.publish_job_completed') as mock_completed, \
patch('app.services.messaging.publish_job_failed') as mock_failed, \
patch('app.services.messaging.publish_model_trained') as mock_model:
yield {
'publish_job_started': mock_started,
'publish_job_completed': mock_completed,
'publish_job_failed': mock_failed,
'publish_model_trained': mock_model
}
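
# Example usage (illustrative sketch; the test body and the service call are
# hypothetical, not part of this suite):
#
#     async def test_job_events(mock_messaging):
#         # ... run a training job through the service under test ...
#         mock_messaging['publish_job_started'].assert_called_once()
#         mock_messaging['publish_job_completed'].assert_called_once()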
# ================================================================
# API TEST FIXTURES
# ================================================================
@pytest.fixture
async def test_app():
"""Test FastAPI application instance"""
from app.main import app
return app
@pytest.fixture
async def test_client(test_app):
"""Test client for API testing"""
    from httpx import ASGITransport, AsyncClient
    # httpx 0.27+ deprecates AsyncClient(app=...) in favor of an explicit transport
    transport = ASGITransport(app=test_app)
    async with AsyncClient(transport=transport, base_url="http://test") as client:
        yield client
@pytest.fixture
def auth_headers():
"""Mock authentication headers"""
return {
"Authorization": "Bearer test_token_123",
"X-Tenant-ID": "test_tenant_123"
}
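
# Example usage (sketch; the endpoint path is illustrative):
#
#     async def test_list_jobs(test_client, auth_headers):
#         response = await test_client.get("/training/jobs", headers=auth_headers)
#         assert response.status_code == 200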
# ================================================================
# ERROR SIMULATION FIXTURES
# ================================================================
@pytest.fixture
def failing_external_services():
"""Mock external services that fail for error testing"""
with patch('app.external.aemet.AEMETClient') as mock_aemet, \
patch('app.external.madrid_opendata.MadridOpenDataClient') as mock_madrid:
# Configure to raise exceptions
mock_aemet_instance = AsyncMock()
mock_aemet.return_value = mock_aemet_instance
mock_aemet_instance.get_historical_weather.side_effect = Exception("AEMET API Error")
mock_madrid_instance = AsyncMock()
mock_madrid.return_value = mock_madrid_instance
mock_madrid_instance.get_historical_traffic.side_effect = Exception("Madrid API Error")
        yield {
            'aemet': mock_aemet_instance,
            'madrid': mock_madrid_instance
        }
@pytest.fixture
def corrupted_sales_data(sample_bakery_sales_data):
"""Sales data with various quality issues for testing"""
corrupted_data = sample_bakery_sales_data.copy()
# Introduce missing values (20% of quantity data)
missing_mask = np.random.random(len(corrupted_data)) < 0.2
corrupted_data.loc[missing_mask, 'quantity'] = np.nan
# Introduce extreme outliers (1% of data)
outlier_mask = np.random.random(len(corrupted_data)) < 0.01
corrupted_data.loc[outlier_mask, 'quantity'] *= 100
# Introduce inconsistent dates (0.5% of data)
future_mask = np.random.random(len(corrupted_data)) < 0.005
corrupted_data.loc[future_mask, 'date'] = "2025-12-31"
# Introduce negative values (0.2% of data)
negative_mask = np.random.random(len(corrupted_data)) < 0.002
corrupted_data.loc[negative_mask, 'quantity'] = -10
return corrupted_data
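
# Example usage (sketch): the injected negative quantities should trip the
# validator defined further below.
#
#     def test_corrupted_data_is_flagged(corrupted_sales_data, test_data_validator):
#         result = test_data_validator.validate_sales_data(corrupted_sales_data)
#         assert not result['valid']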
# ================================================================
# VALIDATION TEST FIXTURES
# ================================================================
@pytest.fixture
def insufficient_sales_data():
"""Sales data with insufficient volume for training"""
# Only 10 days of data
start_date = datetime(2023, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(10)]
data = []
for date in dates:
data.append({
"date": date.strftime("%Y-%m-%d"),
"product": "Pan Integral",
"quantity": np.random.randint(10, 50),
"revenue": round(np.random.uniform(20, 100), 2),
"temperature": round(np.random.uniform(10, 25), 1),
"precipitation": 0.0,
"is_weekend": date.weekday() >= 5,
"is_holiday": False
})
return pd.DataFrame(data)
@pytest.fixture
def seasonal_product_data():
"""Data for seasonal product (Roscon Reyes) testing"""
start_date = datetime(2023, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(365)]
data = []
for date in dates:
# Roscon Reyes has strong seasonal pattern (Christmas specialty)
base_qty = 5 # Very low base
if date.month == 12: # December - high sales
base_qty = 20 + (date.day - 1) * 2 # Increasing through December
elif date.month == 1 and date.day <= 6: # Until Epiphany
base_qty = 50
# Add some noise
quantity = max(1, int(base_qty + np.random.normal(0, base_qty * 0.2)))
data.append({
"date": date.strftime("%Y-%m-%d"),
"product": "Roscon Reyes",
"quantity": quantity,
"revenue": round(quantity * 25.0, 2), # Expensive specialty item
"temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi), 1),
"precipitation": max(0, np.random.exponential(0.5)),
"is_weekend": date.weekday() >= 5,
"is_holiday": _is_spanish_holiday(date)
})
return pd.DataFrame(data)
# ================================================================
# CLEANUP FIXTURES
# ================================================================
@pytest.fixture(autouse=True)
def cleanup_after_test():
"""Automatic cleanup after each test"""
yield
# Clean up any test files
import tempfile
import shutil
# Clear any temporary model files
temp_dirs = [d for d in os.listdir(tempfile.gettempdir()) if d.startswith('test_models_')]
for temp_dir in temp_dirs:
        try:
            shutil.rmtree(os.path.join(tempfile.gettempdir(), temp_dir))
        except OSError:
            pass  # directory already removed or still in use; safe to ignore
# ================================================================
# TEST DATA VALIDATION UTILITIES
# ================================================================
class TestDataValidator:
    """Utility class for validating test data quality"""

    __test__ = False  # keep pytest from collecting this helper as a test class
@staticmethod
def validate_sales_data(df: pd.DataFrame) -> Dict[str, Any]:
"""Validate sales data structure and quality"""
required_columns = ['date', 'product', 'quantity', 'revenue']
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
return {'valid': False, 'error': f'Missing columns: {missing_columns}'}
# Check data types
        try:
            pd.to_datetime(df['date'])
        except (ValueError, TypeError):
            return {'valid': False, 'error': 'Invalid date format'}
if not pd.api.types.is_numeric_dtype(df['quantity']):
return {'valid': False, 'error': 'Quantity must be numeric'}
if not pd.api.types.is_numeric_dtype(df['revenue']):
return {'valid': False, 'error': 'Revenue must be numeric'}
# Check for negative values
if (df['quantity'] < 0).any():
return {'valid': False, 'error': 'Negative quantities found'}
if (df['revenue'] < 0).any():
return {'valid': False, 'error': 'Negative revenue found'}
return {'valid': True, 'rows': len(df), 'products': df['product'].nunique()}
@pytest.fixture
def test_data_validator():
"""Test data validator utility"""
return TestDataValidator()
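
# Example usage (sketch):
#
#     def test_sample_data_is_clean(sample_bakery_sales_data, test_data_validator):
#         result = test_data_validator.validate_sales_data(sample_bakery_sales_data)
#         assert result['valid'], result.get('error')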
# ================================================================
# LOGGING CONFIGURATION FOR TESTS
# ================================================================
@pytest.fixture(autouse=True)
def configure_test_logging():
"""Configure logging for tests"""
import logging
# Reduce log level for external libraries during tests
logging.getLogger('prophet').setLevel(logging.WARNING)
logging.getLogger('cmdstanpy').setLevel(logging.ERROR)
logging.getLogger('matplotlib').setLevel(logging.WARNING)
logging.getLogger('urllib3').setLevel(logging.WARNING)
# Configure our app logging for tests
logger = logging.getLogger('app')
logger.setLevel(logging.INFO)
yield
# Reset logging after tests
logging.getLogger().handlers.clear()
# ================================================================
# ENVIRONMENT SETUP
# ================================================================
@pytest.fixture(scope="session", autouse=True)
def setup_test_environment():
"""Setup test environment variables"""
os.environ.update({
'ENVIRONMENT': 'test',
'LOG_LEVEL': 'INFO',
'MODEL_STORAGE_PATH': '/tmp/test_models',
'MAX_TRAINING_TIME_MINUTES': '5',
'MIN_TRAINING_DATA_DAYS': '7',
'PROPHET_SEASONALITY_MODE': 'additive',
'ENABLE_SYNTHETIC_DATA': 'true',
'SKIP_EXTERNAL_API_CALLS': 'true'
})
yield
# Cleanup environment
test_vars = [
'ENVIRONMENT', 'LOG_LEVEL', 'MODEL_STORAGE_PATH',
'MAX_TRAINING_TIME_MINUTES', 'MIN_TRAINING_DATA_DAYS',
'PROPHET_SEASONALITY_MODE', 'ENABLE_SYNTHETIC_DATA',
'SKIP_EXTERNAL_API_CALLS'
]
for var in test_vars:
        os.environ.pop(var, None)


# ================================================================
# PYTEST CONFIGURATION
# ================================================================
@pytest.fixture(scope="session")
def event_loop():
"""Create an instance of the default event loop for the test session."""
loop = asyncio.new_event_loop()
yield loop
loop.close()
def pytest_configure(config):
"""Configure pytest with custom markers"""
config.addinivalue_line(
"markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')"
)
config.addinivalue_line(
"markers", "integration: marks tests as integration tests"
)
config.addinivalue_line(
"markers", "unit: marks tests as unit tests"
)
config.addinivalue_line(
"markers", "performance: marks tests as performance tests"
)
config.addinivalue_line(
"markers", "external: marks tests that require external services"
)
def pytest_collection_modifyitems(config, items):
"""Modify test collection to add markers automatically"""
for item in items:
# Mark performance tests
if "performance" in item.nodeid:
item.add_marker(pytest.mark.performance)
item.add_marker(pytest.mark.slow)
# Mark integration tests
if "integration" in item.nodeid:
item.add_marker(pytest.mark.integration)
# Mark end-to-end tests
if "end_to_end" in item.nodeid:
item.add_marker(pytest.mark.integration)
item.add_marker(pytest.mark.external)
# Mark unit tests (default for others)
if not any(marker.name in ["integration", "performance"] for marker in item.iter_markers()):
item.add_marker(pytest.mark.unit)
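
# With these markers in place, subsets can be selected from the CLI, e.g.:
#
#     pytest -m "not slow"              # skip performance-heavy tests
#     pytest -m integration             # integration tests only
#     pytest -m "unit and not external" # fast, self-contained tests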
# ================================================================
# TEST DATABASE FIXTURES
# ================================================================
@pytest.fixture
async def test_db_session():
"""Mock database session for testing"""
mock_session = AsyncMock()
# Mock common database operations
mock_session.add = Mock()
mock_session.commit = AsyncMock()
mock_session.rollback = AsyncMock()
mock_session.refresh = AsyncMock()
mock_session.close = AsyncMock()
mock_session.execute = AsyncMock()
mock_session.scalar = AsyncMock()
return mock_session
@pytest.fixture
def training_job_in_db():
"""Mock training job already in database"""
from app.models.training import ModelTrainingLog
job = ModelTrainingLog(
job_id="test_job_123",
tenant_id="test_tenant",
status="running",
progress=50,
current_step="Training model for Pan Integral",
config={"include_weather": True, "include_traffic": True},
started_at=datetime.now(),
logs=["Started training", "Processing data"]
)
return job
# ================================================================
# SAMPLE DATA FIXTURES
# ================================================================
@pytest.fixture
def sample_bakery_sales_data():
"""Generate comprehensive bakery sales data for testing"""
# Generate 1 year of data
start_date = datetime(2023, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(365)]
# Spanish bakery products with realistic patterns
products = [
"Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
"Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras",
"Donuts", "Berlinas", "Napolitanas", "Ensaimadas"
]
# Product-specific configurations
product_config = {
"Pan Integral": {"base": 80, "price": 2.80, "weekend_boost": 1.1, "seasonal": False},
"Pan Blanco": {"base": 120, "price": 2.50, "weekend_boost": 1.2, "seasonal": False},
"Croissant": {"base": 45, "price": 1.50, "weekend_boost": 1.4, "seasonal": False},
"Magdalenas": {"base": 30, "price": 1.20, "weekend_boost": 1.1, "seasonal": False},
"Empanadas": {"base": 25, "price": 3.50, "weekend_boost": 0.9, "seasonal": False},
"Tarta Chocolate": {"base": 15, "price": 18.00, "weekend_boost": 1.6, "seasonal": False},
"Roscon Reyes": {"base": 8, "price": 25.00, "weekend_boost": 1.0, "seasonal": True},
"Palmeras": {"base": 12, "price": 1.80, "weekend_boost": 1.2, "seasonal": False},
"Donuts": {"base": 20, "price": 1.40, "weekend_boost": 1.3, "seasonal": False},
"Berlinas": {"base": 18, "price": 1.60, "weekend_boost": 1.2, "seasonal": False},
"Napolitanas": {"base": 22, "price": 1.70, "weekend_boost": 1.1, "seasonal": False},
"Ensaimadas": {"base": 15, "price": 2.20, "weekend_boost": 1.0, "seasonal": False}
}
data = []
for date in dates:
# Calculate date-specific factors
day_of_year = date.timetuple().tm_yday
is_weekend = date.weekday() >= 5
is_holiday = _is_spanish_holiday(date)
# Madrid weather simulation
temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi) + np.random.normal(0, 3)
precip = max(0, np.random.exponential(0.8))
for product in products:
config = product_config[product]
# Base quantity
base_qty = config["base"]
# Apply weekend boost
if is_weekend:
base_qty *= config["weekend_boost"]
# Apply holiday boost
if is_holiday:
base_qty *= 1.3
# Seasonal products (like Roscon Reyes for Christmas)
if config["seasonal"] and product == "Roscon Reyes":
if date.month == 12:
# Exponential increase through December
base_qty *= (1 + (date.day - 1) / 5)
elif date.month == 1 and date.day <= 6:
# High demand until Epiphany (Jan 6)
base_qty *= 3
else:
# Very low demand rest of year
base_qty *= 0.1
# Weather effects
if temp > 30: # Very hot days
if product in ["Pan Integral", "Pan Blanco"]:
base_qty *= 0.7 # Less bread
elif product in ["Donuts", "Berlinas"]:
base_qty *= 0.8 # Less fried items
elif temp < 5: # Cold days
base_qty *= 1.15 # More baked goods
# Add realistic noise and ensure minimum of 1
quantity = max(1, int(base_qty + np.random.normal(0, base_qty * 0.12)))
revenue = round(quantity * config["price"], 2)
data.append({
"date": date.strftime("%Y-%m-%d"),
"product": product,
"quantity": quantity,
"revenue": revenue,
"temperature": round(temp, 1),
"precipitation": round(precip, 2),
"is_weekend": is_weekend,
"is_holiday": is_holiday
})
return pd.DataFrame(data)
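
# Sanity check for the fixture above (sketch): one row per product per day.
#
#     def test_sample_data_shape(sample_bakery_sales_data):
#         assert len(sample_bakery_sales_data) == 365 * 12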
@pytest.fixture
def sample_weather_data():
"""Generate realistic Madrid weather data"""
start_date = datetime(2023, 1, 1)
weather_data = []
for i in range(365):
date = start_date + timedelta(days=i)
day_of_year = date.timetuple().tm_yday
# Madrid climate simulation
base_temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi)
# Seasonal humidity patterns
base_humidity = 50 + 20 * np.sin((day_of_year / 365) * 2 * np.pi + np.pi)
weather_data.append({
"date": date,
"temperature": round(base_temp + np.random.normal(0, 4), 1),
"precipitation": max(0, np.random.exponential(1.2)),
"humidity": np.random.uniform(25, 75),
"wind_speed": np.random.uniform(3, 20),
"pressure": np.random.uniform(995, 1025),
"description": np.random.choice([
"Soleado", "Parcialmente nublado", "Nublado",
"Lluvia ligera", "Despejado", "Variable"
]),
"source": "aemet_test"
})
return weather_data
@pytest.fixture
def sample_traffic_data():
"""Generate realistic Madrid traffic data"""
start_date = datetime(2023, 1, 1)
traffic_data = []
for i in range(365):
date = start_date + timedelta(days=i)
# Generate multiple measurements per day
        for hour in range(6, 22, 2):  # every 2 hours from 6 AM to 8 PM (even hours only)
measurement_time = date.replace(hour=hour)
# Madrid traffic patterns
if hour in [7, 8, 9, 18, 19, 20]: # Rush hours
volume = np.random.randint(1200, 2000)
congestion = "high"
speed = np.random.randint(10, 25)
occupation = np.random.randint(60, 90)
elif hour in [12, 13, 14]: # Lunch time
volume = np.random.randint(800, 1200)
congestion = "medium"
speed = np.random.randint(20, 35)
occupation = np.random.randint(40, 70)
else: # Off-peak
volume = np.random.randint(300, 800)
congestion = "low"
speed = np.random.randint(30, 50)
occupation = np.random.randint(15, 50)
# Weekend adjustment
if date.weekday() >= 5:
volume = int(volume * 0.8) # Less traffic on weekends
speed = min(50, int(speed * 1.2)) # Faster speeds
traffic_data.append({
"date": measurement_time,
"traffic_volume": volume,
"occupation_percentage": occupation,
"load_percentage": min(95, occupation + np.random.randint(5, 15)),
"average_speed": speed,
"congestion_level": congestion,
"pedestrian_count": np.random.randint(100, 800),
"measurement_point_id": "MADRID_TEST_001",
"measurement_point_name": "Plaza Mayor",
"road_type": "URB",
"source": "madrid_opendata_test"
})
return traffic_data
# ================================================================
# MOCK SERVICES FIXTURES
# ================================================================
@pytest.fixture
async def mock_aemet_client(sample_weather_data):
"""Mock AEMET weather API client"""
with patch('app.external.aemet.AEMETClient') as mock_class:
mock_instance = AsyncMock()
mock_class.return_value = mock_instance
# Configure mock responses
mock_instance.get_historical_weather.return_value = sample_weather_data
mock_instance.get_current_weather.return_value = sample_weather_data[-1]
mock_instance.get_weather_forecast.return_value = sample_weather_data[-7:]
yield mock_instance
@pytest.fixture
async def mock_madrid_client(sample_traffic_data):
"""Mock Madrid OpenData API client"""
with patch('app.external.madrid_opendata.MadridOpenDataClient') as mock_class:
mock_instance = AsyncMock()
mock_class.return_value = mock_instance
# Configure mock responses
mock_instance.get_historical_traffic.return_value = sample_traffic_data
mock_instance.get_current_traffic.return_value = sample_traffic_data[-1]
yield mock_instance
@pytest.fixture
async def mock_external_services(mock_aemet_client, mock_madrid_client):
"""Combined mock for all external services"""
return {
'aemet': mock_aemet_client,
'madrid': mock_madrid_client
}
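
# Example usage (sketch): the mocked clients resolve immediately with the
# sample datasets defined above.
#
#     async def test_external_mocks(mock_external_services):
#         weather = await mock_external_services['aemet'].get_historical_weather()
#         assert len(weather) == 365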
# ================================================================
# ML COMPONENT FIXTURES
# ================================================================
@pytest.fixture
def mock_ml_trainer():
"""Mock ML trainer for testing"""
with patch('app.ml.trainer.BakeryMLTrainer') as mock_class:
mock_instance = AsyncMock()
mock_class.return_value = mock_instance
        # Configure successful training responses
        mock_instance.train_single_product.return_value = {
            "status": "completed",
"model_id": "test_model_123",
"metrics": {
"mape": 25.5,
"rmse": 12.3,
"mae": 8.7,
"r2_score": 0.85
},
"training_duration": 45.2,
"data_points_used": 365
}
mock_instance.train_tenant_models.return_value = [
{
"product_name": "Pan Integral",
"model_id": "model_pan_integral_123",
"metrics": {"mape": 22.1, "rmse": 10.5, "mae": 7.8},
"training_completed": True
},
{
"product_name": "Croissant",
"model_id": "model_croissant_456",
"metrics": {"mape": 28.3, "rmse": 8.9, "mae": 6.2},
"training_completed": True
}
]
yield mock_instance
@pytest.fixture
def mock_data_processor():
"""Mock data processor for testing"""
with patch('app.ml.data_processor.BakeryDataProcessor') as mock_class:
mock_instance = AsyncMock()
mock_class.return_value = mock_instance
# Configure mock responses
mock_instance.validate_data_quality.return_value = {
"is_valid": True,
"data_points": 1000,
"missing_percentage": 2.5,
"issues": []
}
mock_instance.prepare_training_data.return_value = pd.DataFrame({
"ds": pd.date_range("2023-01-01", periods=365),
"y": np.random.randint(10, 100, 365),
"temperature": np.random.uniform(0, 35, 365),
"traffic_volume": np.random.randint(100, 2000, 365)
})
yield mock_instance
@pytest.fixture
def mock_prophet_manager():
"""Mock Prophet manager for testing"""
with patch('app.ml.prophet_manager.BakeryProphetManager') as mock_class:
mock_instance = AsyncMock()
mock_class.return_value = mock_instance
# Configure mock responses
mock_instance.train_model.return_value = {
"model": Mock(), # Mock Prophet model
"metrics": {
"mape": 23.7,
"rmse": 11.2,
"mae": 8.1
},
"cross_validation": {
"cv_mape_mean": 25.1,
"cv_mape_std": 3.2
}
}
mock_instance.generate_predictions.return_value = pd.DataFrame({
"ds": pd.date_range("2024-01-01", periods=30),
"yhat": np.random.uniform(20, 80, 30),
"yhat_lower": np.random.uniform(10, 60, 30),
"yhat_upper": np.random.uniform(30, 100, 30)
})
yield mock_instance
# ================================================================
# UTILITY FIXTURES
# ================================================================
@pytest.fixture
def temp_model_storage():
"""Temporary directory for model storage during tests"""
with tempfile.TemporaryDirectory() as temp_dir:
yield Path(temp_dir)
@pytest.fixture
def test_config():
"""Test configuration settings"""
return {
"MODEL_STORAGE_PATH": "/tmp/test_models",
"MAX_TRAINING_TIME_MINUTES": 5,
"MIN_TRAINING_DATA_DAYS": 7,
"PROPHET_SEASONALITY_MODE": "additive",
"INCLUDE_SPANISH_HOLIDAYS": True,
"ENABLE_SYNTHETIC_DATA": True
}
@pytest.fixture
def sample_training_request():
"""Sample training request for API tests"""
return {
"products": ["Pan Integral", "Croissant"],
"include_weather": True,
"include_traffic": True,
"config": {
"seasonality_mode": "additive",
"changepoint_prior_scale": 0.05,
"seasonality_prior_scale": 10.0,
"validation_enabled": True
}
}
@pytest.fixture
def sample_single_product_request():
"""Sample single product training request"""
return {
"product_name": "Pan Integral",
"include_weather": True,
"include_traffic": False,
"config": {
"seasonality_mode": "multiplicative",
"include_holidays": True,
"holiday_prior_scale": 15.0
}
}
# ================================================================
# HELPER FUNCTIONS
# ================================================================
def _is_spanish_holiday(date: datetime) -> bool:
"""Check if date is a Spanish holiday"""
spanish_holidays = [
(1, 1), # Año Nuevo
(1, 6), # Reyes Magos
(5, 1), # Día del Trabajo
(8, 15), # Asunción de la Virgen
(10, 12), # Fiesta Nacional de España
(11, 1), # Todos los Santos
(12, 6), # Día de la Constitución
(12, 8), # Inmaculada Concepción
(12, 25), # Navidad
]
return (date.month, date.day) in spanish_holidays
@pytest.fixture
def spanish_holidays_2023():
"""List of Spanish holidays for 2023"""
holidays = []
for month, day in [
(1, 1), (1, 6), (5, 1), (8, 15), (10, 12),
(11, 1), (12, 6), (12, 8), (12, 25)
]:
holidays.append(datetime(2023, month, day))
return holidays
# ================================================================
# PERFORMANCE TESTING FIXTURES
# ================================================================
@pytest.fixture
def large_dataset_for_performance():
"""Generate large dataset for performance testing"""
# Generate 2 years of data with 15 products
start_date = datetime(2022, 1, 1)
end_date = datetime(2024, 1, 1)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
products = [
"Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
"Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras",
"Donuts", "Berlinas", "Napolitanas", "Ensaimadas",
"Baguette", "Pan de Molde", "Bizcocho"
]
data = []
for date in date_range:
for product in products:
# Realistic sales with patterns
base_quantity = np.random.randint(5, 150)
# Seasonal patterns
if date.month in [12, 1]: # Winter/Holiday season
base_quantity *= 1.4
elif date.month in [6, 7, 8]: # Summer
base_quantity *= 0.8
# Weekly patterns
if date.weekday() >= 5: # Weekends
base_quantity *= 1.2
elif date.weekday() == 0: # Monday
base_quantity *= 0.7
# Add noise
quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1)))
data.append({
"date": date.strftime("%Y-%m-%d"),
"product": product,
"quantity": quantity,
"revenue": round(quantity * np.random.uniform(1.5, 8.0), 2),
"temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1),
"precipitation": max(0, np.random.exponential(0.8)),
"is_weekend": date.weekday() >= 5,
"is_holiday": _is_spanish_holiday(date)
})
return pd.DataFrame(data)
@pytest.fixture
def memory_monitor():
"""Memory monitoring utility for performance tests"""
import psutil
import gc
class MemoryMonitor:
def __init__(self):
self.process = psutil.Process()
self.snapshots = []
def snapshot(self, label: str):
gc.collect() # Force garbage collection
memory_mb = self.process.memory_info().rss / 1024 / 1024
self.snapshots.append({
'label': label,
'memory_mb': memory_mb,
'timestamp': datetime.now()
})
return memory_mb
def get_peak_usage(self):
if not self.snapshots:
return 0
return max(s['memory_mb'] for s in self.snapshots)
def get_usage_increase(self):
if len(self.snapshots) < 2:
return 0
return self.snapshots[-1]['memory_mb'] - self.snapshots[0]['memory_mb']
def report(self):
print("\n=== Memory Usage Report ===")
for snapshot in self.snapshots:
print(f"{snapshot['label']}: {snapshot['memory_mb']:.2f} MB")
print(f"Peak Usage: {self.get_peak_usage():.2f} MB")
print(f"Total Increase: {self.get_usage_increase():.2f} MB")
return MemoryMonitor()
@pytest.fixture
def timing_monitor():
"""Timing monitoring utility for performance tests"""
import time
class TimingMonitor:
def __init__(self):
self.timings = []
self.start_time = None
def start(self, label: str):
self.start_time = time.time()
self.current_label = label
def stop(self):
if self.start_time is None:
return 0
duration = time.time() - self.start_time
self.timings.append({
'label': self.current_label,
'duration': duration
})
self.start_time = None
return duration
def get_total_time(self):
return sum(t['duration'] for t in self.timings)
def report(self):
print("\n=== Timing Report ===")
for timing in self.timings:
print(f"{timing['label']}: {timing['duration']:.2f}s")
print(f"Total Time: {self.get_total_time():.2f}s")
return TimingMonitor()
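
# Example usage (sketch; `process_sales_data` and the thresholds are
# illustrative, not part of this service's confirmed API):
#
#     @pytest.mark.performance
#     def test_bulk_processing(large_dataset_for_performance,
#                              memory_monitor, timing_monitor):
#         memory_monitor.snapshot("start")
#         timing_monitor.start("processing")
#         process_sales_data(large_dataset_for_performance)
#         assert timing_monitor.stop() < 60
#         memory_monitor.snapshot("end")
#         assert memory_monitor.get_usage_increase() < 500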
# ================================================================
# ADDITIONAL FIXTURES FOR COMPREHENSIVE TESTING
# ================================================================
@pytest.fixture
def mock_job_scheduler():
"""Mock job scheduler for testing"""
with patch('app.services.job_scheduler.JobScheduler') as mock_scheduler:
mock_instance = Mock()
mock_scheduler.return_value = mock_instance
mock_instance.schedule_job.return_value = "scheduled_job_123"
mock_instance.cancel_job.return_value = True
mock_instance.get_job_status.return_value = "running"
yield mock_instance
@pytest.fixture
def sample_model_metadata():
"""Sample model metadata for testing"""
    return {
        "model_id": "test_model_123",
        "tenant_id": "test_tenant",
        "product_name": "Pan Integral",
        "model_type": "prophet",
        "training_date": datetime.now().isoformat(),
        "data_points_used": 365,
        "features_used": ["temperature", "is_weekend", "is_holiday"],
        "metrics": {
            "mape": 23.5,
            "rmse": 12.3,
            "mae": 8.7,
            "r2_score": 0.85
        },
        "hyperparameters": {
            "seasonality_mode": "additive",
            "changepoint_prior_scale": 0.05,
            "seasonality_prior_scale": 10.0
        },
        "version": "1.0",
        "status": "active"
    }
@pytest.fixture
def training_progress_states():
"""Different training progress states for testing"""
return [
{"status": "pending", "progress": 0, "current_step": "Initializing training job"},
{"status": "running", "progress": 10, "current_step": "Fetching sales data"},
{"status": "running", "progress": 25, "current_step": "Processing weather data"},
{"status": "running", "progress": 40, "current_step": "Processing traffic data"},
{"status": "running", "progress": 55, "current_step": "Engineering features"},
{"status": "running", "progress": 70, "current_step": "Training Pan Integral model"},
{"status": "running", "progress": 85, "current_step": "Validating model performance"},
{"status": "running", "progress": 95, "current_step": "Saving model artifacts"},
{"status": "completed", "progress": 100, "current_step": "Training completed successfully"}
]
@pytest.fixture
def error_scenarios():
"""Different error scenarios for testing"""
return {
"insufficient_data": {
"error_type": "DataError",
"error_message": "Insufficient training data: only 15 days available, minimum 30 required",
"error_code": "INSUFFICIENT_DATA"
},
"external_api_failure": {
"error_type": "ExternalAPIError",
"error_message": "Failed to fetch weather data from AEMET API",
"error_code": "WEATHER_API_ERROR"
},
"model_training_failure": {
"error_type": "ModelTrainingError",
"error_message": "Prophet model training failed: unable to fit data",
"error_code": "MODEL_TRAINING_FAILED"
},
"data_quality_error": {
"error_type": "DataQualityError",
"error_message": "Data quality issues detected: 45% missing values in quantity column",
"error_code": "DATA_QUALITY_POOR"
}
}
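
# Example usage (sketch): parametrize over the scenario keys.
#
#     @pytest.mark.parametrize("name", ["insufficient_data", "external_api_failure"])
#     def test_error_metadata(error_scenarios, name):
#         assert error_scenarios[name]["error_code"]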
@pytest.fixture
def performance_benchmarks():
"""Performance benchmarks for testing"""
return {
"single_product_training": {
"max_duration_seconds": 120,
"max_memory_mb": 500,
"min_accuracy_mape": 50
},
"multi_product_training": {
"max_duration_seconds": 300,
"max_memory_mb": 1000,
"min_accuracy_mape": 55
},
"data_processing": {
"max_throughput_rows_per_second": 1000,
"max_memory_per_1k_rows_mb": 10
},
"concurrent_jobs": {
"max_concurrent_jobs": 5,
"max_queue_time_seconds": 30
}
}
@pytest.fixture
def mock_model_storage():
"""Mock model storage system for testing"""
storage = {}
class MockModelStorage:
def save_model(self, model_id: str, model_data: Any, metadata: Dict[str, Any]):
storage[model_id] = {
"model_data": model_data,
"metadata": metadata,
"saved_at": datetime.now()
}
return f"/models/{model_id}.pkl"
def load_model(self, model_id: str):
if model_id in storage:
return storage[model_id]["model_data"]
raise FileNotFoundError(f"Model {model_id} not found")
def get_metadata(self, model_id: str):
if model_id in storage:
return storage[model_id]["metadata"]
raise FileNotFoundError(f"Model {model_id} not found")
def delete_model(self, model_id: str):
if model_id in storage:
del storage[model_id]
return True
return False
def list_models(self, tenant_id: str = None):
models = []
for model_id, data in storage.items():
if tenant_id is None or data["metadata"].get("tenant_id") == tenant_id:
models.append({
"model_id": model_id,
"metadata": data["metadata"],
"saved_at": data["saved_at"]
})
return models
return MockModelStorage()
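
# Example usage (sketch):
#
#     def test_model_roundtrip(mock_model_storage, sample_model_metadata):
#         mock_model_storage.save_model("m1", object(), sample_model_metadata)
#         assert mock_model_storage.get_metadata("m1")["tenant_id"] == "test_tenant"
#         assert mock_model_storage.delete_model("m1")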
@pytest.fixture
def real_world_scenarios():
"""Real-world bakery scenarios for testing"""
return {
"holiday_rush": {
"description": "Christmas season with high demand for seasonal products",
"date_range": ("2023-12-15", "2023-12-31"),
"expected_patterns": {
"Roscon Reyes": {"multiplier": 5.0, "trend": "increasing"},
"Pan Integral": {"multiplier": 1.3, "trend": "stable"},
"Tarta Chocolate": {"multiplier": 2.0, "trend": "increasing"}
}
},
"summer_slowdown": {
"description": "Summer period with generally lower sales",
"date_range": ("2023-07-01", "2023-08-31"),
"expected_patterns": {
"Pan Integral": {"multiplier": 0.8, "trend": "decreasing"},
"Croissant": {"multiplier": 0.9, "trend": "stable"},
"Cold_drinks": {"multiplier": 1.5, "trend": "increasing"}
}
},
"weekend_patterns": {
"description": "Weekend shopping patterns",
"expected_patterns": {
"weekend_boost": 1.2,
"peak_hours": ["10:00", "11:00", "18:00", "19:00"],
"popular_products": ["Croissant", "Palmeras", "Tarta Chocolate"]
}
},
"weather_impact": {
"description": "Weather impact on sales",
"scenarios": {
"rainy_day": {"bread_sales": 1.1, "pastry_sales": 0.9},
"hot_day": {"bread_sales": 0.8, "cold_items": 1.3},
"cold_day": {"bread_sales": 1.2, "hot_items": 1.4}
}
}
}
@pytest.fixture
def data_quality_test_cases():
"""Various data quality test cases"""
return {
"missing_values": {
"quantity_missing_5pct": 0.05,
"quantity_missing_20pct": 0.20,
"quantity_missing_50pct": 0.50,
"revenue_missing_10pct": 0.10
},
"outliers": {
"extreme_high": 100, # 100x normal values
"extreme_low": 0.01, # Near-zero values
"negative_values": -1,
"outlier_percentage": 0.01
},
"inconsistencies": {
"future_dates": ["2025-12-31", "2026-01-01"],
"invalid_dates": ["2023-13-01", "2023-02-30"],
"mismatched_revenue": True, # Revenue doesn't match quantity * price
"duplicate_records": True
},
"insufficient_data": {
"too_few_days": 10,
"too_few_products": 1,
"sporadic_data": 0.3 # Only 30% of expected data points
}
}
@pytest.fixture
def api_test_scenarios():
"""API testing scenarios"""
return {
"authentication": {
"valid_token": "Bearer valid_test_token_123",
"invalid_token": "Bearer invalid_token",
"expired_token": "Bearer expired_token_456",
"missing_token": None
},
"request_validation": {
"valid_request": {
"products": ["Pan Integral"],
"include_weather": True,
"include_traffic": True,
"config": {"seasonality_mode": "additive"}
},
"invalid_products": {
"products": [], # Empty products list
"include_weather": True
},
"invalid_config": {
"products": ["Pan Integral"],
"config": {"seasonality_mode": "invalid_mode"}
},
"missing_required_fields": {
"include_weather": True # Missing products
}
},
"rate_limiting": {
"max_requests_per_minute": 60,
"burst_requests": 100
}
}
@pytest.fixture
def integration_test_dependencies():
"""Dependencies for integration testing"""
class IntegrationDependencies:
def __init__(self):
self.external_services = {}
self.databases = {}
self.message_queues = {}
self.storage_systems = {}
def register_external_service(self, name: str, mock_instance):
self.external_services[name] = mock_instance
def register_database(self, name: str, mock_session):
self.databases[name] = mock_session
def register_message_queue(self, name: str, mock_queue):
self.message_queues[name] = mock_queue
def register_storage(self, name: str, mock_storage):
self.storage_systems[name] = mock_storage
def get_service(self, name: str):
return self.external_services.get(name)
def get_database(self, name: str):
return self.databases.get(name)
def are_all_services_healthy(self):
# Mock health check for all registered services
return len(self.external_services) > 0
return IntegrationDependencies()
@pytest.fixture
def load_test_configuration():
"""Configuration for load testing"""
return {
"concurrent_users": {
"light_load": 5,
"medium_load": 15,
"heavy_load": 30,
"stress_load": 50
},
"test_duration": {
"quick_test": 60, # 1 minute
"standard_test": 300, # 5 minutes
"extended_test": 900 # 15 minutes
},
"request_patterns": {
"constant_rate": "steady",
"ramp_up": "increasing",
"spike": "burst",
"random": "variable"
},
"success_criteria": {
"min_success_rate": 0.95,
"max_response_time": 30.0, # seconds
"max_error_rate": 0.05
}
}
@pytest.fixture
def mock_notification_system():
"""Mock notification system for testing"""
notifications_sent = []
class MockNotificationSystem:
def send_training_started(self, tenant_id: str, job_id: str, products: List[str]):
notification = {
"type": "training_started",
"tenant_id": tenant_id,
"job_id": job_id,
"products": products,
"timestamp": datetime.now()
}
notifications_sent.append(notification)
return notification
def send_training_completed(self, tenant_id: str, job_id: str, results: Dict[str, Any]):
notification = {
"type": "training_completed",
"tenant_id": tenant_id,
"job_id": job_id,
"results": results,
"timestamp": datetime.now()
}
notifications_sent.append(notification)
return notification
def send_training_failed(self, tenant_id: str, job_id: str, error: str):
notification = {
"type": "training_failed",
"tenant_id": tenant_id,
"job_id": job_id,
"error": error,
"timestamp": datetime.now()
}
notifications_sent.append(notification)
return notification
def get_notifications(self, tenant_id: str = None):
if tenant_id:
return [n for n in notifications_sent if n["tenant_id"] == tenant_id]
return notifications_sent
def clear_notifications(self):
notifications_sent.clear()
return MockNotificationSystem()
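
# Example usage (sketch):
#
#     def test_notifications_are_recorded(mock_notification_system):
#         mock_notification_system.send_training_started("t1", "job1", ["Pan Integral"])
#         assert len(mock_notification_system.get_notifications("t1")) == 1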
@pytest.fixture
def test_metrics_collector():
"""Test metrics collector for monitoring test performance"""
    import time  # used by the timer methods below

    class TestMetricsCollector:
def __init__(self):
self.start_times = {}
self.counters = {}
self.gauges = {}
self.histograms = {}
def start_timer(self, metric_name: str):
self.start_times[metric_name] = time.time()
def end_timer(self, metric_name: str):
if metric_name in self.start_times:
duration = time.time() - self.start_times[metric_name]
if metric_name not in self.histograms:
self.histograms[metric_name] = []
self.histograms[metric_name].append(duration)
del self.start_times[metric_name]
return duration
return 0
def increment_counter(self, counter_name: str, value: int = 1):
self.counters[counter_name] = self.counters.get(counter_name, 0) + value
def set_gauge(self, gauge_name: str, value: float):
self.gauges[gauge_name] = value
def get_counter(self, counter_name: str):
return self.counters.get(counter_name, 0)
def get_gauge(self, gauge_name: str):
return self.gauges.get(gauge_name, 0)
def get_histogram_stats(self, histogram_name: str):
if histogram_name not in self.histograms:
return {}
values = self.histograms[histogram_name]
return {
"count": len(values),
"min": min(values) if values else 0,
"max": max(values) if values else 0,
"avg": sum(values) / len(values) if values else 0,
"p50": sorted(values)[len(values)//2] if values else 0,
"p95": sorted(values)[int(len(values)*0.95)] if values else 0,
"p99": sorted(values)[int(len(values)*0.99)] if values else 0
}
def get_all_metrics(self):
return {
"counters": self.counters,
"gauges": self.gauges,
"histograms": {name: self.get_histogram_stats(name) for name in self.histograms}
}
def reset(self):
self.start_times.clear()
self.counters.clear()
self.gauges.clear()
self.histograms.clear()
    return TestMetricsCollector()
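
# Example usage (sketch):
#
#     def test_step_timing(test_metrics_collector):
#         test_metrics_collector.start_timer("step")
#         # ... exercise the code under test ...
#         test_metrics_collector.end_timer("step")
#         assert test_metrics_collector.get_histogram_stats("step")["count"] == 1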
# ================================================================
# PYTEST PLUGINS AND HOOKS
# ================================================================
def pytest_runtest_setup(item):
"""Setup before each test"""
# Add any pre-test setup logic here
pass
def pytest_runtest_teardown(item, nextitem):
"""Teardown after each test"""
# Add any post-test cleanup logic here
import gc
gc.collect() # Force garbage collection after each test
def pytest_sessionstart(session):
"""Called after the Session object has been created"""
print("\n" + "="*80)
print("TRAINING SERVICE TEST SESSION STARTING")
print("="*80)
def pytest_sessionfinish(session, exitstatus):
"""Called after whole test run finished"""
print("\n" + "="*80)
print("TRAINING SERVICE TEST SESSION FINISHED")
print(f"Exit Status: {exitstatus}")
print("="*80)
# ================================================================
# FINAL CONFIGURATION
# ================================================================
# Silence numpy floating-point warnings (overflow, divide-by-zero) during tests
import numpy as np
np.seterr(all='ignore')
# Configure pandas for testing
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50)
# Set random seeds for reproducible tests
np.random.seed(42)
import random
random.seed(42)