# bakery-ia/services/training/tests/conftest.py

# services/training/tests/conftest.py
"""
Test configuration and fixtures for training service ML components
"""

import pytest
import asyncio
import os
import tempfile
import pandas as pd
import numpy as np
from unittest.mock import Mock, AsyncMock, patch
from typing import Dict, List, Any, Generator
from datetime import datetime, timedelta
import uuid

# Test environment: model storage under /tmp and an in-memory SQLite URL
os.environ.update(
    {
        "MODEL_STORAGE_PATH": "/tmp/test_models",
        "TRAINING_DATABASE_URL": "sqlite+aiosqlite:///:memory:",
    }
)

# Create test event loop
@pytest.fixture(scope="session")
def event_loop():
    """Provide a single asyncio event loop for the whole test session.

    Yields:
        A newly created event loop, which is closed after the fixture's
        teardown runs.
    """
    # asyncio.new_event_loop() asks the active policy for a loop, so it
    # yields the same kind of loop as the explicit policy call while avoiding
    # the policy accessor that recent Python releases deprecate.
    loop = asyncio.new_event_loop()
    yield loop
    loop.close()

# ================================================================
# PYTEST CONFIGURATION
# ================================================================

def pytest_configure(config):
    """Register the custom markers used by this test suite.

    Args:
        config: Pytest config object; every marker is added through its
            ``addinivalue_line`` method under the ``markers`` ini key.
    """
    marker_lines = (
        "unit: Unit tests",
        "integration: Integration tests",
        "ml: Machine learning tests",
        "slow: Slow-running tests",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)

# ================================================================
# MOCK SETTINGS AND CONFIGURATION
# ================================================================

@pytest.fixture(autouse=True)
def mock_settings():
    """Patch ``app.core.config.settings`` with fixed test values for every test.

    Yields:
        The mock object that stands in for the settings while the patch is
        active.
    """
    overrides = {
        "MODEL_STORAGE_PATH": "/tmp/test_models",
        "MIN_TRAINING_DATA_DAYS": 30,
        "PROPHET_SEASONALITY_MODE": "additive",
        "PROPHET_CHANGEPOINT_PRIOR_SCALE": 0.05,
        "PROPHET_SEASONALITY_PRIOR_SCALE": 10.0,
        "PROPHET_HOLIDAYS_PRIOR_SCALE": 10.0,
        "ENABLE_SPANISH_HOLIDAYS": True,
        "ENABLE_MADRID_HOLIDAYS": True,
    }

    with patch('app.core.config.settings') as patched_settings:
        for option, value in overrides.items():
            setattr(patched_settings, option, value)

        # Make sure the test model directory is present
        os.makedirs("/tmp/test_models", exist_ok=True)

        yield patched_settings

# ================================================================
# MOCK ML COMPONENTS
# ================================================================

@pytest.fixture
def mock_prophet_manager():
    """Build an AsyncMock standing in for BakeryProphetManager.

    Returns:
        An AsyncMock whose training, forecasting and helper methods are
        preconfigured with canned return values.
    """
    manager = AsyncMock()

    # Canned result handed back by train_bakery_model
    training_result = {
        'model_id': f'test-model-{uuid.uuid4().hex[:8]}',
        'model_path': '/tmp/test_models/test_model.pkl',
        'type': 'prophet',
        'training_samples': 100,
        'features': ['temperature', 'humidity', 'day_of_week'],
        'training_metrics': {'mae': 5.2, 'rmse': 7.8, 'r2': 0.85},
        'created_at': datetime.now().isoformat(),
    }
    manager.train_bakery_model.return_value = training_result

    # Validation is replaced by its own explicit AsyncMock
    manager._validate_training_data = AsyncMock()

    # Seven-day forecast with constant prediction and bounds
    horizon = 7
    forecast_frame = pd.DataFrame({
        'ds': pd.date_range('2024-02-01', periods=horizon, freq='D'),
        'yhat': [50.0] * horizon,
        'yhat_lower': [45.0] * horizon,
        'yhat_upper': [55.0] * horizon,
    })
    manager.generate_forecast.return_value = forecast_frame

    # Remaining helpers: a two-row holiday table and the regressor names
    holiday_frame = pd.DataFrame({
        'holiday': ['new_year', 'christmas'],
        'ds': [datetime(2024, 1, 1), datetime(2024, 12, 25)],
    })
    manager._get_spanish_holidays.return_value = holiday_frame
    manager._extract_regressor_columns.return_value = ['temperature', 'humidity']

    return manager

@pytest.fixture
def mock_data_processor():
    """Build an AsyncMock standing in for BakeryDataProcessor.

    Returns:
        An AsyncMock whose data-preparation methods return fixed DataFrames
        and whose holiday check returns False.
    """
    processor = AsyncMock()

    def weekday_cycle(count):
        # Positions 0..6 repeating, starting at 0 for the first row
        return [day % 7 for day in range(count)]

    def weekend_flags(count):
        # 1 where the cycle position is 5 or 6, otherwise 0
        return [int(day % 7 >= 5) for day in range(count)]

    # Training frame: 35 daily rows starting 2024-01-01
    training_rows = 35
    training_frame = pd.DataFrame({
        'ds': pd.date_range('2024-01-01', periods=training_rows, freq='D'),
        'y': [45 + 5 * np.sin(day / 7) for day in range(training_rows)],
        'temperature': [15.0] * training_rows,
        'humidity': [65.0] * training_rows,
        'day_of_week': weekday_cycle(training_rows),
        'is_weekend': weekend_flags(training_rows),
        'month': [1] * training_rows,
        'is_holiday': [0] * training_rows,
    })
    processor.prepare_training_data.return_value = training_frame

    # Prediction features: 7 daily rows starting 2024-02-01
    prediction_rows = 7
    prediction_frame = pd.DataFrame({
        'ds': pd.date_range('2024-02-01', periods=prediction_rows, freq='D'),
        'temperature': [18.0] * prediction_rows,
        'humidity': [65.0] * prediction_rows,
        'day_of_week': weekday_cycle(prediction_rows),
        'is_weekend': weekend_flags(prediction_rows),
        'month': [2] * prediction_rows,
        'is_holiday': [0] * prediction_rows,
    })
    processor.prepare_prediction_features.return_value = prediction_frame

    # Private helpers are stubbed as well so tests can call them directly
    temporal_rows = 10
    temporal_frame = pd.DataFrame({
        'date': pd.date_range('2024-01-01', periods=temporal_rows, freq='D'),
        'day_of_week': weekday_cycle(temporal_rows),
        'is_weekend': weekend_flags(temporal_rows),
        'month': [1] * temporal_rows,
        'season': ['winter'] * temporal_rows,
        'week_of_year': [1] * temporal_rows,
        'quarter': [1] * temporal_rows,
        'is_holiday': [0] * temporal_rows,
        'is_school_holiday': [0] * temporal_rows,
    })
    processor._add_temporal_features.return_value = temporal_frame
    processor._is_spanish_holiday.return_value = False

    return processor

# ================================================================
# SAMPLE DATA FIXTURES
# ================================================================

@pytest.fixture
def sample_sales_data():
    """Generate reproducible sample sales data for testing.

    Returns:
        A DataFrame with 35 daily rows starting 2024-01-01 and the columns
        ``date``, ``product_name`` (always 'Pan Integral') and ``quantity``
        (a sine pattern around 40 plus Gaussian noise).
    """
    # A locally seeded generator makes the noise identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(42)
    dates = pd.date_range('2024-01-01', periods=35, freq='D')
    day_index = np.arange(len(dates))
    noise = rng.normal(0, 2, size=len(dates))
    return pd.DataFrame({
        'date': dates,
        'product_name': ['Pan Integral'] * len(dates),
        'quantity': 40 + 5 * np.sin(day_index / 7) + noise,
    })

@pytest.fixture
def sample_weather_data():
    """Generate reproducible sample weather data for testing.

    Returns:
        A DataFrame with 60 daily rows starting 2024-01-01 and the columns
        ``date``, ``temperature``, ``precipitation`` and ``humidity``.
    """
    # A locally seeded generator makes the noise identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(43)
    days = 60
    day_index = np.arange(days)
    return pd.DataFrame({
        'date': pd.date_range('2024-01-01', periods=days, freq='D'),
        'temperature': (
            15 + 5 * np.sin(2 * np.pi * day_index / 365)
            + rng.normal(0, 2, size=days)
        ),
        # Exponential draws are never negative, so no clamping at 0 is needed
        'precipitation': rng.exponential(1, size=days),
        'humidity': 60 + rng.normal(0, 10, size=days),
    })

@pytest.fixture
def sample_traffic_data():
    """Generate reproducible sample traffic data for testing.

    Returns:
        A DataFrame with 60 daily rows starting 2024-01-01 and the columns
        ``date`` and ``traffic_volume`` (Gaussian noise around 100).
    """
    # A locally seeded generator makes the noise identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(44)
    days = 60
    return pd.DataFrame({
        'date': pd.date_range('2024-01-01', periods=days, freq='D'),
        'traffic_volume': 100 + rng.normal(0, 20, size=days),
    })

@pytest.fixture
def sample_prophet_data():
    """Generate reproducible sample data in Prophet format for testing.

    Returns:
        A DataFrame with 100 daily rows starting 2024-01-01 and the columns
        ``ds``, ``y`` (weekly sine pattern plus noise), ``temperature``
        (noise-free yearly sine pattern) and ``humidity`` (noise around 60).
    """
    # A locally seeded generator makes the noise identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(45)
    days = 100
    day_index = np.arange(days)
    return pd.DataFrame({
        'ds': pd.date_range('2024-01-01', periods=days, freq='D'),
        'y': (
            45 + 10 * np.sin(2 * np.pi * day_index / 7)
            + rng.normal(0, 5, size=days)
        ),
        'temperature': 15 + 5 * np.sin(2 * np.pi * day_index / 365),
        'humidity': 60 + rng.normal(0, 10, size=days),
    })

@pytest.fixture
def sample_sales_records():
    """Return five fixed sales records as a list of dicts.

    Returns:
        A list of dicts with the keys ``date``, ``product_name`` and
        ``quantity``: three 'Pan Integral' rows followed by two 'Croissant'
        rows.
    """
    rows = (
        ("2024-01-01", "Pan Integral", 45),
        ("2024-01-02", "Pan Integral", 50),
        ("2024-01-03", "Pan Integral", 48),
        ("2024-01-04", "Croissant", 25),
        ("2024-01-05", "Croissant", 30),
    )
    return [
        {"date": sale_date, "product_name": product, "quantity": units}
        for sale_date, product, units in rows
    ]

# ================================================================
# UTILITY FIXTURES
# ================================================================

@pytest.fixture
def temp_model_dir():
    """Provide a temporary directory for model storage.

    Yields:
        The path of a fresh temporary directory, which is removed again when
        the fixture is torn down.
    """
    scratch = tempfile.TemporaryDirectory()
    try:
        yield scratch.name
    finally:
        scratch.cleanup()

@pytest.fixture
def test_tenant_id():
    """Return a tenant ID: 'test-tenant-' plus eight random hex characters."""
    suffix = uuid.uuid4().hex[:8]
    return "test-tenant-" + suffix

@pytest.fixture
def test_job_id():
    """Return a job ID: 'test-job-' plus eight random hex characters."""
    suffix = uuid.uuid4().hex[:8]
    return "test-job-" + suffix

# ================================================================
# MOCK EXTERNAL DEPENDENCIES (Simplified)
# ================================================================

@pytest.fixture
def mock_prophet_model():
    """Build a Mock that mimics the Prophet model calls used in tests.

    Returns:
        A Mock whose ``fit`` and ``add_regressor`` return None and whose
        ``predict`` returns a fixed seven-row forecast DataFrame.
    """
    horizon = 7
    canned_forecast = pd.DataFrame({
        'ds': pd.date_range('2024-02-01', periods=horizon, freq='D'),
        'yhat': [50.0] * horizon,
        'yhat_lower': [45.0] * horizon,
        'yhat_upper': [55.0] * horizon,
    })

    model = Mock()
    model.fit.return_value = None
    model.predict.return_value = canned_forecast
    model.add_regressor.return_value = None
    return model

# ================================================================
# DATABASE MOCKS
# ================================================================

@pytest.fixture
def mock_db_session():
    """Build a mock database session for testing.

    Returns:
        An AsyncMock session on which ``commit``, ``rollback``, ``close``,
        ``execute``, ``scalar`` and ``scalars`` are AsyncMocks and ``add`` is
        a plain Mock.
    """
    session = AsyncMock()
    awaitable_methods = (
        "commit", "rollback", "close", "execute", "scalar", "scalars",
    )
    for method_name in awaitable_methods:
        setattr(session, method_name, AsyncMock())
    # add is the one method configured as a synchronous Mock
    session.add = Mock()
    return session

# ================================================================
# PERFORMANCE TESTING
# ================================================================

@pytest.fixture
def performance_tracker():
    """Provide a small stopwatch helper for timing operations in tests.

    Returns:
        A fresh ``PerformanceTracker`` with ``start``, ``stop`` and
        ``assert_performance`` methods; durations are in milliseconds.
    """
    import time  # local import: only this fixture needs the monotonic clock

    class PerformanceTracker:
        """Record how long named operations take, in milliseconds."""

        def __init__(self):
            self.start_time = None
            self.operation_name = "default"
            self.measurements = {}
            # Monotonic reading taken by start(); None until start() is called
            self._started_at = None

        def start(self, operation_name: str = "default"):
            """Begin timing ``operation_name``."""
            # start_time keeps the wall-clock timestamp for anyone inspecting
            # it; the duration itself comes from the monotonic counter, which
            # system clock adjustments do not affect.
            self.start_time = datetime.now()
            self._started_at = time.perf_counter()
            self.operation_name = operation_name

        def stop(self) -> float:
            """Record and return the elapsed milliseconds since ``start``.

            Returns 0.0 without recording anything if ``start`` was never
            called.
            """
            if self._started_at is None:
                return 0.0
            duration = (time.perf_counter() - self._started_at) * 1000
            self.measurements[self.operation_name] = duration
            return duration

        def assert_performance(self, max_duration_ms: float, operation_name: str = "default"):
            """Assert that the recorded duration does not exceed the limit.

            An operation with no recorded measurement is treated as having
            taken infinitely long.
            """
            duration = self.measurements.get(operation_name, float('inf'))
            assert duration <= max_duration_ms, f"Operation {operation_name} took {duration:.0f}ms, expected <= {max_duration_ms}ms"

    return PerformanceTracker()

# ================================================================
# CLEANUP
# ================================================================

@pytest.fixture(autouse=True)
def cleanup_after_test():
    """Delete files left in the test model directory after each test."""
    yield
    models_dir = "/tmp/test_models"
    if not os.path.exists(models_dir):
        return
    for entry in os.listdir(models_dir):
        target = os.path.join(models_dir, entry)
        try:
            os.remove(target)
        except OSError:
            # PermissionError is an OSError subclass, so it is covered here;
            # removal is best-effort and failures are ignored
            continue