Fix generating pytest for training service 2

Urtzi Alfaro
2025-07-25 14:46:45 +02:00
parent 499d6a1db0
commit 7995429454
10 changed files with 13 additions and 5936 deletions


@@ -220,6 +220,19 @@ async def get_metrics():
        return app.state.metrics_collector.get_metrics()
    return {"status": "metrics not available"}

@app.get("/health/live")
async def liveness_check():
    return {"status": "alive"}

@app.get("/health/ready")
async def readiness_check():
    ready = getattr(app.state, 'ready', True)
    return {"status": "ready" if ready else "not ready"}

@app.get("/")
async def root():
    return {"service": "training-service", "version": "1.0.0"}

if __name__ == "__main__":
    uvicorn.run(
        "app.main:app",

File diff suppressed because it is too large.


@@ -1,673 +0,0 @@
# ================================================================
# services/training/tests/run_tests.py
# ================================================================
"""
Main test runner script for Training Service
Executes comprehensive test suite and generates reports
"""
import os
import sys
import asyncio
import subprocess
import json
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Any
import logging
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
class TrainingTestRunner:
"""Main test runner for training service"""
def __init__(self):
self.test_dir = Path(__file__).parent
self.results_dir = self.test_dir / "results"
self.results_dir.mkdir(exist_ok=True)
# Test configuration
self.test_suites = {
"unit": {
"files": ["test_api.py", "test_ml.py", "test_service.py"],
"description": "Unit tests for individual components",
"timeout": 300 # 5 minutes
},
"integration": {
"files": ["test_ml_pipeline_integration.py"],
"description": "Integration tests for ML pipeline with external data",
"timeout": 600 # 10 minutes
},
"performance": {
"files": ["test_performance.py"],
"description": "Performance and load testing",
"timeout": 900 # 15 minutes
},
"end_to_end": {
"files": ["test_end_to_end.py"],
"description": "End-to-end workflow testing",
"timeout": 800 # 13 minutes
}
}
self.test_results = {}
async def setup_test_environment(self):
"""Setup test environment and dependencies"""
logger.info("Setting up test environment...")
# Check if we're running in Docker
if os.path.exists("/.dockerenv"):
logger.info("Running in Docker environment")
else:
logger.info("Running in local environment")
# Verify required files exist
required_files = [
"conftest.py",
"test_ml_pipeline_integration.py",
"test_performance.py"
]
for file in required_files:
file_path = self.test_dir / file
if not file_path.exists():
logger.warning(f"Required test file missing: {file}")
# Create test data if needed
await self.create_test_data()
# Verify external services (mock or real)
await self.verify_external_services()
async def create_test_data(self):
"""Create or verify test data exists"""
logger.info("Creating/verifying test data...")
test_data_dir = self.test_dir / "fixtures" / "test_data"
test_data_dir.mkdir(parents=True, exist_ok=True)
# Create bakery sales sample if it doesn't exist
sales_file = test_data_dir / "bakery_sales_sample.csv"
if not sales_file.exists():
logger.info("Creating sample sales data...")
await self.generate_sample_sales_data(sales_file)
# Create weather data sample
weather_file = test_data_dir / "madrid_weather_sample.json"
if not weather_file.exists():
logger.info("Creating sample weather data...")
await self.generate_sample_weather_data(weather_file)
# Create traffic data sample
traffic_file = test_data_dir / "madrid_traffic_sample.json"
if not traffic_file.exists():
logger.info("Creating sample traffic data...")
await self.generate_sample_traffic_data(traffic_file)
async def generate_sample_sales_data(self, file_path: Path):
"""Generate sample sales data for testing"""
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# Generate 6 months of sample data
start_date = datetime(2023, 6, 1)
dates = [start_date + timedelta(days=i) for i in range(180)]
products = ["Pan Integral", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate"]
data = []
for date in dates:
for product in products:
base_quantity = np.random.randint(10, 100)
# Weekend boost
if date.weekday() >= 5:
base_quantity *= 1.2
# Seasonal variation
temp = 15 + 10 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi)
data.append({
"date": date.strftime("%Y-%m-%d"),
"product": product,
"quantity": int(base_quantity),
"revenue": round(base_quantity * np.random.uniform(2.5, 8.0), 2),
"temperature": round(temp + np.random.normal(0, 3), 1),
"precipitation": max(0, np.random.exponential(0.5)),
"is_weekend": date.weekday() >= 5,
"is_holiday": False
})
df = pd.DataFrame(data)
df.to_csv(file_path, index=False)
logger.info(f"Created sample sales data: {len(df)} records")
async def generate_sample_weather_data(self, file_path: Path):
"""Generate sample weather data"""
import json
from datetime import datetime, timedelta
import numpy as np
start_date = datetime(2023, 6, 1)
weather_data = []
for i in range(180):
date = start_date + timedelta(days=i)
day_of_year = date.timetuple().tm_yday
base_temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi)
weather_data.append({
"date": date.isoformat(),
"temperature": round(base_temp + np.random.normal(0, 5), 1),
"precipitation": max(0, np.random.exponential(1.0)),
"humidity": np.random.uniform(30, 80),
"wind_speed": np.random.uniform(5, 25),
"pressure": np.random.uniform(1000, 1025),
"description": np.random.choice(["Soleado", "Nuboso", "Lluvioso"]),
"source": "aemet_test"
})
with open(file_path, 'w') as f:
json.dump(weather_data, f, indent=2)
logger.info(f"Created sample weather data: {len(weather_data)} records")
async def generate_sample_traffic_data(self, file_path: Path):
"""Generate sample traffic data"""
import json
from datetime import datetime, timedelta
import numpy as np
start_date = datetime(2023, 6, 1)
traffic_data = []
for i in range(180):
date = start_date + timedelta(days=i)
for hour in [8, 12, 18]: # Three measurements per day
measurement_time = date.replace(hour=hour)
if hour in [8, 18]: # Rush hours
volume = np.random.randint(800, 1500)
congestion = "high"
else: # Lunch time
volume = np.random.randint(400, 800)
congestion = "medium"
traffic_data.append({
"date": measurement_time.isoformat(),
"traffic_volume": volume,
"occupation_percentage": np.random.randint(10, 90),
"load_percentage": np.random.randint(20, 95),
"average_speed": np.random.randint(15, 50),
"congestion_level": congestion,
"pedestrian_count": np.random.randint(50, 500),
"measurement_point_id": "TEST_POINT_001",
"measurement_point_name": "Plaza Mayor",
"road_type": "URB",
"source": "madrid_opendata_test"
})
with open(file_path, 'w') as f:
json.dump(traffic_data, f, indent=2)
logger.info(f"Created sample traffic data: {len(traffic_data)} records")
async def verify_external_services(self):
"""Verify external services are available (mock or real)"""
logger.info("Verifying external services...")
# Check if mock services are available
mock_services = [
("Mock AEMET", "http://localhost:8080/health"),
("Mock Madrid OpenData", "http://localhost:8081/health"),
("Mock Auth Service", "http://localhost:8082/health"),
("Mock Data Service", "http://localhost:8083/health")
]
try:
import httpx
async with httpx.AsyncClient(timeout=5.0) as client:
for service_name, url in mock_services:
try:
response = await client.get(url)
if response.status_code == 200:
logger.info(f"{service_name} is available")
else:
logger.warning(f"{service_name} returned status {response.status_code}")
except Exception as e:
logger.warning(f"{service_name} is not available: {e}")
except ImportError:
logger.warning("httpx not available, skipping service checks")
def run_test_suite(self, suite_name: str) -> Dict[str, Any]:
"""Run a specific test suite"""
suite_config = self.test_suites[suite_name]
logger.info(f"Running {suite_name} test suite: {suite_config['description']}")
start_time = time.time()
# Prepare pytest command
pytest_args = [
"python", "-m", "pytest",
"-v",
"--tb=short",
"--capture=no",
f"--junitxml={self.results_dir}/junit_{suite_name}.xml",
f"--cov=app",
f"--cov-report=html:{self.results_dir}/coverage_{suite_name}_html",
f"--cov-report=xml:{self.results_dir}/coverage_{suite_name}.xml",
"--cov-report=term-missing"
]
# Add test files
for test_file in suite_config["files"]:
test_path = self.test_dir / test_file
if test_path.exists():
pytest_args.append(str(test_path))
else:
logger.warning(f"Test file not found: {test_file}")
# Run the tests
try:
result = subprocess.run(
pytest_args,
cwd=self.test_dir.parent, # Run from training service root
capture_output=True,
text=True,
timeout=suite_config["timeout"]
)
duration = time.time() - start_time
return {
"suite": suite_name,
"status": "passed" if result.returncode == 0 else "failed",
"return_code": result.returncode,
"duration": duration,
"stdout": result.stdout,
"stderr": result.stderr,
"timestamp": datetime.now().isoformat()
}
except subprocess.TimeoutExpired:
duration = time.time() - start_time
logger.error(f"Test suite {suite_name} timed out after {duration:.2f}s")
return {
"suite": suite_name,
"status": "timeout",
"return_code": -1,
"duration": duration,
"stdout": "",
"stderr": f"Test suite timed out after {suite_config['timeout']}s",
"timestamp": datetime.now().isoformat()
}
except Exception as e:
duration = time.time() - start_time
logger.error(f"Error running test suite {suite_name}: {e}")
return {
"suite": suite_name,
"status": "error",
"return_code": -1,
"duration": duration,
"stdout": "",
"stderr": str(e),
"timestamp": datetime.now().isoformat()
}
def generate_test_report(self):
"""Generate comprehensive test report"""
logger.info("Generating test report...")
# Calculate summary statistics
total_suites = len(self.test_results)
passed_suites = sum(1 for r in self.test_results.values() if r["status"] == "passed")
failed_suites = sum(1 for r in self.test_results.values() if r["status"] == "failed")
error_suites = sum(1 for r in self.test_results.values() if r["status"] == "error")
timeout_suites = sum(1 for r in self.test_results.values() if r["status"] == "timeout")
total_duration = sum(r["duration"] for r in self.test_results.values())
# Create detailed report
report = {
"test_run_summary": {
"timestamp": datetime.now().isoformat(),
"total_suites": total_suites,
"passed_suites": passed_suites,
"failed_suites": failed_suites,
"error_suites": error_suites,
"timeout_suites": timeout_suites,
"success_rate": (passed_suites / total_suites * 100) if total_suites > 0 else 0,
"total_duration_seconds": total_duration
},
"suite_results": self.test_results,
"recommendations": self.generate_recommendations()
}
# Save JSON report
report_file = self.results_dir / "test_report.json"
with open(report_file, 'w') as f:
json.dump(report, f, indent=2)
# Generate HTML report
self.generate_html_report(report)
# Print summary to console
self.print_test_summary(report)
return report
def generate_recommendations(self) -> List[str]:
"""Generate recommendations based on test results"""
recommendations = []
failed_suites = [name for name, result in self.test_results.items() if result["status"] == "failed"]
timeout_suites = [name for name, result in self.test_results.items() if result["status"] == "timeout"]
if failed_suites:
recommendations.append(f"Failed test suites: {', '.join(failed_suites)}. Check logs for detailed error messages.")
if timeout_suites:
recommendations.append(f"Timeout in suites: {', '.join(timeout_suites)}. Consider increasing timeout or optimizing performance.")
# Performance recommendations
slow_suites = [
name for name, result in self.test_results.items()
if result["duration"] > 300 # 5 minutes
]
if slow_suites:
recommendations.append(f"Slow test suites: {', '.join(slow_suites)}. Consider performance optimization.")
if not recommendations:
recommendations.append("All tests passed successfully! Consider adding more edge case tests.")
return recommendations
def generate_html_report(self, report: Dict[str, Any]):
"""Generate HTML test report"""
html_template = """
<!DOCTYPE html>
<html>
<head>
<title>Training Service Test Report</title>
<style>
/* Braces are doubled so str.format() below leaves the CSS intact */
body {{ font-family: Arial, sans-serif; margin: 40px; }}
.header {{ background-color: #f8f9fa; padding: 20px; border-radius: 5px; }}
.summary {{ display: flex; gap: 20px; margin: 20px 0; }}
.metric {{ background: white; border: 1px solid #dee2e6; padding: 15px; border-radius: 5px; text-align: center; }}
.metric-value {{ font-size: 24px; font-weight: bold; }}
.passed {{ color: #28a745; }}
.failed {{ color: #dc3545; }}
.timeout {{ color: #fd7e14; }}
.error {{ color: #6c757d; }}
.suite-result {{ margin: 20px 0; padding: 15px; border: 1px solid #dee2e6; border-radius: 5px; }}
.recommendations {{ background-color: #e7f3ff; padding: 15px; border-radius: 5px; margin: 20px 0; }}
pre {{ background-color: #f8f9fa; padding: 10px; border-radius: 3px; overflow-x: auto; }}
</style>
</head>
<body>
<div class="header">
<h1>Training Service Test Report</h1>
<p>Generated: {timestamp}</p>
</div>
<div class="summary">
<div class="metric">
<div class="metric-value">{total_suites}</div>
<div>Total Suites</div>
</div>
<div class="metric">
<div class="metric-value passed">{passed_suites}</div>
<div>Passed</div>
</div>
<div class="metric">
<div class="metric-value failed">{failed_suites}</div>
<div>Failed</div>
</div>
<div class="metric">
<div class="metric-value timeout">{timeout_suites}</div>
<div>Timeout</div>
</div>
<div class="metric">
<div class="metric-value">{success_rate:.1f}%</div>
<div>Success Rate</div>
</div>
<div class="metric">
<div class="metric-value">{duration:.1f}s</div>
<div>Total Duration</div>
</div>
</div>
<div class="recommendations">
<h3>Recommendations</h3>
<ul>
{recommendations_html}
</ul>
</div>
<h2>Suite Results</h2>
{suite_results_html}
</body>
</html>
"""
# Format recommendations
recommendations_html = '\n'.join(
f"<li>{rec}</li>" for rec in report["recommendations"]
)
# Format suite results
suite_results_html = ""
for suite_name, result in report["suite_results"].items():
status_class = result["status"]
suite_results_html += f"""
<div class="suite-result">
<h3>{suite_name.title()} Tests <span class="{status_class}">({result["status"].upper()})</span></h3>
<p><strong>Duration:</strong> {result["duration"]:.2f}s</p>
<p><strong>Return Code:</strong> {result["return_code"]}</p>
{f'<h4>Output:</h4><pre>{result["stdout"][:1000]}{"..." if len(result["stdout"]) > 1000 else ""}</pre>' if result["stdout"] else ""}
{f'<h4>Errors:</h4><pre>{result["stderr"][:1000]}{"..." if len(result["stderr"]) > 1000 else ""}</pre>' if result["stderr"] else ""}
</div>
"""
# Fill template
html_content = html_template.format(
timestamp=report["test_run_summary"]["timestamp"],
total_suites=report["test_run_summary"]["total_suites"],
passed_suites=report["test_run_summary"]["passed_suites"],
failed_suites=report["test_run_summary"]["failed_suites"],
timeout_suites=report["test_run_summary"]["timeout_suites"],
success_rate=report["test_run_summary"]["success_rate"],
duration=report["test_run_summary"]["total_duration_seconds"],
recommendations_html=recommendations_html,
suite_results_html=suite_results_html
)
# Save HTML report
html_file = self.results_dir / "test_report.html"
with open(html_file, 'w') as f:
f.write(html_content)
logger.info(f"HTML report saved to: {html_file}")
def print_test_summary(self, report: Dict[str, Any]):
"""Print test summary to console"""
summary = report["test_run_summary"]
print("\n" + "=" * 80)
print("TRAINING SERVICE TEST RESULTS SUMMARY")
print("=" * 80)
print(f"Timestamp: {summary['timestamp']}")
print(f"Total Suites: {summary['total_suites']}")
print(f"Passed: {summary['passed_suites']}")
print(f"Failed: {summary['failed_suites']}")
print(f"Errors: {summary['error_suites']}")
print(f"Timeouts: {summary['timeout_suites']}")
print(f"Success Rate: {summary['success_rate']:.1f}%")
print(f"Total Duration: {summary['total_duration_seconds']:.2f}s")
print("\nSUITE DETAILS:")
print("-" * 50)
for suite_name, result in report["suite_results"].items():
status_icon = "✅" if result["status"] == "passed" else "❌"
print(f"{status_icon} {suite_name.ljust(15)}: {result['status'].upper().ljust(10)} ({result['duration']:.2f}s)")
print("\nRECOMMENDATIONS:")
print("-" * 50)
for i, rec in enumerate(report["recommendations"], 1):
print(f"{i}. {rec}")
print("\nFILES GENERATED:")
print("-" * 50)
print(f"📄 JSON Report: {self.results_dir}/test_report.json")
print(f"🌐 HTML Report: {self.results_dir}/test_report.html")
print(f"📊 Coverage Reports: {self.results_dir}/coverage_*_html/")
print(f"📋 JUnit XML: {self.results_dir}/junit_*.xml")
print("=" * 80)
async def run_all_tests(self):
"""Run all test suites"""
logger.info("Starting comprehensive test run...")
# Setup environment
await self.setup_test_environment()
# Run each test suite
for suite_name in self.test_suites.keys():
logger.info(f"Starting {suite_name} test suite...")
result = self.run_test_suite(suite_name)
self.test_results[suite_name] = result
if result["status"] == "passed":
logger.info(f"{suite_name} tests PASSED ({result['duration']:.2f}s)")
elif result["status"] == "failed":
logger.error(f"{suite_name} tests FAILED ({result['duration']:.2f}s)")
elif result["status"] == "timeout":
logger.error(f"{suite_name} tests TIMED OUT ({result['duration']:.2f}s)")
else:
logger.error(f"💥 {suite_name} tests ERROR ({result['duration']:.2f}s)")
# Generate final report
report = self.generate_test_report()
return report
def run_specific_suite(self, suite_name: str):
"""Run a specific test suite"""
if suite_name not in self.test_suites:
logger.error(f"Unknown test suite: {suite_name}")
logger.info(f"Available suites: {', '.join(self.test_suites.keys())}")
return None
logger.info(f"Running {suite_name} test suite only...")
result = self.run_test_suite(suite_name)
self.test_results[suite_name] = result
# Generate report for single suite
report = self.generate_test_report()
return report
# ================================================================
# MAIN EXECUTION
# ================================================================
async def main():
"""Main execution function"""
import argparse
parser = argparse.ArgumentParser(description="Training Service Test Runner")
parser.add_argument(
"--suite",
choices=list(TrainingTestRunner().test_suites.keys()) + ["all"],
default="all",
help="Test suite to run (default: all)"
)
parser.add_argument(
"--verbose", "-v",
action="store_true",
help="Verbose output"
)
parser.add_argument(
"--quick",
action="store_true",
help="Run quick tests only (skip performance tests)"
)
args = parser.parse_args()
# Setup logging level
if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)
# Create test runner
runner = TrainingTestRunner()
# Modify test suites for quick run
if args.quick:
# Skip performance tests in quick mode
if "performance" in runner.test_suites:
del runner.test_suites["performance"]
logger.info("Quick mode: Skipping performance tests")
try:
if args.suite == "all":
report = await runner.run_all_tests()
else:
report = runner.run_specific_suite(args.suite)
# Exit with appropriate code
if report and report["test_run_summary"]["failed_suites"] == 0 and report["test_run_summary"]["error_suites"] == 0:
logger.info("All tests completed successfully!")
sys.exit(0)
else:
logger.error("Some tests failed!")
sys.exit(1)
except KeyboardInterrupt:
logger.info("Test run interrupted by user")
sys.exit(130)
except Exception as e:
logger.error(f"Test run failed with error: {e}")
sys.exit(1)
if __name__ == "__main__":
# Handle both direct execution and pytest discovery
if len(sys.argv) > 1 and sys.argv[1] in ["--suite", "-h", "--help"]:
# Running as main script with arguments
asyncio.run(main())
else:
# Running as pytest discovery or direct execution without args
print("Training Service Test Runner")
print("=" * 50)
print("Usage:")
print(" python run_tests.py --suite all # Run all test suites")
print(" python run_tests.py --suite unit # Run unit tests only")
print(" python run_tests.py --suite integration # Run integration tests only")
print(" python run_tests.py --suite performance # Run performance tests only")
print(" python run_tests.py --quick # Run quick tests (skip performance)")
print(" python run_tests.py -v # Verbose output")
print()
print("Available test suites:")
runner = TrainingTestRunner()
for suite_name, config in runner.test_suites.items():
print(f" {suite_name.ljust(15)}: {config['description']}")
print()
# If no arguments provided, run all tests
if len(sys.argv) == 1:
print("No arguments provided. Running all tests...")
asyncio.run(TrainingTestRunner().run_all_tests())
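
With this custom runner deleted, the same suites can be driven directly by pytest. A minimal sketch of an equivalent programmatic invocation is shown below; it assumes pytest-cov is installed and mirrors the flags the deleted runner passed, while the test path and report locations are illustrative.

# Hypothetical replacement for run_tests.py: drive pytest programmatically
import sys
import pytest

if __name__ == "__main__":
    exit_code = pytest.main([
        "services/training/tests",      # adjust to the actual test directory
        "-v",
        "--tb=short",
        "--cov=app",                    # coverage flags require pytest-cov
        "--cov-report=term-missing",
        "--junitxml=results/junit.xml",
    ])
    sys.exit(int(exit_code))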


@@ -1,687 +0,0 @@
# services/training/tests/test_api.py
"""
Tests for training service API endpoints
"""
import pytest
from unittest.mock import AsyncMock, patch
from fastapi import status
from httpx import AsyncClient
from app.schemas.training import TrainingJobRequest
class TestTrainingAPI:
"""Test training API endpoints"""
@pytest.mark.asyncio
async def test_health_check(self, test_client: AsyncClient):
"""Test health check endpoint"""
response = await test_client.get("/health")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert data["service"] == "training-service"
assert data["version"] == "1.0.0"
assert "status" in data
@pytest.mark.asyncio
async def test_readiness_check_ready(self, test_client: AsyncClient):
"""Test readiness check when service is ready"""
# Mock app state as ready
from app.main import app # Add import at top
with patch.object(app.state, 'ready', True, create=True):
response = await test_client.get("/health/ready")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert data["status"] == "ready"
@pytest.mark.asyncio
async def test_readiness_check_not_ready(self, test_client: AsyncClient):
"""Test readiness check when service is not ready"""
with patch('app.main.app.state.ready', False):
response = await test_client.get("/health/ready")
assert response.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
data = response.json()
assert data["status"] == "not_ready"
@pytest.mark.asyncio
async def test_liveness_check_healthy(self, test_client: AsyncClient):
"""Test liveness check when service is healthy"""
with patch('app.core.database.get_db_health', return_value=AsyncMock(return_value=True)):
response = await test_client.get("/health/live")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert data["status"] == "alive"
@pytest.mark.asyncio
async def test_liveness_check_unhealthy(self, test_client: AsyncClient):
"""Test liveness check when database is unhealthy"""
with patch('app.core.database.get_db_health', return_value=AsyncMock(return_value=False)):
response = await test_client.get("/health/live")
assert response.status_code == status.HTTP_503_SERVICE_UNAVAILABLE
data = response.json()
assert data["status"] == "unhealthy"
assert data["reason"] == "database_unavailable"
@pytest.mark.asyncio
async def test_metrics_endpoint(self, test_client: AsyncClient):
"""Test metrics endpoint"""
response = await test_client.get("/metrics")
assert response.status_code == status.HTTP_200_OK
data = response.json()
expected_metrics = [
"training_jobs_active",
"training_jobs_completed",
"training_jobs_failed",
"models_trained_total",
"uptime_seconds"
]
for metric in expected_metrics:
assert metric in data
@pytest.mark.asyncio
async def test_root_endpoint(self, test_client: AsyncClient):
"""Test root endpoint"""
response = await test_client.get("/")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert data["service"] == "training-service"
assert data["version"] == "1.0.0"
assert "description" in data
class TestTrainingJobsAPI:
"""Test training jobs API endpoints"""
@pytest.mark.asyncio
async def test_start_training_job_success(
self,
test_client: AsyncClient,
mock_messaging,
mock_ml_trainer,
mock_data_service
):
"""Test starting a training job successfully"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30,
"seasonality_mode": "additive"
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert "job_id" in data
assert data["status"] == "started"
assert data["tenant_id"] == "test-tenant"
assert "estimated_duration_minutes" in data
@pytest.mark.asyncio
async def test_start_training_job_validation_error(self, test_client: AsyncClient):
"""Test starting training job with validation error"""
request_data = {
"seasonality_mode": "invalid_mode", # Invalid value
"min_data_points": 5 # Too low
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
@pytest.mark.asyncio
async def test_get_training_status_existing_job(
self,
test_client: AsyncClient,
training_job_in_db
):
"""Test getting status of existing training job"""
job_id = training_job_in_db.job_id
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert data["job_id"] == job_id
assert data["status"] == "pending"
assert "progress" in data
assert "started_at" in data
@pytest.mark.asyncio
async def test_get_training_status_nonexistent_job(self, test_client: AsyncClient):
"""Test getting status of non-existent training job"""
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get("/training/jobs/nonexistent-job/status")
assert response.status_code == status.HTTP_404_NOT_FOUND
@pytest.mark.asyncio
async def test_list_training_jobs(
self,
test_client: AsyncClient,
training_job_in_db
):
"""Test listing training jobs"""
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get("/training/jobs")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert isinstance(data, list)
assert len(data) >= 1
# Check first job structure
job = data[0]
assert "job_id" in job
assert "status" in job
assert "started_at" in job
@pytest.mark.asyncio
async def test_list_training_jobs_with_status_filter(
self,
test_client: AsyncClient,
training_job_in_db
):
"""Test listing training jobs with status filter"""
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get("/training/jobs?status=pending")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert isinstance(data, list)
# All jobs should have status "pending"
for job in data:
assert job["status"] == "pending"
@pytest.mark.asyncio
async def test_cancel_training_job_success(
self,
test_client: AsyncClient,
training_job_in_db,
mock_messaging
):
"""Test cancelling a training job successfully"""
job_id = training_job_in_db.job_id
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(f"/training/jobs/{job_id}/cancel")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert "message" in data
assert "cancelled" in data["message"].lower()
@pytest.mark.asyncio
async def test_cancel_nonexistent_job(self, test_client: AsyncClient):
"""Test cancelling a non-existent training job"""
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs/nonexistent-job/cancel")
assert response.status_code == status.HTTP_404_NOT_FOUND
@pytest.mark.asyncio
async def test_get_training_logs(
self,
test_client: AsyncClient,
training_job_in_db
):
"""Test getting training logs"""
job_id = training_job_in_db.job_id
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/logs")
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert "job_id" in data
assert "logs" in data
assert isinstance(data["logs"], list)
@pytest.mark.asyncio
async def test_validate_training_data_valid(
self,
test_client: AsyncClient,
mock_data_service
):
"""Test validating valid training data"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/validate", json=request_data)
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert "is_valid" in data
assert "issues" in data
assert "recommendations" in data
assert "estimated_training_time" in data
class TestSingleProductTrainingAPI:
"""Test single product training API endpoints"""
@pytest.mark.asyncio
async def test_train_single_product_success(
self,
test_client: AsyncClient,
mock_messaging,
mock_ml_trainer,
mock_data_service
):
"""Test training a single product successfully"""
product_name = "Pan Integral"
request_data = {
"include_weather": True,
"include_traffic": True,
"seasonality_mode": "additive"
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
f"/training/products/{product_name}",
json=request_data
)
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert "job_id" in data
assert data["status"] == "started"
assert data["tenant_id"] == "test-tenant"
assert f"training started for {product_name}" in data["message"].lower()
@pytest.mark.asyncio
async def test_train_single_product_validation_error(self, test_client: AsyncClient):
"""Test single product training with validation error"""
product_name = "Pan Integral"
request_data = {
"seasonality_mode": "invalid_mode" # Invalid value
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
f"/training/products/{product_name}",
json=request_data
)
assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
@pytest.mark.asyncio
async def test_train_single_product_special_characters(
self,
test_client: AsyncClient,
mock_messaging,
mock_ml_trainer,
mock_data_service
):
"""Test training product with special characters in name"""
product_name = "Pan Francés" # With accent
request_data = {
"include_weather": True,
"seasonality_mode": "additive"
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
f"/training/products/{product_name}",
json=request_data
)
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert "job_id" in data
class TestModelsAPI:
"""Test models API endpoints"""
@pytest.mark.asyncio
async def test_list_models(
self,
test_client: AsyncClient,
trained_model_in_db
):
"""Test listing trained models"""
with patch('app.api.models.get_current_tenant_id', return_value="test-tenant"):
response = await test_client.get("/models")
# This endpoint might not exist yet, so we expect either 200 or 404
assert response.status_code in [status.HTTP_200_OK, status.HTTP_404_NOT_FOUND]
if response.status_code == status.HTTP_200_OK:
data = response.json()
assert isinstance(data, list)
@pytest.mark.asyncio
async def test_get_model_details(
self,
test_client: AsyncClient,
trained_model_in_db
):
"""Test getting model details"""
model_id = trained_model_in_db.model_id
with patch('app.api.models.get_current_tenant_id', return_value="test-tenant"):
response = await test_client.get(f"/models/{model_id}")
# This endpoint might not exist yet
assert response.status_code in [
status.HTTP_200_OK,
status.HTTP_404_NOT_FOUND,
status.HTTP_501_NOT_IMPLEMENTED
]
class TestErrorHandling:
"""Test error handling in API endpoints"""
@pytest.mark.asyncio
async def test_database_error_handling(self, test_client: AsyncClient):
"""Test handling of database errors"""
with patch('app.services.training_service.TrainingService.create_training_job') as mock_create:
mock_create.side_effect = Exception("Database connection failed")
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR
@pytest.mark.asyncio
async def test_missing_tenant_id(self, test_client: AsyncClient):
"""Test handling when tenant ID is missing"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
# Don't mock get_current_tenant_id to simulate missing auth
response = await test_client.post("/training/jobs", json=request_data)
# Should fail due to missing authentication
assert response.status_code in [status.HTTP_401_UNAUTHORIZED, status.HTTP_403_FORBIDDEN]
@pytest.mark.asyncio
async def test_invalid_job_id_format(self, test_client: AsyncClient):
"""Test handling of invalid job ID format"""
invalid_job_id = "invalid-job-id-with-special-chars@#$"
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{invalid_job_id}/status")
# Should handle gracefully
assert response.status_code in [status.HTTP_404_NOT_FOUND, status.HTTP_400_BAD_REQUEST]
@pytest.mark.asyncio
async def test_messaging_failure_handling(
self,
test_client: AsyncClient,
mock_data_service
):
"""Test handling when messaging fails"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
with patch('app.services.messaging.publish_job_started', side_effect=Exception("Messaging failed")), \
patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
# Should still succeed even if messaging fails
assert response.status_code == status.HTTP_200_OK
data = response.json()
assert "job_id" in data
@pytest.mark.asyncio
async def test_invalid_json_payload(self, test_client: AsyncClient):
"""Test handling of invalid JSON payload"""
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
"/training/jobs",
content="invalid json {{{",
headers={"Content-Type": "application/json"}
)
assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
@pytest.mark.asyncio
async def test_unsupported_content_type(self, test_client: AsyncClient):
"""Test handling of unsupported content type"""
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
"/training/jobs",
content="some text data",
headers={"Content-Type": "text/plain"}
)
assert response.status_code in [
status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
status.HTTP_422_UNPROCESSABLE_ENTITY
]
class TestAuthenticationIntegration:
"""Test authentication integration"""
@pytest.mark.asyncio
async def test_endpoints_require_auth(self, test_client: AsyncClient):
"""Test that endpoints require authentication in production"""
# This test would be more meaningful in a production environment
# where authentication is actually enforced
endpoints_to_test = [
("POST", "/training/jobs"),
("GET", "/training/jobs"),
("POST", "/training/products/Pan Integral"),
("POST", "/training/validate")
]
for method, endpoint in endpoints_to_test:
if method == "POST":
response = await test_client.post(endpoint, json={})
else:
response = await test_client.get(endpoint)
# In test environment with mocked auth, should work
# In production, would require valid authentication
assert response.status_code != status.HTTP_500_INTERNAL_SERVER_ERROR
@pytest.mark.asyncio
async def test_tenant_isolation_in_api(
self,
test_client: AsyncClient,
training_job_in_db
):
"""Test tenant isolation at API level"""
job_id = training_job_in_db.job_id
# Try to access job with different tenant
with patch('app.api.training.get_current_tenant_id', return_value="different-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
# Should not find job for different tenant
assert response.status_code == status.HTTP_404_NOT_FOUND
class TestAPIValidation:
"""Test API validation and input handling"""
@pytest.mark.asyncio
async def test_training_request_validation(self, test_client: AsyncClient):
"""Test comprehensive training request validation"""
# Test valid request
valid_request = {
"include_weather": True,
"include_traffic": False,
"min_data_points": 30,
"seasonality_mode": "additive",
"daily_seasonality": True,
"weekly_seasonality": True,
"yearly_seasonality": True
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=valid_request)
assert response.status_code == status.HTTP_200_OK
# Test invalid seasonality mode
invalid_request = valid_request.copy()
invalid_request["seasonality_mode"] = "invalid_mode"
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=invalid_request)
assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
# Test invalid min_data_points
invalid_request = valid_request.copy()
invalid_request["min_data_points"] = 5 # Too low
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=invalid_request)
assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY
@pytest.mark.asyncio
async def test_single_product_request_validation(self, test_client: AsyncClient):
"""Test single product training request validation"""
product_name = "Pan Integral"
# Test valid request
valid_request = {
"include_weather": True,
"include_traffic": True,
"seasonality_mode": "multiplicative"
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
f"/training/products/{product_name}",
json=valid_request
)
assert response.status_code == status.HTTP_200_OK
# Test empty product name
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
"/training/products/",
json=valid_request
)
assert response.status_code == status.HTTP_404_NOT_FOUND
@pytest.mark.asyncio
async def test_query_parameter_validation(self, test_client: AsyncClient):
"""Test query parameter validation"""
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
# Test valid limit parameter
response = await test_client.get("/training/jobs?limit=5")
assert response.status_code == status.HTTP_200_OK
# Test invalid limit parameter
response = await test_client.get("/training/jobs?limit=invalid")
assert response.status_code in [
status.HTTP_422_UNPROCESSABLE_ENTITY,
status.HTTP_400_BAD_REQUEST
]
# Test negative limit
response = await test_client.get("/training/jobs?limit=-1")
assert response.status_code in [
status.HTTP_422_UNPROCESSABLE_ENTITY,
status.HTTP_400_BAD_REQUEST
]
class TestAPIPerformance:
"""Test API performance characteristics"""
@pytest.mark.asyncio
async def test_concurrent_requests(self, test_client: AsyncClient):
"""Test handling of concurrent requests"""
import asyncio
# Create multiple concurrent requests
tasks = []
for i in range(10):
with patch('app.api.training.get_current_tenant_id', return_value=f"tenant-{i}"):
task = test_client.get("/health")
tasks.append(task)
responses = await asyncio.gather(*tasks)
# All requests should succeed
for response in responses:
assert response.status_code == status.HTTP_200_OK
@pytest.mark.asyncio
async def test_large_payload_handling(self, test_client: AsyncClient):
"""Test handling of large request payloads"""
# Create large request payload
large_request = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30,
"large_config": {f"key_{i}": f"value_{i}" for i in range(1000)}
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=large_request)
# Should handle large payload gracefully
assert response.status_code in [
status.HTTP_200_OK,
status.HTTP_413_REQUEST_ENTITY_TOO_LARGE
]
@pytest.mark.asyncio
async def test_rapid_successive_requests(self, test_client: AsyncClient):
"""Test rapid successive requests to same endpoint"""
# Make rapid requests
responses = []
for _ in range(20):
response = await test_client.get("/health")
responses.append(response)
# All should succeed
for response in responses:
assert response.status_code == status.HTTP_200_OK


@@ -1,311 +0,0 @@
# ================================================================
# services/training/tests/test_end_to_end.py
# ================================================================
"""
End-to-End Testing for Training Service
Tests complete workflows from API to ML pipeline to results
"""
import pytest
import asyncio
import httpx
import pandas as pd
import numpy as np
import json
import tempfile
import time
from datetime import datetime, timedelta
from typing import Dict, List, Any
from unittest.mock import patch, AsyncMock
import uuid
from app.main import app
from app.schemas.training import TrainingJobRequest, SingleProductTrainingRequest
class TestTrainingServiceEndToEnd:
"""End-to-end tests for complete training workflows"""
@pytest.fixture
async def test_client(self):
"""Create test client for the training service"""
from httpx import AsyncClient
async with AsyncClient(app=app, base_url="http://test") as client:
yield client
@pytest.fixture
def real_bakery_data(self):
"""Use the actual bakery sales data from the uploaded CSV"""
# This fixture would load the real bakery_sales_2023_2024.csv data
# For testing, we'll simulate the structure based on the document description
# Generate realistic data matching the CSV structure
start_date = datetime(2023, 1, 1)
dates = [start_date + timedelta(days=i) for i in range(365)]
products = [
"Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
"Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras"
]
data = []
for date in dates:
for product in products:
# Realistic sales patterns for Madrid bakery
base_quantity = {
"Pan Integral": 80, "Pan Blanco": 120, "Croissant": 45,
"Magdalenas": 30, "Empanadas": 25, "Tarta Chocolate": 15,
"Roscon Reyes": 8, "Palmeras": 12
}.get(product, 20)
# Seasonal variations
if date.month == 12 and product == "Roscon Reyes":
base_quantity *= 5 # Christmas specialty
elif date.month in [6, 7, 8]: # Summer
base_quantity *= 0.85
elif date.month in [11, 12, 1]: # Winter
base_quantity *= 1.15
# Weekly patterns
if date.weekday() >= 5: # Weekends
base_quantity *= 1.3
elif date.weekday() == 0: # Monday slower
base_quantity *= 0.8
# Weather influence
temp = 15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi)
if temp > 30: # Very hot days
if product in ["Pan Integral", "Pan Blanco"]:
base_quantity *= 0.7
elif temp < 5: # Cold days
base_quantity *= 1.1
# Add realistic noise (numpy is imported at module level above)
quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.15)))
# Calculate revenue (realistic Spanish bakery prices)
price_per_unit = {
"Pan Integral": 2.80, "Pan Blanco": 2.50, "Croissant": 1.50,
"Magdalenas": 1.20, "Empanadas": 3.50, "Tarta Chocolate": 18.00,
"Roscon Reyes": 25.00, "Palmeras": 1.80
}.get(product, 2.00)
revenue = round(quantity * price_per_unit, 2)
data.append({
"date": date.strftime("%Y-%m-%d"),
"product": product,
"quantity": quantity,
"revenue": revenue,
"temperature": round(temp + np.random.normal(0, 3), 1),
"precipitation": max(0, np.random.exponential(0.8)),
"is_weekend": date.weekday() >= 5,
"is_holiday": self._is_spanish_holiday(date)
})
return pd.DataFrame(data)
def _is_spanish_holiday(self, date: datetime) -> bool:
"""Check if date is a Spanish holiday"""
spanish_holidays = [
(1, 1), # Año Nuevo
(1, 6), # Reyes Magos
(5, 1), # Día del Trabajo
(8, 15), # Asunción de la Virgen
(10, 12), # Fiesta Nacional de España
(11, 1), # Todos los Santos
(12, 6), # Día de la Constitución
(12, 8), # Inmaculada Concepción
(12, 25), # Navidad
]
return (date.month, date.day) in spanish_holidays
@pytest.fixture
async def mock_external_apis(self):
"""Mock external APIs (AEMET and Madrid OpenData)"""
with patch('app.external.aemet.AEMETClient') as mock_aemet, \
patch('app.external.madrid_opendata.MadridOpenDataClient') as mock_madrid:
# Mock AEMET weather data
mock_aemet_instance = AsyncMock()
mock_aemet.return_value = mock_aemet_instance
# Generate realistic Madrid weather data
weather_data = []
for i in range(365):
date = datetime(2023, 1, 1) + timedelta(days=i)
day_of_year = date.timetuple().tm_yday
# Madrid climate: hot summers, mild winters
base_temp = 14 + 12 * np.sin((day_of_year / 365) * 2 * np.pi)
weather_data.append({
"date": date,
"temperature": round(base_temp + np.random.normal(0, 4), 1),
"precipitation": max(0, np.random.exponential(1.2)),
"humidity": np.random.uniform(25, 75),
"wind_speed": np.random.uniform(3, 20),
"pressure": np.random.uniform(995, 1025),
"description": np.random.choice([
"Soleado", "Parcialmente nublado", "Nublado",
"Lluvia ligera", "Despejado"
]),
"source": "aemet"
})
mock_aemet_instance.get_historical_weather.return_value = weather_data
mock_aemet_instance.get_current_weather.return_value = weather_data[-1]
# Mock Madrid traffic data
mock_madrid_instance = AsyncMock()
mock_madrid.return_value = mock_madrid_instance
traffic_data = []
for i in range(365):
date = datetime(2023, 1, 1) + timedelta(days=i)
# Multiple measurements per day
for hour in range(6, 22, 2): # Every 2 hours from 6 AM to 10 PM
measurement_time = date.replace(hour=hour)
# Realistic Madrid traffic patterns
if hour in [7, 8, 9, 18, 19, 20]: # Rush hours
volume = np.random.randint(1200, 2000)
congestion = "high"
speed = np.random.randint(10, 25)
elif hour in [12, 13, 14]: # Lunch time
volume = np.random.randint(800, 1200)
congestion = "medium"
speed = np.random.randint(20, 35)
else: # Off-peak
volume = np.random.randint(300, 800)
congestion = "low"
speed = np.random.randint(30, 50)
traffic_data.append({
"date": measurement_time,
"traffic_volume": volume,
"occupation_percentage": np.random.randint(15, 85),
"load_percentage": np.random.randint(25, 90),
"average_speed": speed,
"congestion_level": congestion,
"pedestrian_count": np.random.randint(100, 800),
"measurement_point_id": "MADRID_CENTER_001",
"measurement_point_name": "Puerta del Sol",
"road_type": "URB",
"source": "madrid_opendata"
})
mock_madrid_instance.get_historical_traffic.return_value = traffic_data
mock_madrid_instance.get_current_traffic.return_value = traffic_data[-1]
yield {
'aemet': mock_aemet_instance,
'madrid': mock_madrid_instance
}
@pytest.mark.asyncio
async def test_complete_training_workflow_api(
self,
test_client,
real_bakery_data,
mock_external_apis
):
"""Test complete training workflow through API endpoints"""
# Step 1: Check service health
health_response = await test_client.get("/health")
assert health_response.status_code == 200
health_data = health_response.json()
assert health_data["status"] == "healthy"
# Step 2: Validate training data quality
with patch('app.services.training_service.TrainingService._fetch_sales_data',
return_value=real_bakery_data):
validation_response = await test_client.post(
"/training/validate",
json={
"tenant_id": "test_bakery_001",
"include_weather": True,
"include_traffic": True
}
)
assert validation_response.status_code == 200
validation_data = validation_response.json()
assert validation_data["is_valid"] is True
assert validation_data["data_points"] > 1000 # Sufficient data
assert validation_data["missing_percentage"] < 10
# Step 3: Start training job for multiple products
training_request = {
"products": ["Pan Integral", "Croissant", "Magdalenas"],
"include_weather": True,
"include_traffic": True,
"config": {
"seasonality_mode": "additive",
"changepoint_prior_scale": 0.05,
"seasonality_prior_scale": 10.0,
"validation_enabled": True
}
}
with patch('app.services.training_service.TrainingService._fetch_sales_data',
return_value=real_bakery_data):
start_response = await test_client.post(
"/training/jobs",
json=training_request,
headers={"X-Tenant-ID": "test_bakery_001"}
)
assert start_response.status_code == 201
job_data = start_response.json()
job_id = job_data["job_id"]
assert job_data["status"] == "pending"
# Step 4: Monitor job progress
max_wait_time = 300 # 5 minutes
start_time = time.time()
while time.time() - start_time < max_wait_time:
status_response = await test_client.get(f"/training/jobs/{job_id}/status")
assert status_response.status_code == 200
status_data = status_response.json()
if status_data["status"] == "completed":
# Training completed successfully
assert "models_trained" in status_data
assert len(status_data["models_trained"]) == 3 # Three products
# Check model quality
for model_info in status_data["models_trained"]:
assert "product_name" in model_info
assert "model_id" in model_info
assert "metrics" in model_info
metrics = model_info["metrics"]
assert "mape" in metrics
assert "rmse" in metrics
assert "mae" in metrics
# Quality thresholds for bakery data
assert metrics["mape"] < 50, f"MAPE too high for {model_info['product_name']}: {metrics['mape']}"
assert metrics["rmse"] > 0
break
elif status_data["status"] == "failed":
pytest.fail(f"Training job failed: {status_data.get('error_message', 'Unknown error')}")
# Wait before checking again
await asyncio.sleep(10)
else:
pytest.fail(f"Training job did not complete within {max_wait_time} seconds")
# Step 5: Get detailed job logs
logs_response = await test_client.get(f"/training/jobs/{job_id}/logs")
assert logs_response.status_code == 200
logs_data = logs_response.json()
assert "logs" in logs_data
assert len(logs_data["logs"]) > 0


@@ -1,848 +0,0 @@
# services/training/tests/test_integration.py
"""
Integration tests for training service
Tests complete workflows and service interactions
"""
import pytest
import asyncio
from unittest.mock import AsyncMock, Mock, patch
from httpx import AsyncClient
from datetime import datetime, timedelta
from app.main import app
from app.schemas.training import TrainingJobRequest
class TestTrainingWorkflowIntegration:
"""Test complete training workflows end-to-end"""
@pytest.mark.asyncio
async def test_complete_training_workflow(
self,
test_client: AsyncClient,
test_db_session,
mock_messaging,
mock_data_service,
mock_ml_trainer
):
"""Test complete training workflow from API to completion"""
# Step 1: Start training job
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30,
"seasonality_mode": "additive"
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
assert response.status_code == 200
job_data = response.json()
job_id = job_data["job_id"]
# Step 2: Check initial status
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
assert response.status_code == 200
status_data = response.json()
assert status_data["status"] in ["pending", "started"]
# Step 3: Simulate background task completion
# In real scenario, this would be handled by background tasks
await asyncio.sleep(0.1) # Allow background task to start
# Step 4: Check completion status
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
# The job should exist in database even if not completed yet
assert response.status_code == 200
@pytest.mark.asyncio
async def test_single_product_training_workflow(
self,
test_client: AsyncClient,
mock_messaging,
mock_data_service,
mock_ml_trainer
):
"""Test single product training complete workflow"""
product_name = "Pan Integral"
request_data = {
"include_weather": True,
"include_traffic": False,
"seasonality_mode": "additive"
}
# Start single product training
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(
f"/training/products/{product_name}",
json=request_data
)
assert response.status_code == 200
job_data = response.json()
job_id = job_data["job_id"]
assert f"training started for {product_name}" in job_data["message"].lower()
# Check job status
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
assert response.status_code == 200
status_data = response.json()
assert status_data["job_id"] == job_id
@pytest.mark.asyncio
async def test_training_validation_workflow(
self,
test_client: AsyncClient,
mock_data_service
):
"""Test training data validation workflow"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
# Validate training data
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/validate", json=request_data)
assert response.status_code == 200
validation_data = response.json()
assert "is_valid" in validation_data
assert "issues" in validation_data
assert "recommendations" in validation_data
assert "estimated_training_time" in validation_data
# If validation passes, start actual training
if validation_data["is_valid"]:
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
assert response.status_code == 200
@pytest.mark.asyncio
async def test_job_cancellation_workflow(
self,
test_client: AsyncClient,
training_job_in_db,
mock_messaging
):
"""Test job cancellation workflow"""
job_id = training_job_in_db.job_id
# Check initial status
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
assert response.status_code == 200
initial_status = response.json()
assert initial_status["status"] == "pending"
# Cancel the job
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post(f"/training/jobs/{job_id}/cancel")
assert response.status_code == 200
cancel_response = response.json()
assert "cancelled" in cancel_response["message"].lower()
# Verify cancellation
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
assert response.status_code == 200
final_status = response.json()
assert final_status["status"] == "cancelled"
class TestServiceInteractionIntegration:
"""Test interactions between training service and external services"""
@pytest.mark.asyncio
async def test_data_service_integration(self, training_service, mock_data_service):
"""Test integration with data service"""
from app.schemas.training import TrainingJobRequest
request = TrainingJobRequest(
include_weather=True,
include_traffic=True,
min_data_points=30
)
# Test sales data fetching
sales_data = await training_service._fetch_sales_data("test-tenant", request)
assert isinstance(sales_data, list)
# Test weather data fetching
weather_data = await training_service._fetch_weather_data("test-tenant", request)
assert isinstance(weather_data, list)
# Test traffic data fetching
traffic_data = await training_service._fetch_traffic_data("test-tenant", request)
assert isinstance(traffic_data, list)
@pytest.mark.asyncio
async def test_messaging_integration(self, mock_messaging):
"""Test integration with messaging system"""
from app.services.messaging import (
publish_job_started,
publish_job_completed,
publish_model_trained
)
# Test various message types
result1 = await publish_job_started("job-123", "tenant-123", {})
result2 = await publish_job_completed("job-123", "tenant-123", {"status": "success"})
result3 = await publish_model_trained("model-123", "tenant-123", "Pan Integral", {"mae": 5.0})
assert result1 is True
assert result2 is True
assert result3 is True
@pytest.mark.asyncio
async def test_database_integration(self, test_db_session, training_service):
"""Test database operations integration"""
# Create a training job
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="integration-test-job",
config={"test": True}
)
assert job.job_id == "integration-test-job"
# Update job status
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="running",
progress=50,
current_step="Processing data"
)
# Retrieve updated job
updated_job = await training_service.get_job_status(
db=test_db_session,
job_id=job.job_id,
tenant_id="test-tenant"
)
assert updated_job.status == "running"
assert updated_job.progress == 50
class TestErrorHandlingIntegration:
"""Test error handling across service boundaries"""
@pytest.mark.asyncio
async def test_data_service_failure_handling(
self,
test_client: AsyncClient,
mock_messaging
):
"""Test handling when data service is unavailable"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
# Mock data service failure
with patch('httpx.AsyncClient') as mock_client:
mock_client.return_value.__aenter__.return_value.get.side_effect = Exception("Service unavailable")
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
# Should still create job but might fail during execution
assert response.status_code == 200
@pytest.mark.asyncio
async def test_messaging_failure_handling(
self,
test_client: AsyncClient,
mock_data_service
):
"""Test handling when messaging fails"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
# Mock messaging failure
with patch('app.services.messaging.publish_job_started', side_effect=Exception("Messaging failed")):
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
# Should still succeed even if messaging fails
assert response.status_code == 200
@pytest.mark.asyncio
async def test_ml_training_failure_handling(
self,
test_client: AsyncClient,
mock_messaging,
mock_data_service
):
"""Test handling when ML training fails"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
# Mock ML training failure
with patch('app.ml.trainer.BakeryMLTrainer.train_tenant_models', side_effect=Exception("ML training failed")):
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=request_data)
# Job should be created successfully
assert response.status_code == 200
# Background task would handle the failure
class TestPerformanceIntegration:
"""Test performance characteristics of integrated workflows"""
@pytest.mark.asyncio
async def test_concurrent_training_jobs(
self,
test_client: AsyncClient,
mock_messaging,
mock_data_service,
mock_ml_trainer
):
"""Test handling multiple concurrent training jobs"""
request_data = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 30
}
# Start the jobs concurrently; keep the tenant override active while each
# request is actually awaited, not only while the coroutine is created
async def submit_job(i: int):
    with patch('app.api.training.get_current_tenant_id', return_value=f"tenant-{i}"):
        return await test_client.post("/training/jobs", json=request_data)
responses = await asyncio.gather(*(submit_job(i) for i in range(5)))
# All jobs should be created successfully
for response in responses:
assert response.status_code == 200
data = response.json()
assert "job_id" in data
@pytest.mark.asyncio
async def test_large_dataset_handling(
self,
training_service,
test_db_session
):
"""Test handling of large datasets"""
# Simulate large dataset
large_config = {
"include_weather": True,
"include_traffic": True,
"min_data_points": 1000, # Large minimum
"products": [f"Product-{i}" for i in range(100)] # Many products
}
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="large-dataset-job",
config=large_config
)
assert job.config == large_config
assert job.job_id == "large-dataset-job"
@pytest.mark.asyncio
async def test_rapid_status_checks(
self,
test_client: AsyncClient,
training_job_in_db
):
"""Test rapid successive status checks"""
job_id = training_job_in_db.job_id
# Make many rapid status requests with the tenant override active for the whole batch
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
    tasks = [
        test_client.get(f"/training/jobs/{job_id}/status")
        for _ in range(20)
    ]
    responses = await asyncio.gather(*tasks)
# All requests should succeed
for response in responses:
assert response.status_code == 200
class TestSecurityIntegration:
"""Test security aspects of service integration"""
@pytest.mark.asyncio
async def test_tenant_isolation(
self,
test_client: AsyncClient,
training_job_in_db,
mock_messaging
):
"""Test that tenants cannot access each other's jobs"""
job_id = training_job_in_db.job_id
# Try to access job with different tenant ID
with patch('app.api.training.get_current_tenant_id', return_value="different-tenant"):
response = await test_client.get(f"/training/jobs/{job_id}/status")
# Should not find the job (belongs to different tenant)
assert response.status_code == 404
@pytest.mark.asyncio
async def test_input_validation_integration(
self,
test_client: AsyncClient
):
"""Test input validation across API boundaries"""
# Test invalid seasonality mode
invalid_request = {
"seasonality_mode": "invalid_mode",
"min_data_points": -5 # Invalid negative value
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=invalid_request)
assert response.status_code == 422 # Validation error
@pytest.mark.asyncio
async def test_sql_injection_protection(
self,
test_client: AsyncClient
):
"""Test protection against SQL injection attempts"""
# Try SQL injection in job ID
malicious_job_id = "job'; DROP TABLE model_training_logs; --"
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.get(f"/training/jobs/{malicious_job_id}/status")
# Should return 404, not cause database error
assert response.status_code == 404
class TestRecoveryIntegration:
"""Test recovery and resilience scenarios"""
@pytest.mark.asyncio
async def test_service_restart_recovery(
self,
test_db_session,
training_service,
training_job_in_db
):
"""Test service recovery after restart"""
# Simulate service restart by creating new service instance
new_training_service = training_service.__class__()
# Should be able to access existing jobs
existing_job = await new_training_service.get_job_status(
db=test_db_session,
job_id=training_job_in_db.job_id,
tenant_id=training_job_in_db.tenant_id
)
assert existing_job is not None
assert existing_job.job_id == training_job_in_db.job_id
@pytest.mark.asyncio
async def test_partial_failure_recovery(
self,
training_service,
test_db_session
):
"""Test recovery from partial failures"""
# Create job that might fail partway through
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="partial-failure-job",
config={"simulate_failure": True}
)
# Simulate partial progress
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="running",
progress=50,
current_step="Halfway through training"
)
# Simulate failure
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="failed",
progress=50,
current_step="Training failed",
error_message="Simulated failure"
)
# Verify failure was recorded
failed_job = await training_service.get_job_status(
db=test_db_session,
job_id=job.job_id,
tenant_id="test-tenant"
)
assert failed_job.status == "failed"
assert failed_job.error_message == "Simulated failure"
assert failed_job.progress == 50
class TestComplianceIntegration:
"""Test compliance and audit requirements"""
@pytest.mark.asyncio
async def test_audit_trail_creation(
self,
training_service,
test_db_session
):
"""Test that audit trail is properly created"""
# Create and update job
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="audit-test-job",
config={"audit_test": True}
)
# Multiple status updates
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="running",
progress=25,
current_step="Started processing"
)
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="running",
progress=75,
current_step="Almost complete"
)
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="completed",
progress=100,
current_step="Completed successfully"
)
# Verify audit trail
logs = await training_service.get_training_logs(
db=test_db_session,
job_id=job.job_id,
tenant_id="test-tenant"
)
assert logs is not None
assert len(logs) > 0
# Check final status
final_job = await training_service.get_job_status(
db=test_db_session,
job_id=job.job_id,
tenant_id="test-tenant"
)
assert final_job.status == "completed"
assert final_job.progress == 100
@pytest.mark.asyncio
async def test_data_retention_compliance(
self,
training_service,
test_db_session
):
"""Test data retention and cleanup compliance"""
from datetime import datetime, timedelta
# Create old job (simulate old data)
old_job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="old-job",
config={"created_long_ago": True}
)
# Manually set old timestamp
from sqlalchemy import update
from app.models.training import ModelTrainingLog
old_timestamp = datetime.now() - timedelta(days=400)
await test_db_session.execute(
update(ModelTrainingLog)
.where(ModelTrainingLog.job_id == old_job.job_id)
.values(start_time=old_timestamp, created_at=old_timestamp)
)
await test_db_session.commit()
# Verify old job exists
retrieved_job = await training_service.get_job_status(
db=test_db_session,
job_id=old_job.job_id,
tenant_id="test-tenant"
)
assert retrieved_job is not None
# In a real implementation, there would be cleanup procedures
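# A hypothetical retention sweep (not part of this service) might look like:
#   cutoff = datetime.now() - timedelta(days=365)
#   await test_db_session.execute(
#       delete(ModelTrainingLog).where(ModelTrainingLog.created_at < cutoff)
#   )
# with `delete` imported from sqlalchemy; until such a policy exists, this test only
# asserts that old records remain retrievable.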
@pytest.mark.asyncio
async def test_gdpr_compliance_features(
self,
training_service,
test_db_session
):
"""Test GDPR compliance features"""
# Create job with tenant data
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="gdpr-test-tenant",
job_id="gdpr-test-job",
config={"gdpr_test": True}
)
# Verify job is associated with tenant
assert job.tenant_id == "gdpr-test-tenant"
# Test data access (right to access)
tenant_jobs = await training_service.list_training_jobs(
db=test_db_session,
tenant_id="gdpr-test-tenant"
)
assert len(tenant_jobs) >= 1
assert any(job.job_id == "gdpr-test-job" for job in tenant_jobs)
@pytest.mark.slow
class TestLongRunningIntegration:
"""Test long-running integration scenarios (marked as slow)"""
@pytest.mark.asyncio
async def test_extended_training_simulation(
self,
training_service,
test_db_session,
mock_messaging
):
"""Test extended training process simulation"""
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="long-running-job",
config={"extended_test": True}
)
# Simulate progress over time
progress_steps = [
(10, "Initializing"),
(25, "Loading data"),
(50, "Training models"),
(75, "Validating results"),
(90, "Storing models"),
(100, "Completed")
]
for progress, step in progress_steps:
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="running" if progress < 100 else "completed",
progress=progress,
current_step=step
)
# Small delay to simulate real progression
await asyncio.sleep(0.01)
# Verify final state
final_job = await training_service.get_job_status(
db=test_db_session,
job_id=job.job_id,
tenant_id="test-tenant"
)
assert final_job.status == "completed"
assert final_job.progress == 100
assert final_job.current_step == "Completed"
@pytest.mark.asyncio
async def test_memory_usage_stability(
self,
training_service,
test_db_session
):
"""Test memory usage stability over many operations"""
# Create many jobs to test memory stability
for i in range(50):
job = await training_service.create_training_job(
db=test_db_session,
tenant_id=f"tenant-{i % 5}", # 5 different tenants
job_id=f"memory-test-job-{i}",
config={"iteration": i}
)
# Update status
await training_service._update_job_status(
db=test_db_session,
job_id=job.job_id,
status="completed",
progress=100,
current_step="Completed"
)
# List jobs for each tenant
for tenant_i in range(5):
tenant_id = f"tenant-{tenant_i}"
jobs = await training_service.list_training_jobs(
db=test_db_session,
tenant_id=tenant_id,
limit=20
)
# Should have 10 jobs per tenant (50 total / 5 tenants)
assert len(jobs) == 10
class TestBackwardCompatibility:
"""Test backward compatibility with existing systems"""
@pytest.mark.asyncio
async def test_legacy_config_handling(
self,
training_service,
test_db_session
):
"""Test handling of legacy configuration formats"""
# Test with old-style configuration
legacy_config = {
"weather_enabled": True, # Old key
"traffic_enabled": True, # Old key
"minimum_samples": 30, # Old key
"prophet_config": { # Old nested structure
"seasonality": "additive"
}
}
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="legacy-config-job",
config=legacy_config
)
assert job.config == legacy_config
assert job.job_id == "legacy-config-job"
@pytest.mark.asyncio
async def test_api_version_compatibility(
self,
test_client: AsyncClient
):
"""Test API version compatibility"""
# Test with minimal request (old API style)
minimal_request = {
"include_weather": True
}
with patch('shared.auth.decorators.get_current_tenant_id_dep', return_value="test-tenant"):
response = await test_client.post("/training/jobs", json=minimal_request)
# Should work with defaults for missing fields
assert response.status_code == 200
data = response.json()
assert "job_id" in data
# Utility functions for integration tests
async def wait_for_condition(condition_func, timeout=5.0, interval=0.1):
"""Wait for a condition to become true"""
import time
start_time = time.time()
while time.time() - start_time < timeout:
if await condition_func():
return True
await asyncio.sleep(interval)
return False
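# Illustrative usage inside an async test (fixture names as used elsewhere in this module):
#   async def job_finished():
#       job = await training_service.get_job_status(
#           db=test_db_session, job_id=job_id, tenant_id="test-tenant"
#       )
#       return job.status in ("completed", "failed", "cancelled")
#   assert await wait_for_condition(job_finished, timeout=10.0)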
def assert_job_progression(job_updates):
"""Assert that job updates show proper progression"""
assert len(job_updates) > 0
# Check progress is non-decreasing
for i in range(1, len(job_updates)):
assert job_updates[i]["progress"] >= job_updates[i-1]["progress"]
# Check final status
final_update = job_updates[-1]
assert final_update["status"] in ["completed", "failed", "cancelled"]
def assert_valid_job_structure(job_data):
"""Assert job data has valid structure"""
required_fields = ["job_id", "status", "tenant_id"]
for field in required_fields:
assert field in job_data
assert isinstance(job_data["progress"], int)
assert 0 <= job_data["progress"] <= 100
assert job_data["status"] in ["pending", "running", "completed", "failed", "cancelled"]


@@ -1,467 +0,0 @@
# services/training/tests/test_messaging.py
"""
Tests for training service messaging functionality
"""
import pytest
from unittest.mock import AsyncMock, Mock, patch
import json
from app.services import messaging
class TestTrainingMessaging:
"""Test training service messaging functions"""
@pytest.fixture
def mock_publisher(self):
"""Mock the RabbitMQ publisher"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
mock_pub.connect = AsyncMock(return_value=True)
mock_pub.disconnect = AsyncMock(return_value=None)
yield mock_pub
@pytest.mark.asyncio
async def test_setup_messaging_success(self, mock_publisher):
"""Test successful messaging setup"""
await messaging.setup_messaging()
mock_publisher.connect.assert_called_once()
@pytest.mark.asyncio
async def test_setup_messaging_failure(self, mock_publisher):
"""Test messaging setup failure"""
mock_publisher.connect.return_value = False
await messaging.setup_messaging()
mock_publisher.connect.assert_called_once()
@pytest.mark.asyncio
async def test_cleanup_messaging(self, mock_publisher):
"""Test messaging cleanup"""
await messaging.cleanup_messaging()
mock_publisher.disconnect.assert_called_once()
@pytest.mark.asyncio
async def test_publish_job_started(self, mock_publisher):
"""Test publishing job started event"""
job_id = "test-job-123"
tenant_id = "test-tenant"
config = {"include_weather": True}
result = await messaging.publish_job_started(job_id, tenant_id, config)
assert result is True
mock_publisher.publish_event.assert_called_once()
# Check call arguments
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["exchange_name"] == "training.events"
assert call_args[1]["routing_key"] == "training.started"
event_data = call_args[1]["event_data"]
assert event_data["service_name"] == "training-service"
assert event_data["data"]["job_id"] == job_id
assert event_data["data"]["tenant_id"] == tenant_id
assert event_data["data"]["config"] == config
@pytest.mark.asyncio
async def test_publish_job_progress(self, mock_publisher):
"""Test publishing job progress event"""
job_id = "test-job-123"
tenant_id = "test-tenant"
progress = 50
step = "Training models"
result = await messaging.publish_job_progress(job_id, tenant_id, progress, step)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.progress"
event_data = call_args[1]["event_data"]
assert event_data["data"]["progress"] == progress
assert event_data["data"]["current_step"] == step
@pytest.mark.asyncio
async def test_publish_job_completed(self, mock_publisher):
"""Test publishing job completed event"""
job_id = "test-job-123"
tenant_id = "test-tenant"
results = {
"products_trained": 3,
"summary": {"success_rate": 100.0}
}
result = await messaging.publish_job_completed(job_id, tenant_id, results)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.completed"
event_data = call_args[1]["event_data"]
assert event_data["data"]["results"] == results
assert event_data["data"]["models_trained"] == 3
assert event_data["data"]["success_rate"] == 100.0
@pytest.mark.asyncio
async def test_publish_job_failed(self, mock_publisher):
"""Test publishing job failed event"""
job_id = "test-job-123"
tenant_id = "test-tenant"
error = "Data service unavailable"
result = await messaging.publish_job_failed(job_id, tenant_id, error)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.failed"
event_data = call_args[1]["event_data"]
assert event_data["data"]["error"] == error
@pytest.mark.asyncio
async def test_publish_job_cancelled(self, mock_publisher):
"""Test publishing job cancelled event"""
job_id = "test-job-123"
tenant_id = "test-tenant"
result = await messaging.publish_job_cancelled(job_id, tenant_id)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.cancelled"
@pytest.mark.asyncio
async def test_publish_product_training_started(self, mock_publisher):
"""Test publishing product training started event"""
job_id = "test-product-job-123"
tenant_id = "test-tenant"
product_name = "Pan Integral"
result = await messaging.publish_product_training_started(job_id, tenant_id, product_name)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.product.started"
event_data = call_args[1]["event_data"]
assert event_data["data"]["product_name"] == product_name
@pytest.mark.asyncio
async def test_publish_product_training_completed(self, mock_publisher):
"""Test publishing product training completed event"""
job_id = "test-product-job-123"
tenant_id = "test-tenant"
product_name = "Pan Integral"
model_id = "test-model-123"
result = await messaging.publish_product_training_completed(
job_id, tenant_id, product_name, model_id
)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.product.completed"
event_data = call_args[1]["event_data"]
assert event_data["data"]["model_id"] == model_id
assert event_data["data"]["product_name"] == product_name
@pytest.mark.asyncio
async def test_publish_model_trained(self, mock_publisher):
"""Test publishing model trained event"""
model_id = "test-model-123"
tenant_id = "test-tenant"
product_name = "Pan Integral"
metrics = {"mae": 5.2, "rmse": 7.8, "mape": 12.5}
result = await messaging.publish_model_trained(model_id, tenant_id, product_name, metrics)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.model.trained"
event_data = call_args[1]["event_data"]
assert event_data["data"]["training_metrics"] == metrics
@pytest.mark.asyncio
async def test_publish_model_updated(self, mock_publisher):
"""Test publishing model updated event"""
model_id = "test-model-123"
tenant_id = "test-tenant"
product_name = "Pan Integral"
version = 2
result = await messaging.publish_model_updated(model_id, tenant_id, product_name, version)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.model.updated"
event_data = call_args[1]["event_data"]
assert event_data["data"]["version"] == version
@pytest.mark.asyncio
async def test_publish_model_validated(self, mock_publisher):
"""Test publishing model validated event"""
model_id = "test-model-123"
tenant_id = "test-tenant"
product_name = "Pan Integral"
validation_results = {"is_valid": True, "accuracy": 0.95}
result = await messaging.publish_model_validated(
model_id, tenant_id, product_name, validation_results
)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.model.validated"
event_data = call_args[1]["event_data"]
assert event_data["data"]["validation_results"] == validation_results
@pytest.mark.asyncio
async def test_publish_model_saved(self, mock_publisher):
"""Test publishing model saved event"""
model_id = "test-model-123"
tenant_id = "test-tenant"
product_name = "Pan Integral"
model_path = "/models/test-model-123.pkl"
result = await messaging.publish_model_saved(model_id, tenant_id, product_name, model_path)
assert result is True
mock_publisher.publish_event.assert_called_once()
call_args = mock_publisher.publish_event.call_args
assert call_args[1]["routing_key"] == "training.model.saved"
event_data = call_args[1]["event_data"]
assert event_data["data"]["model_path"] == model_path
class TestMessagingErrorHandling:
"""Test error handling in messaging"""
@pytest.fixture
def failing_publisher(self):
"""Mock publisher that fails"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=False)
mock_pub.connect = AsyncMock(return_value=False)
yield mock_pub
@pytest.mark.asyncio
async def test_publish_event_failure(self, failing_publisher):
"""Test handling of publish event failure"""
result = await messaging.publish_job_started("job-123", "tenant-123", {})
assert result is False
failing_publisher.publish_event.assert_called_once()
@pytest.mark.asyncio
async def test_setup_messaging_connection_failure(self, failing_publisher):
"""Test setup with connection failure"""
await messaging.setup_messaging()
failing_publisher.connect.assert_called_once()
@pytest.mark.asyncio
async def test_publish_with_exception(self):
"""Test publishing with exception"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event.side_effect = Exception("Connection lost")
result = await messaging.publish_job_started("job-123", "tenant-123", {})
assert result is False
class TestMessagingIntegration:
"""Test messaging integration with shared components"""
@pytest.mark.asyncio
async def test_event_structure_consistency(self):
"""Test that events follow consistent structure"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
# Test different event types
await messaging.publish_job_started("job-123", "tenant-123", {})
await messaging.publish_job_completed("job-123", "tenant-123", {})
await messaging.publish_model_trained("model-123", "tenant-123", "Pan", {})
# Verify all calls have consistent structure
assert mock_pub.publish_event.call_count == 3
for call in mock_pub.publish_event.call_args_list:
event_data = call[1]["event_data"]
# All events should have these fields
assert "service_name" in event_data
assert "event_type" in event_data
assert "data" in event_data
assert event_data["service_name"] == "training-service"
@pytest.mark.asyncio
async def test_shared_event_classes_usage(self):
"""Test that shared event classes are used properly"""
with patch('shared.messaging.events.TrainingStartedEvent') as mock_event_class:
mock_event = Mock()
mock_event.to_dict.return_value = {
"service_name": "training-service",
"event_type": "training.started",
"data": {"job_id": "test-job"}
}
mock_event_class.return_value = mock_event
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
await messaging.publish_job_started("test-job", "test-tenant", {})
# Verify shared event class was used
mock_event_class.assert_called_once()
mock_event.to_dict.assert_called_once()
@pytest.mark.asyncio
async def test_routing_key_consistency(self):
"""Test that routing keys follow consistent patterns"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
# Test various event types
events_and_keys = [
(messaging.publish_job_started, "training.started"),
(messaging.publish_job_progress, "training.progress"),
(messaging.publish_job_completed, "training.completed"),
(messaging.publish_job_failed, "training.failed"),
(messaging.publish_job_cancelled, "training.cancelled"),
(messaging.publish_product_training_started, "training.product.started"),
(messaging.publish_product_training_completed, "training.product.completed"),
(messaging.publish_model_trained, "training.model.trained"),
(messaging.publish_model_updated, "training.model.updated"),
(messaging.publish_model_validated, "training.model.validated"),
(messaging.publish_model_saved, "training.model.saved")
]
for event_func, expected_key in events_and_keys:
mock_pub.reset_mock()
# Call event function with appropriate parameters
if "progress" in expected_key:
await event_func("job-123", "tenant-123", 50, "step")
elif "model" in expected_key and "trained" in expected_key:
await event_func("model-123", "tenant-123", "product", {})
elif "model" in expected_key and "updated" in expected_key:
await event_func("model-123", "tenant-123", "product", 1)
elif "model" in expected_key and "validated" in expected_key:
await event_func("model-123", "tenant-123", "product", {})
elif "model" in expected_key and "saved" in expected_key:
await event_func("model-123", "tenant-123", "product", "/path")
elif "product" in expected_key and "completed" in expected_key:
await event_func("job-123", "tenant-123", "product", "model-123")
elif "product" in expected_key:
await event_func("job-123", "tenant-123", "product")
elif "failed" in expected_key:
await event_func("job-123", "tenant-123", "error")
elif "cancelled" in expected_key:
await event_func("job-123", "tenant-123")
else:
await event_func("job-123", "tenant-123", {})
# Verify routing key
call_args = mock_pub.publish_event.call_args
assert call_args[1]["routing_key"] == expected_key
@pytest.mark.asyncio
async def test_exchange_consistency(self):
"""Test that all events use the same exchange"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
# Test multiple events
await messaging.publish_job_started("job-123", "tenant-123", {})
await messaging.publish_model_trained("model-123", "tenant-123", "product", {})
await messaging.publish_product_training_started("job-123", "tenant-123", "product")
# Verify all use same exchange
for call in mock_pub.publish_event.call_args_list:
assert call[1]["exchange_name"] == "training.events"
class TestMessagingPerformance:
"""Test messaging performance and reliability"""
@pytest.mark.asyncio
async def test_concurrent_publishing(self):
"""Test concurrent event publishing"""
import asyncio
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
# Create multiple concurrent publishing tasks
tasks = []
for i in range(10):
task = messaging.publish_job_progress(f"job-{i}", "tenant-123", i * 10, f"step-{i}")
tasks.append(task)
# Execute all tasks concurrently
results = await asyncio.gather(*tasks)
# Verify all succeeded
assert all(results)
assert mock_pub.publish_event.call_count == 10
@pytest.mark.asyncio
async def test_large_event_data(self):
"""Test publishing events with large data payloads"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
# Create large config data
large_config = {
"products": [f"Product-{i}" for i in range(1000)],
"features": [f"feature-{i}" for i in range(100)],
"hyperparameters": {f"param-{i}": i for i in range(50)}
}
result = await messaging.publish_job_started("job-123", "tenant-123", large_config)
assert result is True
mock_pub.publish_event.assert_called_once()
@pytest.mark.asyncio
async def test_rapid_sequential_publishing(self):
"""Test rapid sequential event publishing"""
with patch('app.services.messaging.training_publisher') as mock_pub:
mock_pub.publish_event = AsyncMock(return_value=True)
# Publish many events in sequence
for i in range(100):
await messaging.publish_job_progress("job-123", "tenant-123", i, f"step-{i}")
assert mock_pub.publish_event.call_count == 100


@@ -1,630 +0,0 @@
# ================================================================
# services/training/tests/test_performance.py
# ================================================================
"""
Performance and Load Testing for Training Service
Tests training performance with real-world data volumes
"""
import pytest
import asyncio
import pandas as pd
import numpy as np
import time
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
import psutil
import gc
from typing import List, Dict, Any
import logging
from unittest.mock import patch  # patch.object is used in test_training_service_throughput
from app.ml.trainer import BakeryMLTrainer
from app.ml.data_processor import BakeryDataProcessor
from app.services.training_service import TrainingService
class TestTrainingPerformance:
"""Performance tests for training service components"""
@pytest.fixture
def large_sales_dataset(self):
"""Generate large dataset for performance testing (2 years of data)"""
start_date = datetime(2022, 1, 1)
end_date = datetime(2024, 1, 1)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')
products = [
"Pan Integral", "Pan Blanco", "Croissant", "Magdalenas",
"Empanadas", "Tarta Chocolate", "Roscon Reyes", "Palmeras",
"Donuts", "Berlinas", "Napolitanas", "Ensaimadas"
]
data = []
for date in date_range:
for product in products:
# Realistic sales simulation
base_quantity = np.random.randint(5, 150)
# Seasonal patterns
if date.month in [12, 1]: # Winter/Holiday season
base_quantity *= 1.4
elif date.month in [6, 7, 8]: # Summer
base_quantity *= 0.8
# Weekly patterns
if date.weekday() >= 5: # Weekends
base_quantity *= 1.2
elif date.weekday() == 0: # Monday
base_quantity *= 0.7
# Add noise
quantity = max(1, int(base_quantity + np.random.normal(0, base_quantity * 0.1)))
data.append({
"date": date.strftime("%Y-%m-%d"),
"product": product,
"quantity": quantity,
"revenue": round(quantity * np.random.uniform(1.5, 8.0), 2),
"temperature": round(15 + 12 * np.sin((date.timetuple().tm_yday / 365) * 2 * np.pi) + np.random.normal(0, 3), 1),
"precipitation": max(0, np.random.exponential(0.8)),
"is_weekend": date.weekday() >= 5,
"is_holiday": self._is_spanish_holiday(date)
})
return pd.DataFrame(data)
def _is_spanish_holiday(self, date: datetime) -> bool:
"""Check if date is a Spanish holiday"""
holidays = [
(1, 1), # New Year
(1, 6), # Epiphany
(5, 1), # Labor Day
(8, 15), # Assumption
(10, 12), # National Day
(11, 1), # All Saints
(12, 6), # Constitution Day
(12, 8), # Immaculate Conception
(12, 25), # Christmas
]
return (date.month, date.day) in holidays
@pytest.mark.asyncio
async def test_single_product_training_performance(self, large_sales_dataset):
"""Test performance of single product training with large dataset"""
trainer = BakeryMLTrainer()
product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy()
# Measure memory before training
process = psutil.Process()
memory_before = process.memory_info().rss / 1024 / 1024 # MB
start_time = time.time()
result = await trainer.train_single_product(
tenant_id="perf_test_tenant",
product_name="Pan Integral",
sales_data=product_data,
config={
"include_weather": True,
"include_traffic": False, # Skip traffic for performance
"seasonality_mode": "additive"
}
)
end_time = time.time()
training_duration = end_time - start_time
# Measure memory after training
memory_after = process.memory_info().rss / 1024 / 1024 # MB
memory_used = memory_after - memory_before
# Performance assertions
assert training_duration < 120, f"Training took too long: {training_duration:.2f}s"
assert memory_used < 500, f"Memory usage too high: {memory_used:.2f}MB"
assert result['status'] == 'completed'
# Quality assertions
metrics = result['metrics']
assert metrics['mape'] < 50, f"MAPE too high: {metrics['mape']:.2f}%"
print(f"Performance Results:")
print(f" Training Duration: {training_duration:.2f}s")
print(f" Memory Used: {memory_used:.2f}MB")
print(f" Data Points: {len(product_data)}")
print(f" MAPE: {metrics['mape']:.2f}%")
print(f" RMSE: {metrics['rmse']:.2f}")
@pytest.mark.asyncio
async def test_concurrent_training_performance(self, large_sales_dataset):
"""Test performance of concurrent training jobs"""
trainer = BakeryMLTrainer()
products = ["Pan Integral", "Croissant", "Magdalenas"]
async def train_product(product_name: str):
"""Train a single product"""
product_data = large_sales_dataset[large_sales_dataset['product'] == product_name].copy()
start_time = time.time()
result = await trainer.train_single_product(
tenant_id=f"concurrent_test_{product_name.replace(' ', '_').lower()}",
product_name=product_name,
sales_data=product_data,
config={"include_weather": True, "include_traffic": False}
)
end_time = time.time()
return {
'product': product_name,
'duration': end_time - start_time,
'status': result['status'],
'metrics': result.get('metrics', {})
}
# Run concurrent training
start_time = time.time()
tasks = [train_product(product) for product in products]
results = await asyncio.gather(*tasks)
total_time = time.time() - start_time
# Verify all trainings completed
for result in results:
assert result['status'] == 'completed'
assert result['duration'] < 120 # Individual training time
# Concurrent execution should be faster than sequential
sequential_time_estimate = sum(r['duration'] for r in results)
efficiency = sequential_time_estimate / total_time
assert efficiency > 1.5, f"Concurrency efficiency too low: {efficiency:.2f}x"
print(f"Concurrent Training Results:")
print(f" Total Time: {total_time:.2f}s")
print(f" Sequential Estimate: {sequential_time_estimate:.2f}s")
print(f" Efficiency: {efficiency:.2f}x")
for result in results:
    mape = result['metrics'].get('mape', float('nan'))  # avoid formatting 'N/A' with :.2f
    print(f"  {result['product']}: {result['duration']:.2f}s, MAPE: {mape:.2f}%")
@pytest.mark.asyncio
async def test_data_processing_scalability(self, large_sales_dataset):
"""Test data processing performance with increasing data sizes"""
data_processor = BakeryDataProcessor()
# Test with different data sizes
data_sizes = [1000, 5000, 10000, 20000, len(large_sales_dataset)]
performance_results = []
for size in data_sizes:
# Take a sample of the specified size
sample_data = large_sales_dataset.head(size).copy()
start_time = time.time()
# Process the data
processed_data = await data_processor.prepare_training_data(
sales_data=sample_data,
include_weather=True,
include_traffic=True,
tenant_id="scalability_test",
product_name="Pan Integral"
)
processing_time = time.time() - start_time
performance_results.append({
'data_size': size,
'processing_time': processing_time,
'processed_rows': len(processed_data),
'throughput': size / processing_time if processing_time > 0 else 0
})
# Verify linear or sub-linear scaling
for i in range(1, len(performance_results)):
prev_result = performance_results[i-1]
curr_result = performance_results[i]
size_ratio = curr_result['data_size'] / prev_result['data_size']
time_ratio = curr_result['processing_time'] / prev_result['processing_time']
# Processing time should scale better than linearly
assert time_ratio < size_ratio * 1.5, f"Poor scaling at size {curr_result['data_size']}"
print("Data Processing Scalability Results:")
for result in performance_results:
print(f" Size: {result['data_size']:,} rows, Time: {result['processing_time']:.2f}s, "
f"Throughput: {result['throughput']:.0f} rows/s")
@pytest.mark.asyncio
async def test_memory_usage_optimization(self, large_sales_dataset):
"""Test memory usage optimization during training"""
trainer = BakeryMLTrainer()
process = psutil.Process()
# Baseline memory
gc.collect() # Force garbage collection
baseline_memory = process.memory_info().rss / 1024 / 1024 # MB
memory_snapshots = [{'stage': 'baseline', 'memory_mb': baseline_memory}]
# Load data
product_data = large_sales_dataset[large_sales_dataset['product'] == 'Pan Integral'].copy()
current_memory = process.memory_info().rss / 1024 / 1024
memory_snapshots.append({'stage': 'data_loaded', 'memory_mb': current_memory})
# Train model
result = await trainer.train_single_product(
tenant_id="memory_test_tenant",
product_name="Pan Integral",
sales_data=product_data,
config={"include_weather": True, "include_traffic": True}
)
current_memory = process.memory_info().rss / 1024 / 1024
memory_snapshots.append({'stage': 'model_trained', 'memory_mb': current_memory})
# Cleanup
del product_data
del result
gc.collect()
final_memory = process.memory_info().rss / 1024 / 1024
memory_snapshots.append({'stage': 'cleanup', 'memory_mb': final_memory})
# Memory assertions
peak_memory = max(snapshot['memory_mb'] for snapshot in memory_snapshots)
memory_increase = peak_memory - baseline_memory
memory_after_cleanup = final_memory - baseline_memory
assert memory_increase < 800, f"Peak memory increase too high: {memory_increase:.2f}MB"
assert memory_after_cleanup < 100, f"Memory not properly cleaned up: {memory_after_cleanup:.2f}MB"
print("Memory Usage Analysis:")
for snapshot in memory_snapshots:
print(f" {snapshot['stage']}: {snapshot['memory_mb']:.2f}MB")
print(f" Peak increase: {memory_increase:.2f}MB")
print(f" After cleanup: {memory_after_cleanup:.2f}MB")
@pytest.mark.asyncio
async def test_training_service_throughput(self, large_sales_dataset):
"""Test training service throughput with multiple requests"""
training_service = TrainingService()
# Simulate multiple training requests
num_requests = 5
products = ["Pan Integral", "Croissant", "Magdalenas", "Empanadas", "Tarta Chocolate"]
async def execute_training_request(request_id: int, product: str):
"""Execute a single training request"""
product_data = large_sales_dataset[large_sales_dataset['product'] == product].copy()
with patch.object(training_service, '_fetch_sales_data', return_value=product_data):
start_time = time.time()
result = await training_service.execute_training_job(
db=None, # Mock DB session
tenant_id=f"throughput_test_tenant_{request_id}",
job_id=f"job_{request_id}_{product.replace(' ', '_').lower()}",
request={
'products': [product],
'include_weather': True,
'include_traffic': False,
'config': {'seasonality_mode': 'additive'}
}
)
duration = time.time() - start_time
return {
'request_id': request_id,
'product': product,
'duration': duration,
'status': result.get('status', 'unknown'),
'models_trained': len(result.get('models_trained', []))
}
# Execute requests concurrently
start_time = time.time()
tasks = [
execute_training_request(i, products[i % len(products)])
for i in range(num_requests)
]
results = await asyncio.gather(*tasks)
total_time = time.time() - start_time
# Calculate throughput metrics
successful_requests = sum(1 for r in results if r['status'] == 'completed')
throughput = successful_requests / total_time # requests per second
# Performance assertions
assert successful_requests >= num_requests * 0.8, "Too many failed requests"
assert throughput >= 0.1, f"Throughput too low: {throughput:.3f} req/s"
assert total_time < 300, f"Total time too long: {total_time:.2f}s"
print(f"Training Service Throughput Results:")
print(f" Total Requests: {num_requests}")
print(f" Successful: {successful_requests}")
print(f" Total Time: {total_time:.2f}s")
print(f" Throughput: {throughput:.3f} req/s")
print(f" Average Request Time: {total_time/num_requests:.2f}s")
@pytest.mark.asyncio
async def test_large_dataset_edge_cases(self, large_sales_dataset):
"""Test handling of edge cases with large datasets"""
data_processor = BakeryDataProcessor()
# Test 1: Dataset with many missing values
corrupted_data = large_sales_dataset.copy()
# Introduce 30% missing values randomly
mask = np.random.random(len(corrupted_data)) < 0.3
corrupted_data.loc[mask, 'quantity'] = np.nan
start_time = time.time()
result = await data_processor.validate_data_quality(corrupted_data)
validation_time = time.time() - start_time
assert validation_time < 10, f"Validation too slow: {validation_time:.2f}s"
assert result['is_valid'] is False
assert 'high_missing_data' in result['issues']
# Test 2: Dataset with extreme outliers
outlier_data = large_sales_dataset.copy()
# Add extreme outliers (100x normal values)
outlier_indices = np.random.choice(len(outlier_data), size=int(len(outlier_data) * 0.01), replace=False)
outlier_data.loc[outlier_indices, 'quantity'] *= 100
start_time = time.time()
cleaned_data = await data_processor.clean_outliers(outlier_data)
cleaning_time = time.time() - start_time
assert cleaning_time < 15, f"Outlier cleaning too slow: {cleaning_time:.2f}s"
assert len(cleaned_data) > len(outlier_data) * 0.95 # Should retain most data
# Test 3: Very sparse data (many products with few sales)
sparse_data = large_sales_dataset.copy()
# Keep only 10% of data for each product randomly
sparse_data = sparse_data.groupby('product').apply(
lambda x: x.sample(n=max(1, int(len(x) * 0.1)))
).reset_index(drop=True)
start_time = time.time()
validation_result = await data_processor.validate_data_quality(sparse_data)
sparse_validation_time = time.time() - start_time
assert sparse_validation_time < 5, f"Sparse data validation too slow: {sparse_validation_time:.2f}s"
print("Edge Case Performance Results:")
print(f" Corrupted data validation: {validation_time:.2f}s")
print(f" Outlier cleaning: {cleaning_time:.2f}s")
print(f" Sparse data validation: {sparse_validation_time:.2f}s")
class TestTrainingServiceLoad:
"""Load testing for training service under stress"""
@pytest.mark.asyncio
async def test_sustained_load_training(self, large_sales_dataset):
"""Test training service under sustained load"""
trainer = BakeryMLTrainer()
# Define load test parameters
duration_minutes = 2 # Run for 2 minutes
requests_per_minute = 3
products = ["Pan Integral", "Croissant", "Magdalenas"]
async def sustained_training_worker(worker_id: int, duration: float):
"""Worker that continuously submits training requests"""
start_time = time.time()
completed_requests = 0
failed_requests = 0
while time.time() - start_time < duration:
try:
product = products[completed_requests % len(products)]
product_data = large_sales_dataset[
large_sales_dataset['product'] == product
].copy()
result = await trainer.train_single_product(
tenant_id=f"load_test_worker_{worker_id}",
product_name=product,
sales_data=product_data,
config={"include_weather": False, "include_traffic": False} # Minimal config for speed
)
if result['status'] == 'completed':
completed_requests += 1
else:
failed_requests += 1
except Exception as e:
failed_requests += 1
logging.error(f"Training request failed: {e}")
# Wait before next request
await asyncio.sleep(60 / requests_per_minute)
return {
'worker_id': worker_id,
'completed': completed_requests,
'failed': failed_requests,
'duration': time.time() - start_time
}
# Start multiple workers
num_workers = 2
duration_seconds = duration_minutes * 60
start_time = time.time()
tasks = [
sustained_training_worker(i, duration_seconds)
for i in range(num_workers)
]
results = await asyncio.gather(*tasks)
total_time = time.time() - start_time
# Analyze results
total_completed = sum(r['completed'] for r in results)
total_failed = sum(r['failed'] for r in results)
success_rate = total_completed / (total_completed + total_failed) if (total_completed + total_failed) > 0 else 0
# Performance assertions
assert success_rate >= 0.8, f"Success rate too low: {success_rate:.2%}"
assert total_completed >= duration_minutes * requests_per_minute * num_workers * 0.7, "Throughput too low"
print(f"Sustained Load Test Results:")
print(f" Duration: {total_time:.2f}s")
print(f" Workers: {num_workers}")
print(f" Completed Requests: {total_completed}")
print(f" Failed Requests: {total_failed}")
print(f" Success Rate: {success_rate:.2%}")
print(f" Average Throughput: {total_completed/total_time:.2f} req/s")
@pytest.mark.asyncio
async def test_resource_exhaustion_recovery(self, large_sales_dataset):
"""Test service recovery from resource exhaustion"""
trainer = BakeryMLTrainer()
# Simulate resource exhaustion by running many concurrent requests
num_concurrent = 10 # High concurrency to stress the system
async def resource_intensive_task(task_id: int):
"""Task designed to consume resources"""
try:
# Use all products to increase memory usage
all_products_data = large_sales_dataset.copy()
result = await trainer.train_tenant_models(
tenant_id=f"resource_test_{task_id}",
sales_data=all_products_data,
config={
"train_all_products": True,
"include_weather": True,
"include_traffic": True
}
)
return {'task_id': task_id, 'status': 'completed', 'error': None}
except Exception as e:
return {'task_id': task_id, 'status': 'failed', 'error': str(e)}
# Launch all tasks simultaneously
start_time = time.time()
tasks = [resource_intensive_task(i) for i in range(num_concurrent)]
results = await asyncio.gather(*tasks, return_exceptions=True)
duration = time.time() - start_time
# Analyze results
completed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'completed')
failed = sum(1 for r in results if isinstance(r, dict) and r['status'] == 'failed')
exceptions = sum(1 for r in results if isinstance(r, Exception))
# The system should handle some failures gracefully
# but should complete at least some requests
total_processed = completed + failed + exceptions
processing_rate = total_processed / num_concurrent
assert processing_rate >= 0.5, f"Too many requests not processed: {processing_rate:.2%}"
assert duration < 600, f"Recovery took too long: {duration:.2f}s" # 10 minutes max
print(f"Resource Exhaustion Test Results:")
print(f" Concurrent Requests: {num_concurrent}")
print(f" Completed: {completed}")
print(f" Failed: {failed}")
print(f" Exceptions: {exceptions}")
print(f" Duration: {duration:.2f}s")
print(f" Processing Rate: {processing_rate:.2%}")
# ================================================================
# BENCHMARK UTILITIES
# ================================================================
class PerformanceBenchmark:
"""Utility class for performance benchmarking"""
@staticmethod
def measure_execution_time(func):
"""Decorator to measure execution time"""
async def wrapper(*args, **kwargs):
start_time = time.time()
result = await func(*args, **kwargs)
execution_time = time.time() - start_time
if hasattr(result, 'update') and isinstance(result, dict):
result['execution_time'] = execution_time
return result
return wrapper
@staticmethod
def memory_profiler(func):
"""Decorator to profile memory usage"""
async def wrapper(*args, **kwargs):
process = psutil.Process()
# Memory before
gc.collect()
memory_before = process.memory_info().rss / 1024 / 1024
result = await func(*args, **kwargs)
# Memory after
memory_after = process.memory_info().rss / 1024 / 1024
memory_used = memory_after - memory_before
if hasattr(result, 'update') and isinstance(result, dict):
result['memory_used_mb'] = memory_used
return result
return wrapper
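# Intended usage (sketch): stack the decorators on any async helper that returns a dict,
# e.g.
#   @PerformanceBenchmark.measure_execution_time
#   @PerformanceBenchmark.memory_profiler
#   async def profiled_training(**kwargs):
#       ...
# so the returned dict also carries 'execution_time' and 'memory_used_mb'.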
# ================================================================
# STANDALONE EXECUTION
# ================================================================
if __name__ == "__main__":
"""
Run performance tests as standalone script
Usage: python test_performance.py
"""
import sys
import os
from unittest.mock import patch
# Add the training service root to Python path
training_service_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, training_service_root)
print("=" * 60)
print("TRAINING SERVICE PERFORMANCE TEST SUITE")
print("=" * 60)
# Setup logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Run performance tests
pytest.main([
__file__,
"-v",
"--tb=short",
"-s", # Don't capture output
"--durations=10", # Show 10 slowest tests
"-m", "not slow", # Skip slow tests unless specifically requested
])
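# To include tests marked @pytest.mark.slow as well, drop the '-m "not slow"' filter
# above or invoke pytest with -m slow (assuming the "slow" marker is registered in the
# project's pytest configuration).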
print("\n" + "=" * 60)
print("PERFORMANCE TESTING COMPLETE")
print("=" * 60)


@@ -1,688 +0,0 @@
# services/training/tests/test_service.py
"""
Tests for training service business logic layer
"""
import pytest
from unittest.mock import AsyncMock, Mock, patch
from datetime import datetime, timedelta
import httpx
from app.services.training_service import TrainingService
from app.schemas.training import TrainingJobRequest, SingleProductTrainingRequest
from app.models.training import ModelTrainingLog, TrainedModel
class TestTrainingService:
"""Test the training service business logic"""
@pytest.fixture
def training_service(self, mock_ml_trainer):
return TrainingService()
@pytest.mark.asyncio
async def test_create_training_job_success(
self,
training_service,
test_db_session
):
"""Test successful training job creation"""
job_id = "test-job-123"
tenant_id = "test-tenant"
config = {"include_weather": True, "include_traffic": True}
result = await training_service.create_training_job(
db=test_db_session,
tenant_id=tenant_id,
job_id=job_id,
config=config
)
assert isinstance(result, ModelTrainingLog)
assert result.job_id == job_id
assert result.tenant_id == tenant_id
assert result.status == "pending"
assert result.progress == 0
assert result.config == config
@pytest.mark.asyncio
async def test_create_single_product_job_success(
self,
training_service,
test_db_session
):
"""Test successful single product job creation"""
job_id = "test-product-job-123"
tenant_id = "test-tenant"
product_name = "Pan Integral"
config = {"include_weather": True}
result = await training_service.create_single_product_job(
db=test_db_session,
tenant_id=tenant_id,
product_name=product_name,
job_id=job_id,
config=config
)
assert isinstance(result, ModelTrainingLog)
assert result.job_id == job_id
assert result.tenant_id == tenant_id
assert result.config["single_product"] == product_name
assert f"Initializing training for {product_name}" in result.current_step
@pytest.mark.asyncio
async def test_get_job_status_existing(
self,
training_service,
test_db_session,
training_job_in_db
):
"""Test getting status of existing job"""
result = await training_service.get_job_status(
db=test_db_session,
job_id=training_job_in_db.job_id,
tenant_id=training_job_in_db.tenant_id
)
assert result is not None
assert result.job_id == training_job_in_db.job_id
assert result.status == training_job_in_db.status
@pytest.mark.asyncio
async def test_get_job_status_nonexistent(
self,
training_service,
test_db_session
):
"""Test getting status of non-existent job"""
result = await training_service.get_job_status(
db=test_db_session,
job_id="nonexistent-job",
tenant_id="test-tenant"
)
assert result is None
@pytest.mark.asyncio
async def test_list_training_jobs(
self,
training_service,
test_db_session,
training_job_in_db
):
"""Test listing training jobs"""
result = await training_service.list_training_jobs(
db=test_db_session,
tenant_id=training_job_in_db.tenant_id,
limit=10
)
assert isinstance(result, list)
assert len(result) >= 1
assert result[0].job_id == training_job_in_db.job_id
@pytest.mark.asyncio
async def test_list_training_jobs_with_filter(
self,
training_service,
test_db_session,
training_job_in_db
):
"""Test listing training jobs with status filter"""
result = await training_service.list_training_jobs(
db=test_db_session,
tenant_id=training_job_in_db.tenant_id,
limit=10,
status_filter="pending"
)
assert isinstance(result, list)
for job in result:
assert job.status == "pending"
@pytest.mark.asyncio
async def test_cancel_training_job_success(
self,
training_service,
test_db_session,
training_job_in_db
):
"""Test successful job cancellation"""
result = await training_service.cancel_training_job(
db=test_db_session,
job_id=training_job_in_db.job_id,
tenant_id=training_job_in_db.tenant_id
)
assert result is True
# Verify status was updated
updated_job = await training_service.get_job_status(
db=test_db_session,
job_id=training_job_in_db.job_id,
tenant_id=training_job_in_db.tenant_id
)
assert updated_job.status == "cancelled"
@pytest.mark.asyncio
async def test_cancel_nonexistent_job(
self,
training_service,
test_db_session
):
"""Test cancelling non-existent job"""
result = await training_service.cancel_training_job(
db=test_db_session,
job_id="nonexistent-job",
tenant_id="test-tenant"
)
assert result is False
@pytest.mark.asyncio
async def test_validate_training_data_valid(
self,
training_service,
test_db_session,
mock_data_service
):
"""Test validation with valid data"""
config = {"min_data_points": 30}
result = await training_service.validate_training_data(
db=test_db_session,
tenant_id="test-tenant",
config=config
)
assert isinstance(result, dict)
assert "is_valid" in result
assert "issues" in result
assert "recommendations" in result
assert "estimated_time_minutes" in result
@pytest.mark.asyncio
async def test_validate_training_data_no_data(
self,
training_service,
test_db_session
):
"""Test validation with no data"""
config = {"min_data_points": 30}
with patch('app.services.training_service.TrainingService._fetch_sales_data', new_callable=AsyncMock, return_value=[]):
result = await training_service.validate_training_data(
db=test_db_session,
tenant_id="test-tenant",
config=config
)
assert result["is_valid"] is False
assert "No sales data found" in result["issues"][0]
@pytest.mark.asyncio
async def test_update_job_status(
self,
training_service,
test_db_session,
training_job_in_db
):
"""Test updating job status"""
await training_service._update_job_status(
db=test_db_session,
job_id=training_job_in_db.job_id,
status="running",
progress=50,
current_step="Training models"
)
# Verify update
updated_job = await training_service.get_job_status(
db=test_db_session,
job_id=training_job_in_db.job_id,
tenant_id=training_job_in_db.tenant_id
)
assert updated_job.status == "running"
assert updated_job.progress == 50
assert updated_job.current_step == "Training models"
@pytest.mark.asyncio
async def test_store_trained_models(
self,
training_service,
test_db_session
):
"""Test storing trained models"""
tenant_id = "test-tenant"
training_results = {
"training_results": {
"Pan Integral": {
"status": "success",
"model_info": {
"model_id": "test-model-123",
"model_path": "/test/models/test-model-123.pkl",
"type": "prophet",
"training_samples": 100,
"features": ["temperature", "humidity"],
"hyperparameters": {"seasonality_mode": "additive"},
"training_metrics": {"mae": 5.2, "rmse": 7.8},
"data_period": {
"start_date": "2024-01-01T00:00:00",
"end_date": "2024-01-31T00:00:00"
}
}
}
}
}
await training_service._store_trained_models(
db=test_db_session,
tenant_id=tenant_id,
training_results=training_results
)
# Verify model was stored
from sqlalchemy import select
result = await test_db_session.execute(
select(TrainedModel).where(
TrainedModel.tenant_id == tenant_id,
TrainedModel.product_name == "Pan Integral"
)
)
stored_model = result.scalar_one_or_none()
assert stored_model is not None
assert stored_model.model_id == "test-model-123"
assert stored_model.is_active is True
@pytest.mark.asyncio
async def test_get_training_logs(
self,
training_service,
test_db_session,
training_job_in_db
):
"""Test getting training logs"""
result = await training_service.get_training_logs(
db=test_db_session,
job_id=training_job_in_db.job_id,
tenant_id=training_job_in_db.tenant_id
)
assert isinstance(result, list)
assert len(result) > 0
# Check log content
log_text = " ".join(result)
assert training_job_in_db.job_id in log_text or "Job started" in log_text
class TestTrainingServiceDataFetching:
"""Test external data fetching functionality"""
@pytest.fixture
def training_service(self):
return TrainingService()
@pytest.mark.asyncio
async def test_fetch_sales_data_success(self, training_service):
"""Test successful sales data fetching"""
mock_request = Mock()
mock_request.start_date = None
mock_request.end_date = None
mock_response_data = {
"sales": [
{"date": "2024-01-01", "product_name": "Pan Integral", "quantity": 45}
]
}
with patch('httpx.AsyncClient') as mock_client:
mock_response = Mock()
mock_response.status_code = 200
mock_response.json.return_value = mock_response_data
# client.get() is awaited by the service, so it must be mocked as an AsyncMock
mock_client.return_value.__aenter__.return_value.get = AsyncMock(return_value=mock_response)
result = await training_service._fetch_sales_data(
tenant_id="test-tenant",
request=mock_request
)
assert result == mock_response_data["sales"]
@pytest.mark.asyncio
async def test_fetch_sales_data_error(self, training_service):
"""Test sales data fetching with API error"""
mock_request = Mock()
mock_request.start_date = None
mock_request.end_date = None
with patch('httpx.AsyncClient') as mock_client:
mock_response = Mock()
mock_response.status_code = 500
mock_client.return_value.__aenter__.return_value.get = AsyncMock(return_value=mock_response)
result = await training_service._fetch_sales_data(
tenant_id="test-tenant",
request=mock_request
)
assert result == []
@pytest.mark.asyncio
async def test_fetch_weather_data_success(self, training_service):
"""Test successful weather data fetching"""
mock_request = Mock()
mock_request.start_date = None
mock_request.end_date = None
mock_response_data = {
"weather": [
{"date": "2024-01-01", "temperature": 15.2, "precipitation": 0.0}
]
}
with patch('httpx.AsyncClient') as mock_client:
mock_response = Mock()
mock_response.status_code = 200
mock_response.json.return_value = mock_response_data
mock_client.return_value.__aenter__.return_value.get = AsyncMock(return_value=mock_response)
result = await training_service._fetch_weather_data(
tenant_id="test-tenant",
request=mock_request
)
assert result == mock_response_data["weather"]
@pytest.mark.asyncio
async def test_fetch_traffic_data_success(self, training_service):
"""Test successful traffic data fetching"""
mock_request = Mock()
mock_request.start_date = None
mock_request.end_date = None
mock_response_data = {
"traffic": [
{"date": "2024-01-01", "traffic_volume": 120}
]
}
with patch('httpx.AsyncClient') as mock_client:
mock_response = Mock()
mock_response.status_code = 200
mock_response.json.return_value = mock_response_data
mock_client.return_value.__aenter__.return_value.get = AsyncMock(return_value=mock_response)
result = await training_service._fetch_traffic_data(
tenant_id="test-tenant",
request=mock_request
)
assert result == mock_response_data["traffic"]
@pytest.mark.asyncio
async def test_fetch_data_with_date_filters(self, training_service):
"""Test data fetching with date filters"""
from datetime import datetime
mock_request = Mock()
mock_request.start_date = datetime(2024, 1, 1)
mock_request.end_date = datetime(2024, 1, 31)
with patch('httpx.AsyncClient') as mock_client:
mock_response = Mock()
mock_response.status_code = 200
mock_response.json.return_value = {"sales": []}
mock_get = AsyncMock(return_value=mock_response)
mock_client.return_value.__aenter__.return_value.get = mock_get
await training_service._fetch_sales_data(
tenant_id="test-tenant",
request=mock_request
)
# Verify dates were passed in params
call_args = mock_get.call_args
params = call_args[1]["params"]
assert "start_date" in params
assert "end_date" in params
assert params["start_date"] == "2024-01-01T00:00:00"
assert params["end_date"] == "2024-01-31T00:00:00"
class TestTrainingServiceExecution:
"""Test training execution workflow"""
@pytest.fixture
def training_service(self, mock_ml_trainer):
return TrainingService()
@pytest.mark.asyncio
async def test_execute_training_job_success(
self,
training_service,
test_db_session,
mock_messaging,
mock_data_service
):
"""Test successful training job execution"""
# Create job first
job_id = "test-execution-job"
training_log = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id=job_id,
config={"include_weather": True}
)
request = TrainingJobRequest(
include_weather=True,
include_traffic=True,
min_data_points=30
)
with patch('app.services.training_service.TrainingService._fetch_sales_data') as mock_fetch_sales, \
patch('app.services.training_service.TrainingService._fetch_weather_data') as mock_fetch_weather, \
patch('app.services.training_service.TrainingService._fetch_traffic_data') as mock_fetch_traffic, \
patch('app.services.training_service.TrainingService._store_trained_models') as mock_store:
mock_fetch_sales.return_value = [{"date": "2024-01-01", "product_name": "Pan Integral", "quantity": 45}]
mock_fetch_weather.return_value = []
mock_fetch_traffic.return_value = []
mock_store.return_value = None
await training_service.execute_training_job(
db=test_db_session,
job_id=job_id,
tenant_id="test-tenant",
request=request
)
# Verify job was completed
updated_job = await training_service.get_job_status(
db=test_db_session,
job_id=job_id,
tenant_id="test-tenant"
)
assert updated_job.status == "completed"
assert updated_job.progress == 100
@pytest.mark.asyncio
async def test_execute_training_job_failure(
self,
training_service,
test_db_session,
mock_messaging
):
"""Test training job execution with failure"""
# Create job first
job_id = "test-failure-job"
await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id=job_id,
config={}
)
request = TrainingJobRequest(min_data_points=30)
with patch('app.services.training_service.TrainingService._fetch_sales_data') as mock_fetch:
mock_fetch.side_effect = Exception("Data service unavailable")
with pytest.raises(Exception):
await training_service.execute_training_job(
db=test_db_session,
job_id=job_id,
tenant_id="test-tenant",
request=request
)
# Verify job was marked as failed
updated_job = await training_service.get_job_status(
db=test_db_session,
job_id=job_id,
tenant_id="test-tenant"
)
assert updated_job.status == "failed"
assert "Data service unavailable" in updated_job.error_message
@pytest.mark.asyncio
async def test_execute_single_product_training_success(
self,
training_service,
test_db_session,
mock_messaging,
mock_data_service
):
"""Test successful single product training execution"""
job_id = "test-single-product-job"
product_name = "Pan Integral"
await training_service.create_single_product_job(
db=test_db_session,
tenant_id="test-tenant",
product_name=product_name,
job_id=job_id,
config={}
)
request = SingleProductTrainingRequest(
include_weather=True,
include_traffic=False
)
with patch('app.services.training_service.TrainingService._fetch_product_sales_data') as mock_fetch_sales, \
patch('app.services.training_service.TrainingService._fetch_weather_data') as mock_fetch_weather, \
patch('app.services.training_service.TrainingService._store_single_trained_model') as mock_store:
mock_fetch_sales.return_value = [{"date": "2024-01-01", "product_name": product_name, "quantity": 45}]
mock_fetch_weather.return_value = []
mock_store.return_value = None
await training_service.execute_single_product_training(
db=test_db_session,
job_id=job_id,
tenant_id="test-tenant",
product_name=product_name,
request=request
)
# Verify job was completed
updated_job = await training_service.get_job_status(
db=test_db_session,
job_id=job_id,
tenant_id="test-tenant"
)
assert updated_job.status == "completed"
assert updated_job.progress == 100
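# ================================================================
# Hedged sketch (illustrative, not part of the original suite): the two full-pipeline
# tests above share the create / patch / execute / assert-final-status flow, so they
# could be folded into one parametrized test. This assumes the fixtures already used
# in this module (test_db_session, mock_messaging, mock_data_service, mock_ml_trainer)
# and the TrainingService / TrainingJobRequest imports at the top of the file.
# ================================================================
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "sales_failure, expected_status",
    [
        (None, "completed"),                                # healthy data fetch
        (Exception("Data service unavailable"), "failed"),  # upstream outage
    ],
)
async def test_execute_training_job_final_status(
    test_db_session, mock_messaging, mock_data_service, mock_ml_trainer,
    sales_failure, expected_status
):
    service = TrainingService()
    job_id = f"parametrized-{expected_status}-job"
    await service.create_training_job(
        db=test_db_session, tenant_id="test-tenant", job_id=job_id, config={}
    )
    request = TrainingJobRequest(min_data_points=30)
    with patch.object(TrainingService, "_fetch_sales_data", new_callable=AsyncMock) as mock_sales, \
         patch.object(TrainingService, "_fetch_weather_data", new_callable=AsyncMock, return_value=[]), \
         patch.object(TrainingService, "_fetch_traffic_data", new_callable=AsyncMock, return_value=[]), \
         patch.object(TrainingService, "_store_trained_models", new_callable=AsyncMock):
        if sales_failure is None:
            mock_sales.return_value = [
                {"date": "2024-01-01", "product_name": "Pan Integral", "quantity": 45}
            ]
            await service.execute_training_job(
                db=test_db_session, job_id=job_id, tenant_id="test-tenant", request=request
            )
        else:
            mock_sales.side_effect = sales_failure
            with pytest.raises(Exception):
                await service.execute_training_job(
                    db=test_db_session, job_id=job_id, tenant_id="test-tenant", request=request
                )
    updated_job = await service.get_job_status(
        db=test_db_session, job_id=job_id, tenant_id="test-tenant"
    )
    assert updated_job.status == expected_status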
class TestTrainingServiceEdgeCases:
"""Test edge cases and error conditions"""
@pytest.fixture
def training_service(self):
return TrainingService()
@pytest.mark.asyncio
async def test_database_connection_failure(self, training_service):
"""Test handling of database connection failures"""
# Use an AsyncMock session whose commit fails, so the error genuinely comes
# from the database layer instead of from passing a patch object as `db`
mock_db = AsyncMock()
mock_db.add = Mock()  # SQLAlchemy's add() is synchronous
mock_db.commit.side_effect = Exception("Database connection failed")
with pytest.raises(Exception):
await training_service.create_training_job(
db=mock_db,
tenant_id="test-tenant",
job_id="test-job",
config={}
)
@pytest.mark.asyncio
async def test_external_service_timeout(self, training_service):
"""Test handling of external service timeouts"""
mock_request = Mock()
mock_request.start_date = None
mock_request.end_date = None
with patch('httpx.AsyncClient') as mock_client:
mock_client.return_value.__aenter__.return_value.get = AsyncMock(side_effect=httpx.TimeoutException("Request timeout"))
result = await training_service._fetch_sales_data(
tenant_id="test-tenant",
request=mock_request
)
# Should return empty list on timeout
assert result == []
@pytest.mark.asyncio
async def test_concurrent_job_creation(self, training_service, test_db_session):
"""Test handling of concurrent job creation"""
# True concurrency would need one session per task (see the asyncio.gather
# sketch at the end of this module); for now, just verify that several jobs
# can be created back to back
job_ids = ["concurrent-job-1", "concurrent-job-2", "concurrent-job-3"]
jobs = []
for job_id in job_ids:
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id=job_id,
config={}
)
jobs.append(job)
assert len(jobs) == 3
for i, job in enumerate(jobs):
assert job.job_id == job_ids[i]
@pytest.mark.asyncio
async def test_malformed_config_handling(self, training_service, test_db_session):
"""Test handling of malformed configuration"""
malformed_config = {
"invalid_key": "invalid_value",
"nested": {"data": None}
}
# Should not raise exception, just store the config as-is
job = await training_service.create_training_job(
db=test_db_session,
tenant_id="test-tenant",
job_id="malformed-config-job",
config=malformed_config
)
assert job.config == malformed_config
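# ================================================================
# Hedged sketch (illustrative only): test_concurrent_job_creation above issues its
# create calls sequentially. True concurrency could be exercised with asyncio.gather,
# but a single AsyncSession is not safe to share between tasks, so each task needs its
# own session. The session_factory fixture named below is an assumption, not an
# existing fixture in conftest.py.
# ================================================================
import asyncio


@pytest.mark.asyncio
async def test_truly_concurrent_job_creation(session_factory):
    service = TrainingService()

    async def create_job(job_id):
        # Each concurrent task opens an independent database session
        async with session_factory() as db:
            return await service.create_training_job(
                db=db, tenant_id="test-tenant", job_id=job_id, config={}
            )

    job_ids = [f"gather-job-{i}" for i in range(3)]
    jobs = await asyncio.gather(*(create_job(job_id) for job_id in job_ids))

    assert sorted(job.job_id for job in jobs) == sorted(job_ids)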