Improve AI logic

Urtzi Alfaro
2025-11-05 13:34:56 +01:00
parent 5c87fbcf48
commit 394ad3aea4
218 changed files with 30627 additions and 7658 deletions

View File

@@ -0,0 +1,288 @@
"""
ML Insights API Endpoints for Production Service
Provides endpoints to trigger ML insight generation for:
- Production yield predictions
- Quality optimization
- Process efficiency analysis
"""
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from typing import Optional, List
from uuid import UUID
from datetime import datetime, timedelta
import structlog
import pandas as pd
from app.core.database import get_db
from sqlalchemy.ext.asyncio import AsyncSession
logger = structlog.get_logger()
router = APIRouter(
prefix="/api/v1/tenants/{tenant_id}/production/ml/insights",
tags=["ML Insights"]
)
# ================================================================
# REQUEST/RESPONSE SCHEMAS
# ================================================================
class YieldPredictionRequest(BaseModel):
"""Request schema for yield prediction"""
recipe_ids: Optional[List[str]] = Field(
None,
description="Specific recipe IDs to analyze. If None, analyzes up to 10 recipes for the tenant"
)
lookback_days: int = Field(
90,
description="Days of historical production to analyze",
ge=30,
le=365
)
min_history_runs: int = Field(
30,
description="Minimum production runs required",
ge=10,
le=100
)
class YieldPredictionResponse(BaseModel):
"""Response schema for yield prediction"""
success: bool
message: str
tenant_id: str
recipes_analyzed: int
total_insights_generated: int
total_insights_posted: int
recipes_with_issues: int
insights_by_recipe: dict
errors: List[str] = []
# ================================================================
# API ENDPOINTS
# ================================================================
@router.post("/predict-yields", response_model=YieldPredictionResponse)
async def trigger_yield_prediction(
tenant_id: str,
request_data: YieldPredictionRequest,
db: AsyncSession = Depends(get_db)
):
"""
Trigger yield prediction for production recipes.
This endpoint:
1. Fetches historical production data for specified recipes
2. Runs the YieldInsightsOrchestrator to predict yields
3. Generates insights about yield optimization opportunities
4. Posts insights to AI Insights Service
Args:
tenant_id: Tenant UUID
request_data: Prediction parameters
db: Database session
Returns:
YieldPredictionResponse with prediction results
"""
logger.info(
"ML insights yield prediction requested",
tenant_id=tenant_id,
recipe_ids=request_data.recipe_ids,
lookback_days=request_data.lookback_days
)
try:
# Import ML orchestrator and clients
from app.ml.yield_insights_orchestrator import YieldInsightsOrchestrator
from shared.clients.recipes_client import RecipesServiceClient
from app.core.config import settings
# Initialize orchestrator and recipes client
orchestrator = YieldInsightsOrchestrator()
recipes_client = RecipesServiceClient(settings)
# Get recipes to analyze from recipes service via API
if request_data.recipe_ids:
# Fetch specific recipes
recipes = []
for recipe_id in request_data.recipe_ids:
recipe = await recipes_client.get_recipe_by_id(
recipe_id=recipe_id,
tenant_id=tenant_id
)
if recipe:
recipes.append(recipe)
else:
# Fetch all recipes for tenant (limit to 10)
all_recipes = await recipes_client.get_all_recipes(tenant_id=tenant_id)
recipes = all_recipes[:10] if all_recipes else [] # Limit to prevent timeout
if not recipes:
return YieldPredictionResponse(
success=False,
message="No recipes found for analysis",
tenant_id=tenant_id,
recipes_analyzed=0,
total_insights_generated=0,
total_insights_posted=0,
recipes_with_issues=0,
insights_by_recipe={},
errors=["No recipes found"]
)
# Calculate date range for production history
end_date = datetime.utcnow()
start_date = end_date - timedelta(days=request_data.lookback_days)
# Process each recipe
total_insights_generated = 0
total_insights_posted = 0
recipes_with_issues = 0
insights_by_recipe = {}
errors = []
for recipe in recipes:
try:
recipe_id = str(recipe['id'])
recipe_name = recipe.get('name', 'Unknown Recipe')
logger.info(f"Analyzing yield for {recipe_name} ({recipe_id})")
# Fetch real production batch history from database
from app.models.production import ProductionBatch, ProductionStatus
from sqlalchemy import select
batch_query = select(ProductionBatch).where(
ProductionBatch.tenant_id == UUID(tenant_id),
ProductionBatch.recipe_id == UUID(recipe_id), # Use the extracted UUID
ProductionBatch.actual_start_time >= start_date,
ProductionBatch.actual_start_time <= end_date,
ProductionBatch.status == ProductionStatus.COMPLETED,
ProductionBatch.actual_quantity.isnot(None)
).order_by(ProductionBatch.actual_start_time)
batch_result = await db.execute(batch_query)
batches = batch_result.scalars().all()
if len(batches) < request_data.min_history_runs:
logger.warning(
f"Insufficient production history for recipe {recipe_id}: "
f"{len(batches)} batches < {request_data.min_history_runs} required"
)
continue
# Create production history DataFrame from real batches
production_data = []
for batch in batches:
# Calculate yield percentage
if batch.planned_quantity and batch.actual_quantity:
yield_pct = (batch.actual_quantity / batch.planned_quantity) * 100
else:
continue # Skip batches without complete data
production_data.append({
'started_at': batch.actual_start_time, # Column name expected by YieldPredictor feature engineering
'planned_quantity': float(batch.planned_quantity),
'actual_quantity': float(batch.actual_quantity),
'yield_percentage': yield_pct,
'batch_size': float(batch.planned_quantity), # Planned quantity serves as the batch size proxy
'worker_id': batch.notes or 'unknown', # Use notes field or default
'batch_number': batch.batch_number
})
if not production_data:
logger.warning(
f"No valid production data for recipe {recipe_id}"
)
continue
production_history = pd.DataFrame(production_data)
# Run yield analysis
results = await orchestrator.analyze_and_post_insights(
tenant_id=tenant_id,
recipe_id=recipe_id,
production_history=production_history,
min_history_runs=request_data.min_history_runs
)
# Track results
total_insights_generated += results['insights_generated']
total_insights_posted += results['insights_posted']
baseline_stats = results.get('baseline_stats') or {} # Guard against None from the insufficient-data path
mean_yield = baseline_stats.get('mean_yield', 100)
if mean_yield < 90:
recipes_with_issues += 1
insights_by_recipe[recipe_id] = {
'recipe_name': recipe_name,
'insights_posted': results['insights_posted'],
'mean_yield': mean_yield,
'patterns': len(results.get('patterns', []))
}
logger.info(
f"Recipe {recipe_id} analysis complete",
insights_posted=results['insights_posted'],
mean_yield=mean_yield
)
except Exception as e:
error_msg = f"Error analyzing recipe {recipe_id}: {str(e)}"
logger.error(error_msg, exc_info=True)
errors.append(error_msg)
# Close orchestrator and clients
await orchestrator.close()
await recipes_client.close()
# Build response
response = YieldPredictionResponse(
success=total_insights_posted > 0,
message=f"Successfully analyzed {len([r for r in recipes if isinstance(r, dict)])} recipes, generated {total_insights_posted} insights",
tenant_id=tenant_id,
recipes_analyzed=len([r for r in recipes if isinstance(r, dict)]),
total_insights_generated=total_insights_generated,
total_insights_posted=total_insights_posted,
recipes_with_issues=recipes_with_issues,
insights_by_recipe=insights_by_recipe,
errors=errors
)
logger.info(
"ML insights yield prediction complete",
tenant_id=tenant_id,
total_insights=total_insights_posted,
recipes_with_issues=recipes_with_issues
)
return response
except Exception as e:
logger.error(
"ML insights yield prediction failed",
tenant_id=tenant_id,
error=str(e),
exc_info=True
)
raise HTTPException(
status_code=500,
detail=f"Yield prediction failed: {str(e)}"
)
@router.get("/health")
async def ml_insights_health():
"""Health check for ML insights endpoints"""
return {
"status": "healthy",
"service": "production-ml-insights",
"endpoints": [
"POST /ml/insights/predict-yields"
]
}
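For reference, a minimal client-side sketch of triggering the new endpoint. The base URL, tenant UUID, and timeout are placeholders, and any auth headers the gateway may require are omitted; this is an illustration, not part of the commit.

import asyncio
import httpx

async def trigger_yield_prediction_example():
    # Hypothetical values; substitute the real service URL and tenant UUID
    base_url = "http://localhost:8000"
    tenant_id = "00000000-0000-0000-0000-000000000000"
    payload = {
        "recipe_ids": None,      # None -> analyze up to 10 recipes for the tenant
        "lookback_days": 90,     # 30-365 days of production history
        "min_history_runs": 30   # 10-100 completed batches required per recipe
    }
    async with httpx.AsyncClient(base_url=base_url, timeout=120.0) as client:
        resp = await client.post(
            f"/api/v1/tenants/{tenant_id}/production/ml/insights/predict-yields",
            json=payload,
        )
        resp.raise_for_status()
        result = resp.json()
        print(result["recipes_analyzed"], result["total_insights_posted"])

# asyncio.run(trigger_yield_prediction_example())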

View File

@@ -101,7 +101,7 @@ class GenerateScheduleResponse(BaseModel):
# ================================================================
@router.post(
route_builder.build_nested_resource_route("", None, "generate-schedule"),
route_builder.build_operations_route("generate-schedule"),
response_model=GenerateScheduleResponse
)
async def generate_production_schedule(

View File

@@ -305,6 +305,31 @@ async def reserve_capacity(
raise HTTPException(status_code=500, detail="Failed to reserve capacity")
@router.get(
"/api/v1/tenants/{tenant_id}/production/capacity/date/{date}",
response_model=list
)
async def get_capacity_by_date(
tenant_id: UUID = Path(...),
date: date = Path(..., description="Date to retrieve capacity for (format: YYYY-MM-DD)"),
current_user: dict = Depends(get_current_user_dep),
production_service: ProductionService = Depends(get_production_service)
):
"""Get capacity by date (using direct route to support date path parameter)"""
try:
capacity_data = await production_service.get_capacity_by_date(tenant_id, date)
logger.info("Retrieved capacity by date",
tenant_id=str(tenant_id), date=date.isoformat())
return capacity_data
except Exception as e:
logger.error("Error getting capacity by date",
error=str(e), tenant_id=str(tenant_id), date=date.isoformat())
raise HTTPException(status_code=500, detail="Failed to get capacity by date")
@router.get(
route_builder.build_operations_route("capacity/bottlenecks"),
response_model=dict

View File

@@ -26,7 +26,8 @@ from app.api import (
internal_demo,
orchestrator, # NEW: Orchestrator integration endpoint
production_orders_operations, # Tenant deletion endpoints
audit
audit,
ml_insights # ML insights endpoint
)
@@ -164,6 +165,7 @@ service.add_router(production_operations.router)
service.add_router(production_dashboard.router)
service.add_router(analytics.router)
service.add_router(internal_demo.router)
service.add_router(ml_insights.router) # ML insights endpoint
# REMOVED: test_production_scheduler endpoint
# Production scheduling is now triggered by the Orchestrator Service

View File

@@ -0,0 +1,415 @@
"""
Yield Insights Orchestrator
Coordinates yield prediction and insight posting
"""
import pandas as pd
from typing import Dict, List, Any, Optional
import structlog
from datetime import datetime
from uuid import UUID
import sys
import os
# Add shared clients to path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
from shared.clients.ai_insights_client import AIInsightsClient
from app.ml.yield_predictor import YieldPredictor
logger = structlog.get_logger()
class YieldInsightsOrchestrator:
"""
Orchestrates yield prediction and insight generation workflow.
Workflow:
1. Predict yield for upcoming production run or analyze historical performance
2. Generate insights for yield optimization opportunities
3. Post insights to AI Insights Service
4. Provide yield predictions for production planning
"""
def __init__(
self,
ai_insights_base_url: str = "http://ai-insights-service:8000"
):
self.predictor = YieldPredictor()
self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
async def predict_and_post_insights(
self,
tenant_id: str,
recipe_id: str,
production_history: pd.DataFrame,
production_context: Dict[str, Any],
min_history_runs: int = 30
) -> Dict[str, Any]:
"""
Complete workflow: Predict yield and post insights.
Args:
tenant_id: Tenant identifier
recipe_id: Recipe identifier
production_history: Historical production runs
production_context: Upcoming production context:
- worker_id
- planned_start_time
- batch_size
- planned_quantity
- unit_cost (optional)
- equipment_id (optional)
min_history_runs: Minimum production runs required
Returns:
Workflow results with prediction and posted insights
"""
logger.info(
"Starting yield prediction workflow",
tenant_id=tenant_id,
recipe_id=recipe_id,
history_runs=len(production_history)
)
# Step 1: Predict yield
prediction_results = await self.predictor.predict_yield(
tenant_id=tenant_id,
recipe_id=recipe_id,
production_history=production_history,
production_context=production_context,
min_history_runs=min_history_runs
)
logger.info(
"Yield prediction complete",
recipe_id=recipe_id,
predicted_yield=prediction_results.get('predicted_yield'),
insights_generated=len(prediction_results.get('insights', []))
)
# Step 2: Enrich insights with tenant_id and recipe context
enriched_insights = self._enrich_insights(
prediction_results.get('insights', []),
tenant_id,
recipe_id
)
# Step 3: Post insights to AI Insights Service
if enriched_insights:
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_insights
)
logger.info(
"Yield insights posted to AI Insights Service",
recipe_id=recipe_id,
total=post_results['total'],
successful=post_results['successful'],
failed=post_results['failed']
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
logger.info("No insights to post for recipe", recipe_id=recipe_id)
# Step 4: Return comprehensive results
return {
'tenant_id': tenant_id,
'recipe_id': recipe_id,
'predicted_at': prediction_results['predicted_at'],
'history_runs': prediction_results['history_runs'],
'baseline_yield': prediction_results.get('baseline_yield'),
'predicted_yield': prediction_results.get('predicted_yield'),
'prediction_range': prediction_results.get('prediction_range'),
'expected_waste': prediction_results.get('expected_waste'),
'confidence': prediction_results['confidence'],
'factor_analysis': prediction_results.get('factor_analysis'),
'patterns': prediction_results.get('patterns', []),
'insights_generated': len(enriched_insights),
'insights_posted': post_results['successful'],
'insights_failed': post_results['failed'],
'created_insights': post_results.get('created_insights', [])
}
async def analyze_and_post_insights(
self,
tenant_id: str,
recipe_id: str,
production_history: pd.DataFrame,
min_history_runs: int = 30
) -> Dict[str, Any]:
"""
Analyze historical yield performance and post insights (no prediction).
Args:
tenant_id: Tenant identifier
recipe_id: Recipe identifier
production_history: Historical production runs
min_history_runs: Minimum production runs required
Returns:
Workflow results with analysis and posted insights
"""
logger.info(
"Starting yield analysis workflow",
tenant_id=tenant_id,
recipe_id=recipe_id,
history_runs=len(production_history)
)
# Step 1: Analyze historical yield
analysis_results = await self.predictor.analyze_recipe_yield_history(
tenant_id=tenant_id,
recipe_id=recipe_id,
production_history=production_history,
min_history_runs=min_history_runs
)
logger.info(
"Yield analysis complete",
recipe_id=recipe_id,
baseline_yield=analysis_results.get('baseline_stats', {}).get('mean_yield'),
insights_generated=len(analysis_results.get('insights', []))
)
# Step 2: Enrich insights
enriched_insights = self._enrich_insights(
analysis_results.get('insights', []),
tenant_id,
recipe_id
)
# Step 3: Post insights
if enriched_insights:
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_insights
)
logger.info(
"Yield analysis insights posted",
recipe_id=recipe_id,
total=post_results['total'],
successful=post_results['successful']
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
return {
'tenant_id': tenant_id,
'recipe_id': recipe_id,
'analyzed_at': analysis_results['analyzed_at'],
'history_runs': analysis_results['history_runs'],
'baseline_stats': analysis_results.get('baseline_stats'),
'factor_analysis': analysis_results.get('factor_analysis'),
'patterns': analysis_results.get('patterns', []),
'insights_generated': len(enriched_insights),
'insights_posted': post_results['successful'],
'created_insights': post_results.get('created_insights', [])
}
def _enrich_insights(
self,
insights: List[Dict[str, Any]],
tenant_id: str,
recipe_id: str
) -> List[Dict[str, Any]]:
"""
Enrich insights with required fields for AI Insights Service.
Args:
insights: Raw insights from predictor
tenant_id: Tenant identifier
recipe_id: Recipe identifier
Returns:
Enriched insights ready for posting
"""
enriched = []
for insight in insights:
# Add required tenant_id
enriched_insight = insight.copy()
enriched_insight['tenant_id'] = tenant_id
# Add recipe context to metrics
if 'metrics_json' not in enriched_insight:
enriched_insight['metrics_json'] = {}
enriched_insight['metrics_json']['recipe_id'] = recipe_id
# Add source metadata
enriched_insight['source_service'] = 'production'
enriched_insight['source_model'] = 'yield_predictor'
enriched_insight['detected_at'] = datetime.utcnow().isoformat()
enriched.append(enriched_insight)
return enriched
async def analyze_all_recipes(
self,
tenant_id: str,
recipes_data: Dict[str, pd.DataFrame],
min_history_runs: int = 30
) -> Dict[str, Any]:
"""
Analyze yield performance for all recipes for a tenant.
Args:
tenant_id: Tenant identifier
recipes_data: Dict of {recipe_id: production_history_df}
min_history_runs: Minimum production runs required
Returns:
Comprehensive analysis results
"""
logger.info(
"Analyzing yield for all recipes",
tenant_id=tenant_id,
recipes=len(recipes_data)
)
all_results = []
total_insights_posted = 0
recipes_with_issues = []
# Analyze each recipe
for recipe_id, production_history in recipes_data.items():
try:
results = await self.analyze_and_post_insights(
tenant_id=tenant_id,
recipe_id=recipe_id,
production_history=production_history,
min_history_runs=min_history_runs
)
all_results.append(results)
total_insights_posted += results['insights_posted']
# Check for low baseline yield
baseline_stats = results.get('baseline_stats')
if baseline_stats and baseline_stats.get('mean_yield', 100) < 90:
recipes_with_issues.append({
'recipe_id': recipe_id,
'mean_yield': baseline_stats['mean_yield'],
'std_yield': baseline_stats['std_yield']
})
except Exception as e:
logger.error(
"Error analyzing recipe",
recipe_id=recipe_id,
error=str(e)
)
# Generate portfolio summary insight if there are yield issues
if len(recipes_with_issues) > 0:
summary_insight = self._generate_portfolio_summary_insight(
tenant_id, recipes_with_issues, all_results
)
if summary_insight:
enriched_summary = self._enrich_insights(
[summary_insight], tenant_id, 'all_recipes'
)
post_results = await self.ai_insights_client.create_insights_bulk(
tenant_id=UUID(tenant_id),
insights=enriched_summary
)
total_insights_posted += post_results['successful']
logger.info(
"All recipes yield analysis complete",
tenant_id=tenant_id,
recipes_analyzed=len(all_results),
total_insights_posted=total_insights_posted,
recipes_with_issues=len(recipes_with_issues)
)
return {
'tenant_id': tenant_id,
'analyzed_at': datetime.utcnow().isoformat(),
'recipes_analyzed': len(all_results),
'recipe_results': all_results,
'total_insights_posted': total_insights_posted,
'recipes_with_issues': recipes_with_issues
}
def _generate_portfolio_summary_insight(
self,
tenant_id: str,
recipes_with_issues: List[Dict[str, Any]],
all_results: List[Dict[str, Any]]
) -> Optional[Dict[str, Any]]:
"""
Generate portfolio-level summary insight.
Args:
tenant_id: Tenant identifier
recipes_with_issues: Recipes with low yield
all_results: All recipe analysis results
Returns:
Summary insight or None
"""
if len(recipes_with_issues) == 0:
return None
# Calculate average yield and potential improvement
total_recipes = len(all_results)
issues_count = len(recipes_with_issues)
avg_low_yield = sum(r['mean_yield'] for r in recipes_with_issues) / issues_count
# Estimate waste reduction potential
# Assuming each recipe produces 1000 units/month, €5/unit cost
monthly_production = 1000 * issues_count
current_waste_pct = 100 - avg_low_yield
target_waste_pct = 5 # Target 95% yield
if current_waste_pct > target_waste_pct:
waste_reduction_units = monthly_production * ((current_waste_pct - target_waste_pct) / 100)
annual_savings = waste_reduction_units * 12 * 5 # €5 per unit
return {
'type': 'opportunity',
'priority': 'high' if issues_count > 3 else 'medium',
'category': 'production',
'title': f'Production Yield Optimization: {issues_count} Recipes Below 90%',
'description': f'{issues_count} of {total_recipes} recipes have average yield below 90% (average {avg_low_yield:.1f}%). Improving to 95% target would reduce waste by {waste_reduction_units:.0f} units/month, saving €{annual_savings:.0f}/year.',
'impact_type': 'cost_savings',
'impact_value': annual_savings,
'impact_unit': 'euros_per_year',
'confidence': 75,
'metrics_json': {
'recipes_analyzed': total_recipes,
'recipes_with_issues': issues_count,
'avg_low_yield': round(avg_low_yield, 2),
'potential_annual_savings': round(annual_savings, 2),
'waste_reduction_units_monthly': round(waste_reduction_units, 2)
},
'actionable': True,
'recommendation_actions': [
{
'label': 'Review Low-Yield Recipes',
'action': 'review_yield_insights',
'params': {'tenant_id': tenant_id}
},
{
'label': 'Implement Yield Improvements',
'action': 'apply_yield_recommendations',
'params': {'tenant_id': tenant_id}
}
],
'source_service': 'production',
'source_model': 'yield_predictor'
}
return None
async def close(self):
"""Close HTTP client connections."""
await self.ai_insights_client.close()
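A minimal sketch of driving the orchestrator directly, outside the API endpoint, assuming the AI Insights Service is reachable at its default URL; the tenant/recipe UUIDs and the synthetic history are placeholders, and the DataFrame columns mirror what YieldPredictor's feature engineering expects (started_at, planned_quantity, actual_quantity, yield_percentage, batch_size, worker_id).

import asyncio
from datetime import datetime, timedelta

import pandas as pd

from app.ml.yield_insights_orchestrator import YieldInsightsOrchestrator

async def run_yield_analysis_example():
    # Synthetic 30-run history with the columns the predictor expects
    start = datetime.utcnow() - timedelta(days=90)
    rows = [{
        'started_at': start + timedelta(days=i * 3),
        'planned_quantity': 100.0,
        'actual_quantity': 90.0 + (i % 8),
        'yield_percentage': 90.0 + (i % 8),
        'batch_size': 80.0 + (i % 5) * 10,
        'worker_id': f'worker_{i % 2}',
    } for i in range(30)]
    history = pd.DataFrame(rows)

    orchestrator = YieldInsightsOrchestrator()  # defaults to http://ai-insights-service:8000
    try:
        results = await orchestrator.analyze_and_post_insights(
            tenant_id='00000000-0000-0000-0000-000000000000',   # placeholder tenant UUID
            recipe_id='00000000-0000-0000-0000-000000000001',   # placeholder recipe UUID
            production_history=history,
            min_history_runs=30,
        )
        print(results['baseline_stats']['mean_yield'], results['insights_posted'])
    finally:
        await orchestrator.close()

# asyncio.run(run_yield_analysis_example())

Note that the portfolio summary insight hard-codes its economics (1000 units/month per low-yield recipe, €5/unit, 95% target yield). Under those assumptions, 4 recipes averaging 88% yield imply 4000 units/month, a 12% - 5% = 7% recoverable waste rate, i.e. 280 units/month and roughly €16,800/year in estimated savings.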

View File

@@ -0,0 +1,799 @@
"""
Production Yield Predictor
Predicts actual vs planned yield and identifies waste reduction opportunities
"""
import pandas as pd
import numpy as np
from typing import Dict, List, Any, Optional, Tuple
from datetime import datetime, timedelta
import structlog
from scipy import stats
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings('ignore')
logger = structlog.get_logger()
class YieldPredictor:
"""
Predicts production yield based on historical data and production factors.
Key Features:
- Multi-factor yield prediction (recipe, worker, time-of-day, equipment, batch size)
- Identifies low-yield patterns and root causes
- Waste categorization (spoilage, measurement error, process inefficiency)
- Actionable recommendations for yield improvement
- Statistical validation of learned patterns
Methodology:
1. Feature Engineering: Extract worker skill, time factors, batch size effects
2. Statistical Analysis: Identify significant yield loss factors
3. ML Prediction: Ensemble of Random Forest + Gradient Boosting
4. Pattern Detection: Find recurring low-yield situations
5. Insight Generation: Actionable recommendations with confidence scores
"""
def __init__(self):
self.model_cache = {} # Cache trained models per recipe
self.baseline_yields = {} # Cache baseline yields per recipe
async def predict_yield(
self,
tenant_id: str,
recipe_id: str,
production_history: pd.DataFrame,
production_context: Dict[str, Any],
min_history_runs: int = 30
) -> Dict[str, Any]:
"""
Predict yield for upcoming production run and generate insights.
Args:
tenant_id: Tenant identifier
recipe_id: Recipe identifier
production_history: Historical production runs with columns:
- production_run_id
- recipe_id
- planned_quantity
- actual_quantity
- yield_percentage
- worker_id
- started_at
- completed_at
- batch_size
- equipment_id (optional)
- notes (optional)
production_context: Upcoming production context:
- worker_id
- planned_start_time
- batch_size
- equipment_id (optional)
min_history_runs: Minimum production runs required for learning
Returns:
Prediction results with yield forecast, confidence, and insights
"""
logger.info(
"Predicting production yield",
tenant_id=tenant_id,
recipe_id=recipe_id,
history_runs=len(production_history)
)
# Validate production history
if len(production_history) < min_history_runs:
return self._insufficient_data_response(
recipe_id, production_context, len(production_history), min_history_runs
)
# Step 1: Calculate baseline statistics
baseline_stats = self._calculate_baseline_statistics(production_history)
# Step 2: Feature engineering
feature_df = self._engineer_features(production_history)
# Step 3: Analyze yield factors
factor_analysis = self._analyze_yield_factors(feature_df)
# Step 4: Train predictive model
model_results = self._train_yield_model(feature_df)
# Step 5: Make prediction for upcoming run
prediction = self._predict_upcoming_run(
production_context, model_results, baseline_stats, feature_df
)
# Step 6: Identify low-yield patterns
patterns = self._identify_yield_patterns(feature_df, factor_analysis)
# Step 7: Generate insights
insights = self._generate_yield_insights(
tenant_id, recipe_id, baseline_stats, factor_analysis,
patterns, prediction, production_context
)
# Step 8: Calculate confidence
confidence = self._calculate_prediction_confidence(
production_history, model_results, factor_analysis
)
return {
'recipe_id': recipe_id,
'predicted_at': datetime.utcnow().isoformat(),
'history_runs': len(production_history),
'baseline_yield': baseline_stats['mean_yield'],
'baseline_std': baseline_stats['std_yield'],
'predicted_yield': prediction['predicted_yield'],
'prediction_range': prediction['prediction_range'],
'expected_waste': prediction['expected_waste_units'], # _predict_upcoming_run exposes 'expected_waste_units'
'confidence': confidence,
'factor_analysis': factor_analysis,
'patterns': patterns,
'model_performance': model_results['performance'],
'insights': insights
}
def _insufficient_data_response(
self, recipe_id: str, production_context: Dict[str, Any],
current_runs: int, required_runs: int
) -> Dict[str, Any]:
"""Return response when insufficient historical data."""
return {
'recipe_id': recipe_id,
'predicted_at': datetime.utcnow().isoformat(),
'analyzed_at': datetime.utcnow().isoformat(), # Also set so the analysis-only workflow can read it
'history_runs': current_runs,
'status': 'insufficient_data',
'required_runs': required_runs,
'baseline_yield': None,
'predicted_yield': None,
'confidence': 0,
'insights': [{
'type': 'warning',
'priority': 'low',
'category': 'production',
'title': 'Insufficient Production History for Yield Prediction',
'description': f'Only {current_runs} production runs available. Need at least {required_runs} runs to build reliable yield predictions. Continue tracking production data to enable yield optimization.',
'impact_type': 'data_quality',
'confidence': 100,
'actionable': True,
'recommendation_actions': [{
'label': 'Track Production Data',
'action': 'continue_production_tracking',
'params': {'recipe_id': recipe_id}
}]
}]
}
def _calculate_baseline_statistics(
self, production_history: pd.DataFrame
) -> Dict[str, Any]:
"""Calculate baseline yield statistics."""
yields = production_history['yield_percentage'].values
return {
'mean_yield': float(np.mean(yields)),
'median_yield': float(np.median(yields)),
'std_yield': float(np.std(yields)),
'min_yield': float(np.min(yields)),
'max_yield': float(np.max(yields)),
'cv_yield': float(np.std(yields) / np.mean(yields)), # Coefficient of variation
'percentile_25': float(np.percentile(yields, 25)),
'percentile_75': float(np.percentile(yields, 75)),
'runs_below_90': int(np.sum(yields < 90)),
'runs_above_95': int(np.sum(yields > 95))
}
def _engineer_features(self, production_history: pd.DataFrame) -> pd.DataFrame:
"""Engineer features from production history."""
df = production_history.copy()
# Time-based features
df['started_at'] = pd.to_datetime(df['started_at'])
df['hour_of_day'] = df['started_at'].dt.hour
df['day_of_week'] = df['started_at'].dt.dayofweek
df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)
df['is_early_morning'] = (df['hour_of_day'] < 6).astype(int)
df['is_late_night'] = (df['hour_of_day'] >= 22).astype(int)
# Duration features
if 'completed_at' in df.columns:
df['completed_at'] = pd.to_datetime(df['completed_at'])
df['duration_hours'] = (df['completed_at'] - df['started_at']).dt.total_seconds() / 3600
df['is_rushed'] = (df['duration_hours'] < df['duration_hours'].quantile(0.25)).astype(int)
# Batch size features
df['batch_size_normalized'] = df['batch_size'] / df['batch_size'].mean()
df['is_large_batch'] = (df['batch_size'] > df['batch_size'].quantile(0.75)).astype(int)
df['is_small_batch'] = (df['batch_size'] < df['batch_size'].quantile(0.25)).astype(int)
# Worker experience features (proxy: number of previous runs)
df = df.sort_values('started_at')
df['worker_run_count'] = df.groupby('worker_id').cumcount() + 1
df['worker_experience_level'] = pd.cut(
df['worker_run_count'],
bins=[0, 5, 15, 100],
labels=['novice', 'intermediate', 'expert']
)
# Recent yield trend for worker
df['worker_recent_avg_yield'] = df.groupby('worker_id')['yield_percentage'].transform(
lambda x: x.rolling(window=5, min_periods=1).mean()
)
return df
def _analyze_yield_factors(self, feature_df: pd.DataFrame) -> Dict[str, Any]:
"""Analyze factors affecting yield using statistical tests."""
factors = {}
# Worker impact
worker_yields = feature_df.groupby('worker_id')['yield_percentage'].agg(['mean', 'std', 'count'])
worker_yields = worker_yields[worker_yields['count'] >= 3] # Min 3 runs per worker
if len(worker_yields) > 1:
# ANOVA test: Does worker significantly affect yield?
worker_groups = [
feature_df[feature_df['worker_id'] == worker]['yield_percentage'].values
for worker in worker_yields.index
]
f_stat, p_value = stats.f_oneway(*worker_groups)
factors['worker'] = {
'significant': p_value < 0.05,
'p_value': float(p_value),
'f_statistic': float(f_stat),
'best_worker': worker_yields['mean'].idxmax(),
'best_worker_yield': float(worker_yields['mean'].max()),
'worst_worker': worker_yields['mean'].idxmin(),
'worst_worker_yield': float(worker_yields['mean'].min()),
'yield_range': float(worker_yields['mean'].max() - worker_yields['mean'].min())
}
else:
factors['worker'] = {'significant': False, 'reason': 'insufficient_workers'}
# Time of day impact
time_groups = {
'early_morning': feature_df[feature_df['hour_of_day'] < 6]['yield_percentage'].values,
'morning': feature_df[(feature_df['hour_of_day'] >= 6) & (feature_df['hour_of_day'] < 12)]['yield_percentage'].values,
'afternoon': feature_df[(feature_df['hour_of_day'] >= 12) & (feature_df['hour_of_day'] < 18)]['yield_percentage'].values,
'evening': feature_df[feature_df['hour_of_day'] >= 18]['yield_percentage'].values
}
time_groups = {k: v for k, v in time_groups.items() if len(v) >= 3}
if len(time_groups) > 1:
f_stat, p_value = stats.f_oneway(*time_groups.values())
time_means = {k: np.mean(v) for k, v in time_groups.items()}
factors['time_of_day'] = {
'significant': p_value < 0.05,
'p_value': float(p_value),
'best_time': max(time_means, key=time_means.get),
'best_time_yield': float(max(time_means.values())),
'worst_time': min(time_means, key=time_means.get),
'worst_time_yield': float(min(time_means.values())),
'yield_range': float(max(time_means.values()) - min(time_means.values()))
}
else:
factors['time_of_day'] = {'significant': False, 'reason': 'insufficient_data'}
# Batch size impact (correlation)
if len(feature_df) >= 10:
correlation, p_value = stats.pearsonr(
feature_df['batch_size'],
feature_df['yield_percentage']
)
factors['batch_size'] = {
'significant': abs(correlation) > 0.3 and p_value < 0.05,
'correlation': float(correlation),
'p_value': float(p_value),
'direction': 'positive' if correlation > 0 else 'negative',
'interpretation': self._interpret_batch_size_effect(correlation)
}
else:
factors['batch_size'] = {'significant': False, 'reason': 'insufficient_data'}
# Weekend vs weekday
weekend_yields = feature_df[feature_df['is_weekend'] == 1]['yield_percentage'].values
weekday_yields = feature_df[feature_df['is_weekend'] == 0]['yield_percentage'].values
if len(weekend_yields) >= 3 and len(weekday_yields) >= 3:
t_stat, p_value = stats.ttest_ind(weekend_yields, weekday_yields)
factors['weekend_effect'] = {
'significant': p_value < 0.05,
'p_value': float(p_value),
't_statistic': float(t_stat),
'weekend_yield': float(np.mean(weekend_yields)),
'weekday_yield': float(np.mean(weekday_yields)),
'difference': float(np.mean(weekend_yields) - np.mean(weekday_yields))
}
else:
factors['weekend_effect'] = {'significant': False, 'reason': 'insufficient_weekend_data'}
return factors
def _interpret_batch_size_effect(self, correlation: float) -> str:
"""Interpret batch size correlation."""
if abs(correlation) < 0.3:
return "Batch size has minimal impact on yield"
elif correlation > 0:
return "Larger batches tend to have higher yield (economies of scale)"
else:
return "Larger batches tend to have lower yield (difficulty handling large volumes)"
def _train_yield_model(self, feature_df: pd.DataFrame) -> Dict[str, Any]:
"""Train ML model to predict yield."""
# Prepare features
feature_columns = [
'hour_of_day', 'day_of_week', 'is_weekend',
'batch_size_normalized', 'is_large_batch', 'is_small_batch',
'worker_run_count'
]
if 'duration_hours' in feature_df.columns:
feature_columns.append('duration_hours')
# Encode worker_id
worker_encoding = {worker: idx for idx, worker in enumerate(feature_df['worker_id'].unique())}
feature_df['worker_encoded'] = feature_df['worker_id'].map(worker_encoding)
feature_columns.append('worker_encoded')
X = feature_df[feature_columns].fillna(0).values
y = feature_df['yield_percentage'].values
# Split into train/test (temporal split)
split_idx = int(len(X) * 0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]
# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Train ensemble of models
models = {
'random_forest': RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42),
'gradient_boosting': GradientBoostingRegressor(n_estimators=50, max_depth=3, random_state=42),
'linear': LinearRegression()
}
performances = {}
predictions = {}
for name, model in models.items():
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
mae = np.mean(np.abs(y_test - y_pred))
rmse = np.sqrt(np.mean((y_test - y_pred) ** 2))
r2 = 1 - (np.sum((y_test - y_pred) ** 2) / np.sum((y_test - np.mean(y_test)) ** 2))
performances[name] = {
'mae': float(mae),
'rmse': float(rmse),
'r2': float(r2)
}
predictions[name] = y_pred
# Select best model based on MAE
best_model_name = min(performances, key=lambda k: performances[k]['mae'])
best_model = models[best_model_name]
# Feature importance (if available)
feature_importance = {}
if hasattr(best_model, 'feature_importances_'):
importances = best_model.feature_importances_
feature_importance = {
feature_columns[i]: float(importances[i])
for i in range(len(feature_columns))
}
feature_importance = dict(sorted(
feature_importance.items(),
key=lambda x: x[1],
reverse=True
))
return {
'best_model': best_model,
'best_model_name': best_model_name,
'scaler': scaler,
'feature_columns': feature_columns,
'worker_encoding': worker_encoding,
'performance': performances[best_model_name],
'all_performances': performances,
'feature_importance': feature_importance
}
def _predict_upcoming_run(
self,
production_context: Dict[str, Any],
model_results: Dict[str, Any],
baseline_stats: Dict[str, Any],
feature_df: pd.DataFrame
) -> Dict[str, Any]:
"""Predict yield for upcoming production run."""
# Extract context
worker_id = production_context.get('worker_id')
planned_start = pd.to_datetime(production_context.get('planned_start_time'))
batch_size = production_context.get('batch_size')
# Get worker experience
worker_runs = feature_df[feature_df['worker_id'] == worker_id]
worker_run_count = len(worker_runs) if len(worker_runs) > 0 else 1
# Build feature vector
mean_batch_size = feature_df['batch_size'].mean()
batch_size_normalized = batch_size / mean_batch_size
is_large_batch = 1 if batch_size > feature_df['batch_size'].quantile(0.75) else 0
is_small_batch = 1 if batch_size < feature_df['batch_size'].quantile(0.25) else 0
features = {
'hour_of_day': planned_start.hour,
'day_of_week': planned_start.dayofweek,
'is_weekend': 1 if planned_start.dayofweek in [5, 6] else 0,
'batch_size_normalized': batch_size_normalized,
'is_large_batch': is_large_batch,
'is_small_batch': is_small_batch,
'worker_run_count': worker_run_count,
'duration_hours': 0, # Not known yet
'worker_encoded': model_results['worker_encoding'].get(worker_id, 0)
}
# Create feature vector in correct order
X = np.array([[features.get(col, 0) for col in model_results['feature_columns']]])
X_scaled = model_results['scaler'].transform(X)
# Predict
predicted_yield = float(model_results['best_model'].predict(X_scaled)[0])
# Prediction range (based on model RMSE)
rmse = model_results['performance']['rmse']
prediction_range = {
'lower': max(0, predicted_yield - 1.96 * rmse),
'upper': min(100, predicted_yield + 1.96 * rmse)
}
# Expected waste
planned_quantity = production_context.get('planned_quantity', 100)
expected_waste_pct = max(0, 100 - predicted_yield)
expected_waste_units = planned_quantity * (expected_waste_pct / 100)
return {
'predicted_yield': round(predicted_yield, 2),
'prediction_range': prediction_range,
'expected_waste_pct': round(expected_waste_pct, 2),
'expected_waste_units': round(expected_waste_units, 2),
'baseline_comparison': round(predicted_yield - baseline_stats['mean_yield'], 2),
'features_used': features
}
def _identify_yield_patterns(
self, feature_df: pd.DataFrame, factor_analysis: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Identify recurring low-yield patterns."""
patterns = []
# Pattern 1: Specific worker consistently low
if factor_analysis.get('worker', {}).get('significant'):
worst_worker = factor_analysis['worker']['worst_worker']
worst_yield = factor_analysis['worker']['worst_worker_yield']
best_yield = factor_analysis['worker']['best_worker_yield']
if worst_yield < 90 and (best_yield - worst_yield) > 5:
patterns.append({
'pattern': 'low_yield_worker',
'description': f'Worker {worst_worker} consistently produces {worst_yield:.1f}% yield vs best worker {best_yield:.1f}%',
'severity': 'high' if worst_yield < 85 else 'medium',
'affected_runs': int(len(feature_df[feature_df['worker_id'] == worst_worker])),
'yield_impact': round(best_yield - worst_yield, 2),
'recommendation': 'Provide additional training or reassign to different recipes'
})
# Pattern 2: Time-of-day effect
if factor_analysis.get('time_of_day', {}).get('significant'):
worst_time = factor_analysis['time_of_day']['worst_time']
worst_yield = factor_analysis['time_of_day']['worst_time_yield']
if worst_yield < 90:
patterns.append({
'pattern': 'low_yield_time',
'description': f'{worst_time} shifts produce {worst_yield:.1f}% yield',
'severity': 'medium',
'affected_runs': 'varies',
'yield_impact': round(factor_analysis['time_of_day']['yield_range'], 2),
'recommendation': f'Avoid scheduling this recipe during {worst_time}'
})
# Pattern 3: Large batch issues
if factor_analysis.get('batch_size', {}).get('significant'):
if factor_analysis['batch_size']['direction'] == 'negative':
patterns.append({
'pattern': 'large_batch_yield_loss',
'description': 'Larger batches have lower yield - equipment or process capacity issues',
'severity': 'medium',
'correlation': round(factor_analysis['batch_size']['correlation'], 3),
'recommendation': 'Split large batches or upgrade equipment'
})
# Pattern 4: Weekend effect
if factor_analysis.get('weekend_effect', {}).get('significant'):
weekend_yield = factor_analysis['weekend_effect']['weekend_yield']
weekday_yield = factor_analysis['weekend_effect']['weekday_yield']
if abs(weekend_yield - weekday_yield) > 3:
if weekend_yield < weekday_yield:
patterns.append({
'pattern': 'weekend_yield_drop',
'description': f'Weekend production {weekend_yield:.1f}% vs weekday {weekday_yield:.1f}%',
'severity': 'low',
'yield_impact': round(weekday_yield - weekend_yield, 2),
'recommendation': 'Review weekend staffing or processes'
})
return patterns
def _generate_yield_insights(
self,
tenant_id: str,
recipe_id: str,
baseline_stats: Dict[str, Any],
factor_analysis: Dict[str, Any],
patterns: List[Dict[str, Any]],
prediction: Dict[str, Any],
production_context: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Generate actionable insights for yield improvement."""
insights = []
# Insight 1: Low predicted yield warning
if prediction['predicted_yield'] < 90:
waste_value = prediction['expected_waste_units'] * production_context.get('unit_cost', 5)
insights.append({
'type': 'warning',
'priority': 'high' if prediction['predicted_yield'] < 85 else 'medium',
'category': 'production',
'title': f'Low Yield Predicted: {prediction["predicted_yield"]:.1f}%',
'description': f'Upcoming production run predicted to yield {prediction["predicted_yield"]:.1f}%, below baseline {baseline_stats["mean_yield"]:.1f}%. Expected waste: {prediction["expected_waste_units"]:.1f} units (€{waste_value:.2f}).',
'impact_type': 'waste',
'impact_value': prediction['expected_waste_units'],
'impact_unit': 'units',
'confidence': 75,
'metrics_json': {
'recipe_id': recipe_id,
'predicted_yield': prediction['predicted_yield'],
'expected_waste': prediction['expected_waste_units'],
'waste_value': round(waste_value, 2)
},
'actionable': True,
'recommendation_actions': [{
'label': 'Review Production Setup',
'action': 'review_production_factors',
'params': {
'recipe_id': recipe_id,
'worker_id': production_context.get('worker_id')
}
}]
})
# Insight 2: High-severity patterns
for pattern in patterns:
if pattern.get('severity') == 'high':
if pattern['pattern'] == 'low_yield_worker':
insights.append({
'type': 'opportunity',
'priority': 'high',
'category': 'production',
'title': f'Worker Training Opportunity: {pattern["yield_impact"]:.1f}% Yield Gap',
'description': pattern['description'] + f'. Improving this worker to average performance would save significant waste.',
'impact_type': 'yield_improvement',
'impact_value': pattern['yield_impact'],
'impact_unit': 'percentage_points',
'confidence': 85,
'metrics_json': {
'recipe_id': recipe_id,
'pattern': pattern['pattern'],
'yield_impact': pattern['yield_impact']
},
'actionable': True,
'recommendation_actions': [{
'label': 'Schedule Training',
'action': 'schedule_worker_training',
'params': {'recipe_id': recipe_id}
}]
})
# Insight 3: Excellent yield
if prediction['predicted_yield'] > 98:
insights.append({
'type': 'positive',
'priority': 'low',
'category': 'production',
'title': f'Excellent Yield Expected: {prediction["predicted_yield"]:.1f}%',
'description': f'Optimal production conditions detected. Expected yield {prediction["predicted_yield"]:.1f}% exceeds baseline {baseline_stats["mean_yield"]:.1f}%.',
'impact_type': 'yield_improvement',
'impact_value': prediction['baseline_comparison'],
'impact_unit': 'percentage_points',
'confidence': 70,
'metrics_json': {
'recipe_id': recipe_id,
'predicted_yield': prediction['predicted_yield']
},
'actionable': False
})
# Insight 4: Yield variability issue
if baseline_stats['cv_yield'] > 0.05: # Coefficient of variation > 5%
insights.append({
'type': 'opportunity',
'priority': 'medium',
'category': 'production',
'title': f'High Yield Variability: {baseline_stats["cv_yield"]*100:.1f}% CV',
'description': f'Yield varies significantly across production runs (CV={baseline_stats["cv_yield"]*100:.1f}%, range {baseline_stats["min_yield"]:.1f}%-{baseline_stats["max_yield"]:.1f}%). Standardizing processes could reduce waste.',
'impact_type': 'process_improvement',
'confidence': 80,
'metrics_json': {
'recipe_id': recipe_id,
'cv_yield': round(baseline_stats['cv_yield'], 3),
'yield_range': round(baseline_stats['max_yield'] - baseline_stats['min_yield'], 2)
},
'actionable': True,
'recommendation_actions': [{
'label': 'Standardize Process',
'action': 'review_production_sop',
'params': {'recipe_id': recipe_id}
}]
})
return insights
def _calculate_prediction_confidence(
self,
production_history: pd.DataFrame,
model_results: Dict[str, Any],
factor_analysis: Dict[str, Any]
) -> int:
"""Calculate overall confidence score for predictions."""
confidence_factors = []
# Factor 1: Sample size (0-30 points)
n_runs = len(production_history)
if n_runs >= 100:
sample_score = 30
elif n_runs >= 50:
sample_score = 25
elif n_runs >= 30:
sample_score = 20
else:
sample_score = 10
confidence_factors.append(('sample_size', sample_score))
# Factor 2: Model performance (0-30 points)
r2 = model_results['performance']['r2']
mae = model_results['performance']['mae']
if r2 > 0.7 and mae < 3:
model_score = 30
elif r2 > 0.5 and mae < 5:
model_score = 25
elif r2 > 0.3 and mae < 7:
model_score = 20
else:
model_score = 10
confidence_factors.append(('model_performance', model_score))
# Factor 3: Statistical significance of factors (0-25 points)
significant_factors = sum(
1 for factor in factor_analysis.values()
if isinstance(factor, dict) and factor.get('significant')
)
if significant_factors >= 3:
stats_score = 25
elif significant_factors >= 2:
stats_score = 20
elif significant_factors >= 1:
stats_score = 15
else:
stats_score = 10
confidence_factors.append(('significant_factors', stats_score))
# Factor 4: Data recency (0-15 points)
most_recent = production_history['started_at'].max()
days_old = (datetime.utcnow() - pd.to_datetime(most_recent)).days
if days_old <= 7:
recency_score = 15
elif days_old <= 30:
recency_score = 12
elif days_old <= 90:
recency_score = 8
else:
recency_score = 5
confidence_factors.append(('data_recency', recency_score))
total_confidence = sum(score for _, score in confidence_factors)
return min(100, max(0, total_confidence))
async def analyze_recipe_yield_history(
self,
tenant_id: str,
recipe_id: str,
production_history: pd.DataFrame,
min_history_runs: int = 30
) -> Dict[str, Any]:
"""
Analyze historical yield performance for a recipe (no prediction).
Args:
tenant_id: Tenant identifier
recipe_id: Recipe identifier
production_history: Historical production runs
min_history_runs: Minimum production runs required
Returns:
Historical analysis with insights
"""
logger.info(
"Analyzing recipe yield history",
tenant_id=tenant_id,
recipe_id=recipe_id,
history_runs=len(production_history)
)
if len(production_history) < min_history_runs:
return self._insufficient_data_response(
recipe_id, {}, len(production_history), min_history_runs
)
# Calculate statistics
baseline_stats = self._calculate_baseline_statistics(production_history)
# Feature engineering
feature_df = self._engineer_features(production_history)
# Analyze factors
factor_analysis = self._analyze_yield_factors(feature_df)
# Identify patterns
patterns = self._identify_yield_patterns(feature_df, factor_analysis)
# Generate insights (without prediction)
insights = []
# Add insights for patterns
for pattern in patterns:
if pattern.get('severity') in ['high', 'medium']:
insights.append({
'type': 'opportunity',
'priority': pattern['severity'],
'category': 'production',
'title': f'Yield Pattern Detected: {pattern["pattern"]}',
'description': pattern['description'],
'impact_type': 'yield_improvement',
'confidence': 80,
'metrics_json': {
'recipe_id': recipe_id,
'pattern': pattern
},
'actionable': True,
'recommendation': pattern['recommendation']
})
return {
'recipe_id': recipe_id,
'analyzed_at': datetime.utcnow().isoformat(),
'history_runs': len(production_history),
'baseline_stats': baseline_stats,
'factor_analysis': factor_analysis,
'patterns': patterns,
'insights': insights
}
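As a worked example of the confidence scoring in _calculate_prediction_confidence (my arithmetic under assumed inputs, not a figure from the commit): a recipe with 50 historical runs scores 25 points for sample size, a best model with R² = 0.6 and MAE = 4 scores 25 for model performance, two statistically significant factors score 20, and data at most 30 days old scores 12, giving a total confidence of 25 + 25 + 20 + 12 = 82 (clamped to 0-100).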

View File

@@ -11,7 +11,7 @@ import structlog
from shared.database.repository import BaseRepository
from shared.database.exceptions import DatabaseError
from shared.database.transactions import transactional
logger = structlog.get_logger()
@@ -56,7 +56,6 @@ class ProductionBaseRepository(BaseRepository):
)
return await self.get_by_tenant_id(tenant_id, skip, limit)
@transactional
async def get_by_date_range(
self,
tenant_id: str,
@@ -89,7 +88,6 @@ class ProductionBaseRepository(BaseRepository):
error=str(e), tenant_id=tenant_id)
raise DatabaseError(f"Failed to fetch records by date range: {str(e)}")
@transactional
async def get_active_records(
self,
tenant_id: str,

View File

@@ -13,7 +13,7 @@ import structlog
from .base import ProductionBaseRepository
from app.models.production import ProductionCapacity
from shared.database.exceptions import DatabaseError, ValidationError
from shared.database.transactions import transactional
logger = structlog.get_logger()
@@ -25,7 +25,6 @@ class ProductionCapacityRepository(ProductionBaseRepository):
# Capacity data changes moderately, medium cache time (10 minutes)
super().__init__(ProductionCapacity, session, cache_ttl)
@transactional
async def create_capacity(self, capacity_data: Dict[str, Any]) -> ProductionCapacity:
"""Create a new production capacity entry with validation"""
try:
@@ -68,7 +67,6 @@ class ProductionCapacityRepository(ProductionBaseRepository):
logger.error("Error creating production capacity", error=str(e))
raise DatabaseError(f"Failed to create production capacity: {str(e)}")
@transactional
async def get_capacity_by_resource(
self,
tenant_id: str,
@@ -101,7 +99,6 @@ class ProductionCapacityRepository(ProductionBaseRepository):
logger.error("Error fetching capacity by resource", error=str(e))
raise DatabaseError(f"Failed to fetch capacity by resource: {str(e)}")
@transactional
async def get_available_capacity(
self,
tenant_id: str,
@@ -136,7 +133,6 @@ class ProductionCapacityRepository(ProductionBaseRepository):
logger.error("Error fetching available capacity", error=str(e))
raise DatabaseError(f"Failed to fetch available capacity: {str(e)}")
@transactional
async def allocate_capacity(
self,
capacity_id: UUID,
@@ -183,7 +179,6 @@ class ProductionCapacityRepository(ProductionBaseRepository):
logger.error("Error allocating capacity", error=str(e))
raise DatabaseError(f"Failed to allocate capacity: {str(e)}")
@transactional
async def release_capacity(
self,
capacity_id: UUID,
@@ -230,7 +225,6 @@ class ProductionCapacityRepository(ProductionBaseRepository):
logger.error("Error releasing capacity", error=str(e))
raise DatabaseError(f"Failed to release capacity: {str(e)}")
@transactional
async def get_capacity_utilization_summary(
self,
tenant_id: str,
@@ -299,7 +293,6 @@ class ProductionCapacityRepository(ProductionBaseRepository):
logger.error("Error calculating capacity utilization summary", error=str(e))
raise DatabaseError(f"Failed to calculate capacity utilization summary: {str(e)}")
@transactional
async def set_maintenance_mode(
self,
capacity_id: UUID,

View File

@@ -13,7 +13,7 @@ import structlog
from .base import ProductionBaseRepository
from app.models.production import ProductionSchedule
from shared.database.exceptions import DatabaseError, ValidationError
from shared.database.transactions import transactional
logger = structlog.get_logger()
@@ -25,7 +25,6 @@ class ProductionScheduleRepository(ProductionBaseRepository):
# Schedules are more stable, medium cache time (10 minutes)
super().__init__(ProductionSchedule, session, cache_ttl)
@transactional
async def create_schedule(self, schedule_data: Dict[str, Any]) -> ProductionSchedule:
"""Create a new production schedule with validation"""
try:
@@ -71,7 +70,6 @@ class ProductionScheduleRepository(ProductionBaseRepository):
logger.error("Error creating production schedule", error=str(e))
raise DatabaseError(f"Failed to create production schedule: {str(e)}")
@transactional
async def get_schedule_by_date(
self,
tenant_id: str,
@@ -101,7 +99,6 @@ class ProductionScheduleRepository(ProductionBaseRepository):
logger.error("Error fetching schedule by date", error=str(e))
raise DatabaseError(f"Failed to fetch schedule by date: {str(e)}")
@transactional
async def get_schedules_by_date_range(
self,
tenant_id: str,
@@ -131,7 +128,6 @@ class ProductionScheduleRepository(ProductionBaseRepository):
logger.error("Error fetching schedules by date range", error=str(e))
raise DatabaseError(f"Failed to fetch schedules by date range: {str(e)}")
@transactional
async def get_active_schedules(self, tenant_id: str) -> List[ProductionSchedule]:
"""Get active production schedules for a tenant"""
try:
@@ -153,7 +149,6 @@ class ProductionScheduleRepository(ProductionBaseRepository):
logger.error("Error fetching active schedules", error=str(e))
raise DatabaseError(f"Failed to fetch active schedules: {str(e)}")
@transactional
async def finalize_schedule(
self,
schedule_id: UUID,
@@ -188,7 +183,6 @@ class ProductionScheduleRepository(ProductionBaseRepository):
logger.error("Error finalizing schedule", error=str(e))
raise DatabaseError(f"Failed to finalize schedule: {str(e)}")
@transactional
async def update_schedule_metrics(
self,
schedule_id: UUID,
@@ -227,7 +221,6 @@ class ProductionScheduleRepository(ProductionBaseRepository):
logger.error("Error updating schedule metrics", error=str(e))
raise DatabaseError(f"Failed to update schedule metrics: {str(e)}")
@transactional
async def get_schedule_performance_summary(
self,
tenant_id: str,

View File

@@ -13,7 +13,7 @@ import structlog
from .base import ProductionBaseRepository
from app.models.production import QualityCheck
from shared.database.exceptions import DatabaseError, ValidationError
from shared.database.transactions import transactional
logger = structlog.get_logger()
@@ -25,7 +25,6 @@ class QualityCheckRepository(ProductionBaseRepository):
# Quality checks are dynamic, short cache time (5 minutes)
super().__init__(QualityCheck, session, cache_ttl)
@transactional
async def create_quality_check(self, check_data: Dict[str, Any]) -> QualityCheck:
"""Create a new quality check with validation"""
try:
@@ -69,7 +68,6 @@ class QualityCheckRepository(ProductionBaseRepository):
logger.error("Error creating quality check", error=str(e))
raise DatabaseError(f"Failed to create quality check: {str(e)}")
@transactional
async def get_checks_by_batch(
self,
tenant_id: str,
@@ -96,7 +94,6 @@ class QualityCheckRepository(ProductionBaseRepository):
logger.error("Error fetching quality checks by batch", error=str(e))
raise DatabaseError(f"Failed to fetch quality checks by batch: {str(e)}")
@transactional
async def get_checks_by_date_range(
self,
tenant_id: str,
@@ -136,7 +133,6 @@ class QualityCheckRepository(ProductionBaseRepository):
logger.error("Error fetching quality checks by date range", error=str(e))
raise DatabaseError(f"Failed to fetch quality checks by date range: {str(e)}")
@transactional
async def get_failed_checks(
self,
tenant_id: str,
@@ -167,7 +163,6 @@ class QualityCheckRepository(ProductionBaseRepository):
logger.error("Error fetching failed quality checks", error=str(e))
raise DatabaseError(f"Failed to fetch failed quality checks: {str(e)}")
@transactional
async def get_quality_metrics(
self,
tenant_id: str,
@@ -247,7 +242,6 @@ class QualityCheckRepository(ProductionBaseRepository):
logger.error("Error calculating quality metrics", error=str(e))
raise DatabaseError(f"Failed to calculate quality metrics: {str(e)}")
@transactional
async def get_quality_trends(
self,
tenant_id: str,

View File

@@ -952,6 +952,28 @@ class ProductionService:
raise
# Capacity Methods
async def get_capacity_by_date(
self,
tenant_id: UUID,
target_date: date
) -> List[Dict[str, Any]]:
"""Get capacity entries for a specific date"""
try:
async with self.database_manager.get_session() as session:
capacity_repo = ProductionCapacityRepository(session)
capacity_list = await capacity_repo.get_capacity_by_date(
str(tenant_id), target_date
)
# Convert to dictionaries for API response
return [capacity.to_dict() for capacity in capacity_list]
except Exception as e:
logger.error("Error getting capacity by date",
error=str(e), tenant_id=str(tenant_id), date=target_date.isoformat())
raise
async def get_capacity_list(
self,
tenant_id: UUID,

View File

@@ -29,6 +29,12 @@ APScheduler==3.10.4
python-dateutil==2.9.0.post0
pytz==2024.2
# Data processing for ML insights
pandas==2.2.3
numpy==2.2.1
scikit-learn==1.6.1
scipy==1.15.1
# Validation and utilities
email-validator==2.2.0

View File

@@ -0,0 +1,578 @@
"""
Tests for Production Yield Predictor
"""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from services.production.app.ml.yield_predictor import YieldPredictor
@pytest.fixture
def yield_predictor():
"""Create YieldPredictor instance."""
return YieldPredictor()
@pytest.fixture
def stable_yield_history():
"""Generate production history with stable high yield."""
np.random.seed(42)
base_date = datetime.utcnow() - timedelta(days=180)
history = []
for i in range(50):
run_date = base_date + timedelta(days=i * 3)
history.append({
'production_run_id': f'run_{i}',
'recipe_id': 'recipe_123',
'planned_quantity': 100,
'actual_quantity': np.random.normal(97, 1.5), # 97% avg, low variance
'yield_percentage': np.random.normal(97, 1.5),
'worker_id': f'worker_{i % 3}', # 3 workers
'started_at': run_date,
'completed_at': run_date + timedelta(hours=4),
'batch_size': np.random.randint(80, 120)
})
df = pd.DataFrame(history)
df['yield_percentage'] = df['yield_percentage'].clip(90, 100)
return df
@pytest.fixture
def variable_yield_history():
"""Generate production history with variable yield."""
np.random.seed(42)
base_date = datetime.utcnow() - timedelta(days=180)
history = []
workers = ['worker_expert', 'worker_intermediate', 'worker_novice']
worker_skills = {'worker_expert': 96, 'worker_intermediate': 90, 'worker_novice': 82}
for i in range(60):
run_date = base_date + timedelta(days=i * 3)
worker = workers[i % 3]
base_yield = worker_skills[worker]
# Time of day effect
hour = (6 + i * 2) % 24
time_penalty = 5 if hour < 6 or hour > 22 else 0
# Batch size effect
batch_size = np.random.randint(50, 150)
batch_penalty = 3 if batch_size > 120 else 0
final_yield = base_yield - time_penalty - batch_penalty + np.random.normal(0, 2)
history.append({
'production_run_id': f'run_{i}',
'recipe_id': 'recipe_456',
'planned_quantity': 100,
'actual_quantity': final_yield,
'yield_percentage': final_yield,
'worker_id': worker,
'started_at': run_date.replace(hour=hour),
'completed_at': run_date.replace(hour=hour) + timedelta(hours=4),
'batch_size': batch_size
})
df = pd.DataFrame(history)
df['yield_percentage'] = df['yield_percentage'].clip(70, 100)
return df
@pytest.fixture
def low_yield_history():
"""Generate production history with consistently low yield."""
np.random.seed(42)
base_date = datetime.utcnow() - timedelta(days=120)
history = []
for i in range(40):
run_date = base_date + timedelta(days=i * 3)
        yield_pct = np.random.normal(82, 5)  # ~82% average, high variance
        history.append({
            'production_run_id': f'run_{i}',
            'recipe_id': 'recipe_789',
            'planned_quantity': 100,
            'actual_quantity': yield_pct,  # planned 100 units, so actual == yield %
            'yield_percentage': yield_pct,
            'worker_id': f'worker_{i % 2}',
            'started_at': run_date,
            'completed_at': run_date + timedelta(hours=4),
            'batch_size': np.random.randint(80, 120)
        })
df = pd.DataFrame(history)
df['yield_percentage'] = df['yield_percentage'].clip(60, 95)
return df
@pytest.fixture
def production_context_optimal():
"""Production context for optimal conditions."""
return {
'worker_id': 'worker_expert',
'planned_start_time': (datetime.utcnow() + timedelta(days=1)).replace(hour=10),
'batch_size': 100,
'planned_quantity': 100,
'unit_cost': 5.0
}
@pytest.fixture
def production_context_suboptimal():
"""Production context for suboptimal conditions."""
return {
'worker_id': 'worker_novice',
'planned_start_time': (datetime.utcnow() + timedelta(days=1)).replace(hour=4),
'batch_size': 140,
'planned_quantity': 100,
'unit_cost': 5.0
}
class TestYieldPredictorBasics:
"""Test basic functionality."""
@pytest.mark.asyncio
async def test_insufficient_data(self, yield_predictor):
"""Test handling of insufficient production history."""
# Create minimal history (< 30 runs)
history = pd.DataFrame([{
'production_run_id': 'run_1',
'recipe_id': 'recipe_123',
'planned_quantity': 100,
'actual_quantity': 95,
'yield_percentage': 95,
'worker_id': 'worker_1',
'started_at': datetime.utcnow() - timedelta(days=1),
'completed_at': datetime.utcnow() - timedelta(hours=20),
'batch_size': 100
}])
context = {
'worker_id': 'worker_1',
'planned_start_time': datetime.utcnow() + timedelta(days=1),
'batch_size': 100,
'planned_quantity': 100
}
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_123',
production_history=history,
production_context=context,
min_history_runs=30
)
assert result['status'] == 'insufficient_data'
assert result['history_runs'] == 1
assert result['required_runs'] == 30
assert len(result['insights']) == 1
assert result['insights'][0]['type'] == 'warning'
@pytest.mark.asyncio
async def test_baseline_statistics_stable_yield(self, yield_predictor, stable_yield_history):
"""Test baseline statistics calculation for stable yield."""
stats = yield_predictor._calculate_baseline_statistics(stable_yield_history)
assert 95 < stats['mean_yield'] < 99
assert stats['std_yield'] < 3 # Low variance
assert stats['cv_yield'] < 0.05 # Low coefficient of variation
assert stats['min_yield'] >= 90
assert stats['max_yield'] <= 100
@pytest.mark.asyncio
async def test_baseline_statistics_variable_yield(self, yield_predictor, variable_yield_history):
"""Test baseline statistics for variable yield."""
stats = yield_predictor._calculate_baseline_statistics(variable_yield_history)
assert 85 < stats['mean_yield'] < 93
assert stats['std_yield'] > 3 # Higher variance
assert stats['cv_yield'] > 0.03
assert stats['runs_below_90'] > 0
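# Note on the cv_yield thresholds above: they assume the coefficient of
# variation is reported as std / mean (an assumption about YieldPredictor, not
# verified here). Roughly: the stable fixture has std ~1.5 on a ~97% mean
# (cv ~0.015 < 0.05), while the variable fixture has std ~4-5 on a ~90% mean
# (cv ~0.05 > 0.03).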
class TestFeatureEngineering:
"""Test feature engineering."""
@pytest.mark.asyncio
async def test_time_features(self, yield_predictor, stable_yield_history):
"""Test time-based feature extraction."""
feature_df = yield_predictor._engineer_features(stable_yield_history)
assert 'hour_of_day' in feature_df.columns
assert 'day_of_week' in feature_df.columns
assert 'is_weekend' in feature_df.columns
assert 'is_early_morning' in feature_df.columns
assert 'is_late_night' in feature_df.columns
assert feature_df['hour_of_day'].min() >= 0
assert feature_df['hour_of_day'].max() <= 23
assert feature_df['day_of_week'].min() >= 0
assert feature_df['day_of_week'].max() <= 6
@pytest.mark.asyncio
async def test_batch_size_features(self, yield_predictor, stable_yield_history):
"""Test batch size feature engineering."""
feature_df = yield_predictor._engineer_features(stable_yield_history)
assert 'batch_size_normalized' in feature_df.columns
assert 'is_large_batch' in feature_df.columns
assert 'is_small_batch' in feature_df.columns
# Normalized batch size should be around 1.0 on average
assert 0.5 < feature_df['batch_size_normalized'].mean() < 1.5
@pytest.mark.asyncio
async def test_worker_experience_features(self, yield_predictor, variable_yield_history):
"""Test worker experience feature engineering."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
assert 'worker_run_count' in feature_df.columns
assert 'worker_experience_level' in feature_df.columns
# Worker run count should increase for each worker
for worker in feature_df['worker_id'].unique():
worker_runs = feature_df[feature_df['worker_id'] == worker]['worker_run_count']
assert worker_runs.is_monotonic_increasing
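# The monotonicity check above matches the standard pandas idiom for a
# cumulative per-worker experience counter. A sketch of one plausible
# implementation (column names are assumptions about YieldPredictor):
#
#     df = df.sort_values('started_at')
#     df['worker_run_count'] = df.groupby('worker_id').cumcount() + 1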
class TestFactorAnalysis:
"""Test yield factor analysis."""
@pytest.mark.asyncio
async def test_worker_impact_detection(self, yield_predictor, variable_yield_history):
"""Test detection of worker impact on yield."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
assert 'worker' in factor_analysis
# Should detect worker skill differences
if factor_analysis['worker'].get('significant'):
assert 'best_worker' in factor_analysis['worker']
assert 'worst_worker' in factor_analysis['worker']
assert factor_analysis['worker']['yield_range'] > 0
@pytest.mark.asyncio
async def test_batch_size_correlation(self, yield_predictor, variable_yield_history):
"""Test batch size correlation analysis."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
assert 'batch_size' in factor_analysis
if factor_analysis['batch_size'].get('significant'):
assert 'correlation' in factor_analysis['batch_size']
assert 'direction' in factor_analysis['batch_size']
assert factor_analysis['batch_size']['direction'] in ['positive', 'negative']
@pytest.mark.asyncio
async def test_time_of_day_effect(self, yield_predictor, variable_yield_history):
"""Test time of day effect analysis."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
assert 'time_of_day' in factor_analysis
class TestYieldPrediction:
"""Test yield prediction."""
@pytest.mark.asyncio
async def test_predict_stable_yield(self, yield_predictor, stable_yield_history, production_context_optimal):
"""Test prediction for stable yield recipe."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_123',
production_history=stable_yield_history,
production_context=production_context_optimal,
min_history_runs=30
)
assert result['status'] != 'insufficient_data'
assert result['predicted_yield'] is not None
assert 90 < result['predicted_yield'] < 100
assert result['confidence'] > 0
assert 'prediction_range' in result
assert result['prediction_range']['lower'] < result['predicted_yield']
assert result['prediction_range']['upper'] > result['predicted_yield']
@pytest.mark.asyncio
async def test_predict_variable_yield_optimal_context(
self, yield_predictor, variable_yield_history, production_context_optimal
):
"""Test prediction with optimal production context."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_456',
production_history=variable_yield_history,
production_context=production_context_optimal,
min_history_runs=30
)
assert result['predicted_yield'] is not None
# Optimal context should predict higher yield
assert result['predicted_yield'] > result['baseline_yield'] - 5
@pytest.mark.asyncio
async def test_predict_variable_yield_suboptimal_context(
self, yield_predictor, variable_yield_history, production_context_suboptimal
):
"""Test prediction with suboptimal production context."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_456',
production_history=variable_yield_history,
production_context=production_context_suboptimal,
min_history_runs=30
)
assert result['predicted_yield'] is not None
        # Suboptimal context (novice worker, early-morning start, large batch)
        # is expected to push the prediction below baseline; no hard threshold is
        # asserted because the size of the effect depends on the trained model.
@pytest.mark.asyncio
async def test_expected_waste_calculation(
self, yield_predictor, low_yield_history, production_context_optimal
):
"""Test expected waste calculation."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_789',
production_history=low_yield_history,
production_context=production_context_optimal,
min_history_runs=30
)
assert 'expected_waste' in result
assert result['expected_waste'] > 0
# For low yield (82%), waste should be significant
expected_waste_pct = 100 - result['predicted_yield']
assert expected_waste_pct > 5
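# Waste arithmetic behind the assertions above (a sketch; the exact formula is
# internal to YieldPredictor): with 100 units planned and a predicted yield of
# roughly 82%, expected waste is about 100 * (1 - 0.82) = 18 units, comfortably
# above the 5-percentage-point threshold checked here.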
class TestPatternDetection:
"""Test yield pattern identification."""
@pytest.mark.asyncio
async def test_low_yield_worker_pattern(self, yield_predictor, variable_yield_history):
"""Test detection of low-yield worker pattern."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis)
# Should detect novice worker pattern
low_worker_patterns = [p for p in patterns if p['pattern'] == 'low_yield_worker']
if factor_analysis.get('worker', {}).get('significant'):
assert len(low_worker_patterns) > 0
pattern = low_worker_patterns[0]
assert pattern['severity'] in ['high', 'medium', 'low']
assert 'recommendation' in pattern
@pytest.mark.asyncio
async def test_time_of_day_pattern(self, yield_predictor, variable_yield_history):
"""Test detection of time-of-day pattern."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
factor_analysis = yield_predictor._analyze_yield_factors(feature_df)
patterns = yield_predictor._identify_yield_patterns(feature_df, factor_analysis)
        # May detect an early-morning low-yield pattern; detection is conditional
        # on statistical significance, so only the structure is asserted here
        time_patterns = [p for p in patterns if p['pattern'] == 'low_yield_time']
        assert isinstance(time_patterns, list)
class TestInsightGeneration:
"""Test insight generation."""
@pytest.mark.asyncio
async def test_low_yield_warning_insight(
self, yield_predictor, low_yield_history, production_context_optimal
):
"""Test generation of low yield warning insight."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_789',
production_history=low_yield_history,
production_context=production_context_optimal,
min_history_runs=30
)
# Should generate low yield warning
warning_insights = [i for i in result['insights'] if i['type'] == 'warning']
assert len(warning_insights) > 0
warning = warning_insights[0]
assert warning['priority'] in ['high', 'medium']
assert warning['category'] == 'production'
assert 'impact_value' in warning
assert warning['actionable'] is True
@pytest.mark.asyncio
async def test_excellent_yield_insight(
self, yield_predictor, stable_yield_history, production_context_optimal
):
"""Test generation of excellent yield insight."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_123',
production_history=stable_yield_history,
production_context=production_context_optimal,
min_history_runs=30
)
# May generate positive insight for excellent yield
positive_insights = [i for i in result['insights'] if i['type'] == 'positive']
if result['predicted_yield'] > 98:
assert len(positive_insights) > 0
@pytest.mark.asyncio
async def test_yield_variability_insight(
self, yield_predictor, variable_yield_history, production_context_optimal
):
"""Test generation of yield variability insight."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_456',
production_history=variable_yield_history,
production_context=production_context_optimal,
min_history_runs=30
)
# Should detect high variability
if result['baseline_std'] / result['baseline_yield'] > 0.05:
variability_insights = [
i for i in result['insights']
if 'variability' in i['title'].lower() or 'variability' in i['description'].lower()
]
assert len(variability_insights) > 0
class TestConfidenceScoring:
"""Test confidence score calculation."""
@pytest.mark.asyncio
async def test_high_confidence_large_sample(
self, yield_predictor, stable_yield_history, production_context_optimal
):
"""Test high confidence with large stable sample."""
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_123',
production_history=stable_yield_history,
production_context=production_context_optimal,
min_history_runs=30
)
# Large sample + stable data should give high confidence
assert result['confidence'] > 60
@pytest.mark.asyncio
async def test_lower_confidence_small_sample(self, yield_predictor, production_context_optimal):
"""Test lower confidence with small sample."""
# Create small history (exactly 30 runs)
        np.random.seed(42)  # keep the small-sample history deterministic
        small_history = pd.DataFrame([{
            'production_run_id': f'run_{i}',
            'recipe_id': 'recipe_123',
            'planned_quantity': 100,
            'actual_quantity': (yield_pct := 95 + np.random.normal(0, 2)),
            'yield_percentage': yield_pct,
            'worker_id': 'worker_1',
            'started_at': datetime.utcnow() - timedelta(days=90 - i),
            # each run completes 4 hours after it starts
            'completed_at': datetime.utcnow() - timedelta(days=90 - i) + timedelta(hours=4),
            'batch_size': 100
        } for i in range(30)])
result = await yield_predictor.predict_yield(
tenant_id='tenant_123',
recipe_id='recipe_123',
production_history=small_history,
production_context=production_context_optimal,
min_history_runs=30
)
# Small sample should give moderate confidence
assert result['confidence'] < 85
class TestHistoricalAnalysis:
"""Test historical analysis (no prediction)."""
@pytest.mark.asyncio
async def test_analyze_recipe_history(self, yield_predictor, variable_yield_history):
"""Test historical analysis without prediction."""
result = await yield_predictor.analyze_recipe_yield_history(
tenant_id='tenant_123',
recipe_id='recipe_456',
production_history=variable_yield_history,
min_history_runs=30
)
assert result['recipe_id'] == 'recipe_456'
assert 'baseline_stats' in result
assert 'factor_analysis' in result
assert 'patterns' in result
assert 'insights' in result
@pytest.mark.asyncio
async def test_analyze_insufficient_history(self, yield_predictor):
"""Test analysis with insufficient history."""
small_history = pd.DataFrame([{
'production_run_id': 'run_1',
'recipe_id': 'recipe_123',
'planned_quantity': 100,
'actual_quantity': 95,
'yield_percentage': 95,
'worker_id': 'worker_1',
'started_at': datetime.utcnow() - timedelta(days=1),
'completed_at': datetime.utcnow() - timedelta(hours=20),
'batch_size': 100
}])
result = await yield_predictor.analyze_recipe_yield_history(
tenant_id='tenant_123',
recipe_id='recipe_123',
production_history=small_history,
min_history_runs=30
)
assert result['status'] == 'insufficient_data'
class TestModelPerformance:
"""Test ML model performance."""
@pytest.mark.asyncio
async def test_model_training(self, yield_predictor, variable_yield_history):
"""Test model training and performance metrics."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
model_results = yield_predictor._train_yield_model(feature_df)
assert 'best_model' in model_results
assert 'best_model_name' in model_results
assert 'performance' in model_results
assert 'feature_importance' in model_results
performance = model_results['performance']
assert 'mae' in performance
assert 'rmse' in performance
assert 'r2' in performance
# MAE should be reasonable (< 15 percentage points)
assert performance['mae'] < 15
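    # The performance keys asserted above map onto the usual scikit-learn
    # helpers; a hedged sketch of how they are typically computed (the
    # predictor's internals may differ):
    #
    #     from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    #     mae = mean_absolute_error(y_true, y_pred)
    #     rmse = mean_squared_error(y_true, y_pred) ** 0.5
    #     r2 = r2_score(y_true, y_pred)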
@pytest.mark.asyncio
async def test_feature_importance(self, yield_predictor, variable_yield_history):
"""Test feature importance extraction."""
feature_df = yield_predictor._engineer_features(variable_yield_history)
model_results = yield_predictor._train_yield_model(feature_df)
feature_importance = model_results['feature_importance']
        # When importances are exposed, the worker encoding should be among
        # them, since skill differences drive the yield variation in this fixture
        if len(feature_importance) > 0:
            assert 'worker_encoded' in feature_importance