Add AI insights feature

This commit is contained in:
Urtzi Alfaro
2025-12-15 21:14:22 +01:00
parent 5642b5a0c0
commit c566967bea
39 changed files with 17729 additions and 404 deletions

View File

@@ -14,6 +14,7 @@ import os
# Add shared clients to path
sys.path.append(os.path.join(os.path.dirname(__file__), '../../../..'))
from shared.clients.ai_insights_client import AIInsightsClient
from shared.messaging import UnifiedEventPublisher
from app.ml.yield_predictor import YieldPredictor
@@ -28,15 +29,18 @@ class YieldInsightsOrchestrator:
1. Predict yield for upcoming production run or analyze historical performance
2. Generate insights for yield optimization opportunities
3. Post insights to AI Insights Service
4. Provide yield predictions for production planning
4. Publish recommendation events to RabbitMQ
5. Provide yield predictions for production planning
"""
def __init__(
    self,
    ai_insights_base_url: str = "http://ai-insights-service:8000",
    event_publisher: Optional[UnifiedEventPublisher] = None
):
    """
    Initialize the yield insights orchestrator.

    Args:
        ai_insights_base_url: Base URL of the AI Insights Service used to
            post generated insights.
        event_publisher: Optional RabbitMQ publisher for recommendation
            events. When None, event publication steps are skipped
            (see _publish_insight_events).
    """
    self.predictor = YieldPredictor()
    self.ai_insights_client = AIInsightsClient(ai_insights_base_url)
    self.event_publisher = event_publisher
async def predict_and_post_insights(
self,
@@ -54,7 +58,7 @@ class YieldInsightsOrchestrator:
recipe_id: Recipe identifier
production_history: Historical production runs
production_context: Upcoming production context:
- worker_id
- staff_assigned (list of staff IDs)
- planned_start_time
- batch_size
- planned_quantity
@@ -109,6 +113,17 @@ class YieldInsightsOrchestrator:
successful=post_results['successful'],
failed=post_results['failed']
)
# Step 4: Publish recommendation events to RabbitMQ
created_insights = post_results.get('created_insights', [])
if created_insights:
recipe_context = production_context.copy() if production_context else {}
recipe_context['recipe_id'] = recipe_id
await self._publish_insight_events(
tenant_id=tenant_id,
insights=created_insights,
recipe_context=recipe_context
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
logger.info("No insights to post for recipe", recipe_id=recipe_id)
@@ -193,6 +208,15 @@ class YieldInsightsOrchestrator:
total=post_results['total'],
successful=post_results['successful']
)
# Step 4: Publish recommendation events to RabbitMQ
created_insights = post_results.get('created_insights', [])
if created_insights:
await self._publish_insight_events(
tenant_id=tenant_id,
insights=created_insights,
recipe_context={'recipe_id': recipe_id}
)
else:
post_results = {'total': 0, 'successful': 0, 'failed': 0}
@@ -248,6 +272,83 @@ class YieldInsightsOrchestrator:
return enriched
async def _publish_insight_events(
    self,
    tenant_id: str,
    insights: List[Dict[str, Any]],
    recipe_context: Optional[Dict[str, Any]] = None
) -> None:
    """
    Publish recommendation events to RabbitMQ for each insight.

    Best-effort by design: a failure to publish any single event is logged
    and swallowed so the surrounding insight workflow never fails on
    messaging errors. No-op when no event publisher was configured.

    Args:
        tenant_id: Tenant identifier
        insights: List of created insights (with insight_id from AI Insights Service)
        recipe_context: Optional recipe context (name, id, etc.); only
            'recipe_name' is read here — recipe_id comes from the
            insight's own metrics_json.
    """
    if not self.event_publisher:
        logger.warning("Event publisher not configured, skipping event publication")
        return
    for insight in insights:
        try:
            # Determine severity based on confidence and priority.
            # Priority wins at each tier; confidence thresholds are a
            # fallback (assumes confidence is on a 0-100 scale — TODO confirm).
            confidence = insight.get('confidence', 0)
            priority = insight.get('priority', 'medium')
            if priority == 'urgent' or confidence >= 90:
                severity = 'urgent'
            elif priority == 'high' or confidence >= 70:
                severity = 'high'
            elif priority == 'medium' or confidence >= 50:
                severity = 'medium'
            else:
                severity = 'low'
            # Build event metadata
            event_metadata = {
                'insight_id': insight.get('id'),  # From AI Insights Service response
                'insight_type': insight.get('insight_type'),
                'recipe_id': insight.get('metrics_json', {}).get('recipe_id'),
                'recipe_name': recipe_context.get('recipe_name') if recipe_context else None,
                'predicted_yield': insight.get('metrics_json', {}).get('predicted_yield'),
                'confidence': confidence,
                'recommendation': insight.get('recommendation'),
                'impact_type': insight.get('impact_type'),
                'impact_value': insight.get('impact_value'),
                'source_service': 'production',
                'source_model': 'yield_predictor'
            }
            # Remove None values so consumers only see populated fields
            event_metadata = {k: v for k, v in event_metadata.items() if v is not None}
            # Publish recommendation event
            await self.event_publisher.publish_recommendation(
                event_type='ai_yield_prediction',
                tenant_id=tenant_id,
                severity=severity,
                data=event_metadata
            )
            logger.info(
                "Published yield insight recommendation event",
                tenant_id=tenant_id,
                insight_id=insight.get('id'),
                insight_type=insight.get('insight_type'),
                severity=severity
            )
        except Exception as e:
            logger.error(
                "Failed to publish insight event",
                tenant_id=tenant_id,
                insight_id=insight.get('id'),
                error=str(e),
                exc_info=True
            )
            # Don't raise - we don't want to fail the whole workflow if event publishing fails
async def analyze_all_recipes(
self,
tenant_id: str,

View File

@@ -62,14 +62,14 @@ class YieldPredictor:
- planned_quantity
- actual_quantity
- yield_percentage
- worker_id
- staff_assigned (list of staff IDs)
- started_at
- completed_at
- batch_size
- equipment_id (optional)
- notes (optional)
production_context: Upcoming production context:
- worker_id
- staff_assigned (list of staff IDs)
- planned_start_time
- batch_size
- equipment_id (optional)
@@ -212,6 +212,9 @@ class YieldPredictor:
df['is_small_batch'] = (df['batch_size'] < df['batch_size'].quantile(0.25)).astype(int)
# Worker experience features (proxy: number of previous runs)
# Extract first worker from staff_assigned list
df['worker_id'] = df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
df = df.sort_values('started_at')
df['worker_run_count'] = df.groupby('worker_id').cumcount() + 1
df['worker_experience_level'] = pd.cut(
@@ -232,6 +235,10 @@ class YieldPredictor:
factors = {}
# Worker impact
# Extract worker_id from staff_assigned for analysis
if 'worker_id' not in feature_df.columns:
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
worker_yields = feature_df.groupby('worker_id')['yield_percentage'].agg(['mean', 'std', 'count'])
worker_yields = worker_yields[worker_yields['count'] >= 3] # Min 3 runs per worker
@@ -339,7 +346,10 @@ class YieldPredictor:
if 'duration_hours' in feature_df.columns:
feature_columns.append('duration_hours')
# Encode worker_id
# Encode worker_id (extracted from staff_assigned)
if 'worker_id' not in feature_df.columns:
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
worker_encoding = {worker: idx for idx, worker in enumerate(feature_df['worker_id'].unique())}
feature_df['worker_encoded'] = feature_df['worker_id'].map(worker_encoding)
feature_columns.append('worker_encoded')
@@ -420,11 +430,15 @@ class YieldPredictor:
) -> Dict[str, Any]:
"""Predict yield for upcoming production run."""
# Extract context
worker_id = production_context.get('worker_id')
staff_assigned = production_context.get('staff_assigned', [])
worker_id = staff_assigned[0] if isinstance(staff_assigned, list) and len(staff_assigned) > 0 else 'unknown'
planned_start = pd.to_datetime(production_context.get('planned_start_time'))
batch_size = production_context.get('batch_size')
# Get worker experience
if 'worker_id' not in feature_df.columns:
feature_df['worker_id'] = feature_df['staff_assigned'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else 'unknown')
worker_runs = feature_df[feature_df['worker_id'] == worker_id]
worker_run_count = len(worker_runs) if len(worker_runs) > 0 else 1
@@ -578,7 +592,7 @@ class YieldPredictor:
'action': 'review_production_factors',
'params': {
'recipe_id': recipe_id,
'worker_id': production_context.get('worker_id')
'worker_id': worker_id
}
}]
})